# BERT Sentiment Analysis tool
This notebook assigns a tone (negative, neutral or positive) to each article, using its headline, abstract and keywords as input.

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install transformers

In [2]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install torch



Load the pretrained BERT sentiment model and its matching tokenizer

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd

# The tokenizer and the classifier are loaded from the same pretrained
# checkpoint, so the name is spelled out once and shared by both calls.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

## Tones
0 = Negative <br>
1 = Neutral <br>
2 = Positive

In [2]:
# Define the function that computes the sentiment score
def sentiment_score(title, abstract, keywords, max_length=512):
    """Classify the tone of one article as 0 (negative), 1 (neutral) or 2 (positive).

    The three text fields are joined into a single passage, encoded with the
    notebook-level ``tokenizer`` and scored with the notebook-level ``model``.
    The model's five output classes (indices 0-4) are then collapsed to three.

    Args:
        title: Article headline, as a string.
        abstract: Article abstract, as a string.
        keywords: Article keywords, as a single string.
        max_length: Maximum number of tokens passed to the model. Longer
            passages are truncated instead of making the model call fail.

    Returns:
        int: 0, 1 or 2.
    """
    text = title + ". " + abstract + ". " + keywords
    # Truncate explicitly: without it, a passage longer than the model's
    # position limit would be passed through unchanged and the forward pass
    # would raise.
    tokens = tokenizer.encode(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        result = model(tokens)

    logits = result.logits[0]
    top_class = int(torch.argmax(logits))

    # Convert to 3 classes rather than 5. The two outer classes map directly;
    # the two in-between classes are decided by comparing the neighbouring
    # extreme class against the middle class.
    if top_class == 0:
        return 0
    if top_class == 1:
        return 0 if logits[0] > logits[2] else 1
    if top_class == 2:
        return 1
    if top_class == 3:
        return 2 if logits[4] > logits[2] else 1
    return 2

In [3]:
# Smoke test: score one deliberately middling example and show the result
sample_score = sentiment_score("average", "The mediocre", "alright. neutral")
print(sample_score)

1


In [5]:
# Load the CSV file into a pandas DataFrame
train_df = pd.read_csv("../data/cleaned_train.csv")

# Normalise the three text inputs to plain strings. Missing values are replaced
# with "" first: a bare astype(str) would turn NaN into the literal text "nan",
# which would then be scored as if it were article content.
for column in ("headline", "abstract", "keywords"):
    train_df[column] = train_df[column].fillna("").astype(str)

# Score every article and store the tone (0/1/2) in a new column. The scores
# are built as a positional list, which also works when the frame has no rows.
train_df["BERT_sentiment_score"] = [
    sentiment_score(headline, abstract, keywords)
    for headline, abstract, keywords in zip(
        train_df["headline"], train_df["abstract"], train_df["keywords"]
    )
]

# Write the updated DataFrame back to the same CSV file (overwrites the input)
train_df.to_csv("../data/cleaned_train.csv", index=False)

In [4]:
# Load the CSV file into a pandas DataFrame
test_df = pd.read_csv("../data/cleaned_test.csv")

# Normalise the three text inputs to plain strings. Missing values are replaced
# with "" first: a bare astype(str) would turn NaN into the literal text "nan",
# which would then be scored as if it were article content.
for column in ("headline", "abstract", "keywords"):
    test_df[column] = test_df[column].fillna("").astype(str)

# Score every article and store the tone (0/1/2) in a new column. The scores
# are built as a positional list, which also works when the frame has no rows.
test_df["BERT_sentiment_score"] = [
    sentiment_score(headline, abstract, keywords)
    for headline, abstract, keywords in zip(
        test_df["headline"], test_df["abstract"], test_df["keywords"]
    )
]

# Write the updated DataFrame back to the same CSV file (overwrites the input)
test_df.to_csv("../data/cleaned_test.csv", index=False)