Install the following libraries: <br>
-transformers <br>
-torch (PyTorch; the pip package is named `torch`) <br>
-scipy <br>
-pandas <br>
-numpy <br>
-tqdm <br>


SENTIMENT ANALYSIS - RoBERTa pre-trained model classification. <br>

This project uses 'cardiffnlp/twitter-roberta-base-sentiment', a RoBERTa-base model that was pre-trained on a large corpus of Twitter data and then fine-tuned specifically for sentiment analysis. For each text it returns three scores: negative, neutral and positive.

In [18]:
#Import libraries and dependencies
import numpy as np
import pandas as pd
import torch

from scipy.special import softmax
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [19]:
# Load the restaurant reviews (tab-separated file)
df = pd.read_csv("../Trial+Dom/Resources/Restaurant_Reviews.tsv", sep="\t")

# Add a sequential Id column (0 .. len(df) - 1) and move it to the front,
# keeping only the columns used in the rest of the notebook
df = df.assign(Id=np.arange(len(df)))[['Id', 'Review', 'Liked']]
df.head()

Unnamed: 0,Id,Review,Liked
0,0,Wow... Loved this place.,1
1,1,Crust is not good.,0
2,2,Not tasty and the texture was just nasty.,0
3,3,Stopped by during the late May bank holiday of...,1
4,4,The selection on the menu was great and so wer...,1


In [20]:
# Pull the pre-trained sentiment model and its matching tokenizer from the
# Hugging Face Hub (downloaded on first use, then served from the local cache)

# Plain string literal: there is nothing to interpolate, so no f-prefix
model_name = "cardiffnlp/twitter-roberta-base-sentiment"

# The tokenizer must come from the same checkpoint as the model
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [22]:
# Demo only: work on the first 10 reviews so the notebook runs quickly.
# The real dataset will have at least 1000 rows.

reviews = df.iloc[:10]
reviews.tail()

Unnamed: 0,Id,Review,Liked
5,5,Now I am getting angry and I want my damn pho.,0
6,6,Honeslty it didn't taste THAT fresh.),0
7,7,The potatoes were like rubber and you could te...,0
8,8,The fries were great too.,1
9,9,A great touch.,1


In [23]:
# Pick a single review (the row labelled 6) to try the model on

example = reviews.loc[6, 'Review']
print(example)

Honeslty it didn't taste THAT fresh.)


In [24]:
# Function that runs one piece of text through the pre-trained model

def roberta_polarity_scores(example):
    """Score one text with the pre-trained RoBERTa sentiment model.

    Tokenizes ``example`` with the notebook-level ``tokenizer``, runs it
    through the notebook-level ``model`` and turns the first row of the
    model output into probabilities with softmax.

    Parameters
    ----------
    example : str
        The text to score (a single review).

    Returns
    -------
    dict
        ``{'roberta_neg': ..., 'roberta_neu': ..., 'roberta_pos': ...}``
        holding the softmax scores at positions 0, 1 and 2 of the model
        output, in that order.

    Notes
    -----
    The text is passed to the tokenizer without truncation. Presumably an
    input longer than the model supports makes the forward pass raise;
    the scoring loop in this notebook catches ``RuntimeError`` for that
    reason -- TODO confirm against the installed transformers version.
    """
    encoded_text = tokenizer(example, return_tensors='pt')

    # Inference only: disable autograd so no computation graph is built
    # and kept alive for every scored review.
    with torch.no_grad():
        output = model(**encoded_text)

    # output[0] is the first element of the model output; the second [0]
    # selects the only item in the batch.
    scores = softmax(output[0][0].detach().numpy())

    scores_dict = {
        'roberta_neg': scores[0],
        'roberta_neu': scores[1],
        'roberta_pos': scores[2]
    }

    return scores_dict

In [25]:
# Score every review in the dataset, collecting the results keyed by review Id.
# A review the model cannot process (RuntimeError) is reported and skipped.

res = {}

id_review_pairs = zip(reviews['Id'], reviews['Review'])

for myid, text in tqdm(id_review_pairs, total=len(reviews)):
    try:
        res[myid] = roberta_polarity_scores(text)
    except RuntimeError:
        print(f'Broke for id {myid}')

100%|██████████| 10/10 [00:11<00:00,  1.15s/it]


In [32]:
# Build a DataFrame from the collected scores. Each key of `res` (a review Id)
# becomes a column, so transpose to get one row per review and one column
# per score.
results_df = pd.DataFrame(res).transpose()

results_df

Unnamed: 0,roberta_neg,roberta_neu,roberta_pos
0,0.002303,0.010709,0.986987
1,0.964183,0.032228,0.003589
2,0.977226,0.019545,0.003228
3,0.002215,0.029861,0.967925
4,0.002389,0.016415,0.981195
5,0.975816,0.021743,0.002441
6,0.769339,0.213488,0.017173
7,0.228633,0.704015,0.067352
8,0.002494,0.025748,0.971758
9,0.006676,0.113918,0.879407


In [33]:
# Attach the original review text and label to the scores.
#
# This cell overwrites `results_df`, so it is guarded: once the 'Id' column
# exists the merge has already been done, and re-running the cell must not
# add a second index column or merge again.
if 'Id' not in results_df.columns:
    # The index holds the review Ids; turn it into a regular 'Id' column
    results_df = results_df.rename_axis('Id').reset_index()

    # Join on 'Id' explicitly rather than on whichever columns happen to match
    results_df = results_df.merge(reviews, how='left', on='Id')

results_df

Unnamed: 0,Id,roberta_neg,roberta_neu,roberta_pos,Review,Liked
0,0,0.002303,0.010709,0.986987,Wow... Loved this place.,1
1,1,0.964183,0.032228,0.003589,Crust is not good.,0
2,2,0.977226,0.019545,0.003228,Not tasty and the texture was just nasty.,0
3,3,0.002215,0.029861,0.967925,Stopped by during the late May bank holiday of...,1
4,4,0.002389,0.016415,0.981195,The selection on the menu was great and so wer...,1
5,5,0.975816,0.021743,0.002441,Now I am getting angry and I want my damn pho.,0
6,6,0.769339,0.213488,0.017173,Honeslty it didn't taste THAT fresh.),0
7,7,0.228633,0.704015,0.067352,The potatoes were like rubber and you could te...,0
8,8,0.002494,0.025748,0.971758,The fries were great too.,1
9,9,0.006676,0.113918,0.879407,A great touch.,1


Next step: save the results to a CSV file, then add them to the database.