In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import os
import seaborn as sns

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

plt.style.use("ggplot")

# Walk the Kaggle input directory and print the full path of every file found,
# so the dataset location read further down can be checked by eye.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
data = pd.read_csv("/kaggle/input/amazon-fine-food-reviews/Reviews.csv")
print(data.shape)

# Count how many rows carry each "Score" value, ordered by the score itself
# (value_counts alone would order by frequency).
score_counts = data["Score"].value_counts().sort_index()

plots = score_counts.plot(kind="bar", title="count by stars", figsize=(10, 5))
plots.set(xlabel="Review Score", ylabel="Number of users")
plt.show()

In [None]:
# Identifier of the pretrained checkpoint; the same value is handed to both
# from_pretrained calls so the tokenizer and classifier always match.
model = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer, net = (
    AutoTokenizer.from_pretrained(model),
    AutoModelForSequenceClassification.from_pretrained(model),
)

In [None]:
# VADER analyzer shared by the scoring function below.
# SentimentIntensityAnalyzer needs the "vader_lexicon" NLTK resource and raises
# LookupError when it is not installed; fetch it on demand so the notebook
# still runs from a fresh kernel/environment.
try:
    intensity = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download("vader_lexicon")
    intensity = SentimentIntensityAnalyzer()

# Optional manual check (left disabled so Run All never blocks on input):
#quick_analysis = input("Enter a statement to check its sentiment score")
#print(intensity.polarity_scores(quick_analysis))


def scoring_roberta_vaders(id_value, text):
    """Score one text with both the RoBERTa classifier and VADER.

    The result is not returned; it is stored in the module-level ``sentiment``
    dict under ``id_value``.

    Args:
        id_value: Key under which the combined scores are stored.
        text: Text passed to both the tokenizer and the VADER analyzer.

    Side effects:
        Sets ``sentiment[id_value]`` to a dict holding ``roberta_neg``,
        ``roberta_neu`` and ``roberta_pos`` followed by every key returned by
        ``intensity.polarity_scores``. On a key collision the VADER value wins
        because it is merged last.

    Any exception raised by the tokenizer, the model or VADER propagates to
    the caller, and nothing is stored in that case.
    """
    encoded = tokenizer(text, return_tensors="pt")
    # First element of the model output, first (only) row: the raw class scores.
    raw_scores = net(**encoded)[0][0]
    probabilities = softmax(raw_scores.detach().numpy())

    vader_scores = intensity.polarity_scores(text)

    roberta_keys = ("roberta_neg", "roberta_neu", "roberta_pos")
    scores = {key: probabilities[position] for position, key in enumerate(roberta_keys)}
    scores.update(vader_scores)
    sentiment[id_value] = scores
    
# Accumulator filled by scoring_roberta_vaders: {review Id: combined score dict}.
# Re-created here so re-running the cell starts from a clean slate.
sentiment = {}

# Walk the Id and Text columns in lockstep so each review's text is always
# scored under its own Id. (Looking the text up with data["Text"][id_value]
# would treat the Id as an index *label*, which picks a different row whenever
# Id and the frame's index differ, and raises KeyError for an Id that is not
# an index label.)
for id_value, text in tqdm(zip(data["Id"], data["Text"]), total=len(data)):
    try:
        scoring_roberta_vaders(id_value, text)
    except RuntimeError:
        # Best effort: the model raised RuntimeError for this text; report the
        # review's actual Id and carry on with the rest.
        print(f"Can't run on Id: {id_value}")


In [None]:
# `sentiment` maps Id -> score dict, so the raw frame has one column per Id;
# transpose to get one row per review and expose the Id as a regular column.
roberts_vaders_sen = (
    pd.DataFrame(sentiment)
    .T
    .reset_index()
    .rename(columns={"index": "Id"})
)

# Attach the original review columns (notably "Score", which the plots below
# group by). The join partner is the loaded reviews frame `data`; joining on
# "Id" explicitly keeps the merge from silently picking up other shared columns.
roberts_vaders_sen = roberts_vaders_sen.merge(data, how="left", on="Id")
roberts_vaders_sen.head(5)

In [None]:
# Columns compared pairwise: the three RoBERTa scores, then the three VADER ones.
score_columns = ["roberta_neg", "roberta_neu", "roberta_pos", "neg", "neu", "pos"]

# Points are coloured by the review's "Score" value.
sns.pairplot(
    roberts_vaders_sen,
    vars=score_columns,
    hue="Score",
    palette="tab10",
)
plt.show()

In [None]:
fig, ax = plt.subplots(2, 3, figsize=(20, 8), sharex="col")

# Layout of the grid as (column to plot, panel title):
# top row shows the VADER scores, bottom row the RoBERTa scores.
panel_layout = [
    [("neg", "VadersNegative"), ("neu", "VadersNeutral"), ("pos", "VadersPositive")],
    [("roberta_neg", "roberta_neg"), ("roberta_neu", "roberta_neu"), ("roberta_pos", "roberta_pos")],
]

# Draw one bar chart per panel: the chosen score column against "Score".
for axes_row, panels_row in zip(ax, panel_layout):
    for panel_ax, (column, title) in zip(axes_row, panels_row):
        sns.barplot(data=roberts_vaders_sen, x="Score", y=column, ax=panel_ax)
        panel_ax.set_title(title)

plt.show()