In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import pandas as pd
nltk.download('vader_lexicon')
import plotly.express as px 
import plotly.graph_objs as pgo
import matplotlib.pyplot as plt


In [None]:
# Read df_clean.csv (decoded as UTF-8) into a pandas DataFrame and preview
# its first rows as the cell output.
df = pd.read_csv(filepath_or_buffer="df_clean.csv", encoding='utf8')
df.head()

In [None]:
# Initialize the sentiment analyzer
# (NLTK's VADER SentimentIntensityAnalyzer; its polarity_scores() method is
# what the scoring cell calls on every review text.)
sentiment = SentimentIntensityAnalyzer()

In [None]:
# Score every review text with VADER and keep only the "compound" value.
# The column is named 'sentiment' (lowercase) because the cells that build
# 'sentiment_tag' and 'sentiment_score' read df['sentiment']; writing the
# scores to 'Sentiment' made those cells fail with a KeyError.
# Missing texts are replaced by an empty string and everything is cast to str
# so a NaN coming from an empty CSV field cannot break the scoring loop.
df['sentiment'] = [
    sentiment.polarity_scores(text)["compound"]
    for text in df["Text"].fillna("").astype(str)
]


In [None]:
# Turn each compound score in df['sentiment'] into a coarse label:
# 'Positive' at 0.05 and above, 'Negative' at -0.05 and below,
# 'Neutral' for everything in between.
def _tag_for(compound):
    """Return the sentiment label for a single compound score."""
    if compound >= 0.05:
        return 'Positive'
    if compound <= -0.05:
        return 'Negative'
    return 'Neutral'


df['sentiment_tag'] = [_tag_for(compound) for compound in df['sentiment']]


In [None]:
# Map the compound score in [-1, 1] onto an integer rating by splitting the
# range into five equal-width bins (0.4 each): [-1, -0.6) -> 1, ..., [0.6, 1) -> 5.
# The raw formula yields 6 for a compound of exactly 1.0, because the upper
# edge falls just outside the last bin, so the result is capped at 5.
# NOTE(review): the cap assumes df['Score'] uses a 1..5 scale - confirm.
df['sentiment_score'] = [
    min(5, int(((label + 1) * 5 / 2) + 1))
    for label in df['sentiment']
]


In [None]:
from pathlib import Path

# Write the current frame to df_sentimentresults.csv.
# The target's parent directory is created first when it does not exist yet.
filepath = Path('df_sentimentresults.csv')
filepath.parent.mkdir(exist_ok=True, parents=True)
df.to_csv(path_or_buf=filepath)

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Compare the ratings in df['Score'] with the ratings derived from the
# sentiment scores in df['sentiment_score'].
true_labels = df['Score']
predicted_labels = df['sentiment_score']

# Weighted averaging is requested; the fourth returned value is not used.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true=true_labels,
    y_pred=predicted_labels,
    average='weighted',
)

# Report the three metrics, one per line.
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Ground truth is df['Score']; predictions are the sentiment-derived ratings.
true_labels = df['Score']
predicted_labels = df['sentiment_score']

# Both metrics are computed from the same pair of columns and are
# independent of each other.
conf_matrix = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


In [None]:
# Count how many reviews carry each sentiment tag.
# The column names are pinned explicitly with rename_axis()/reset_index(name=...)
# instead of renaming the columns produced by to_frame().reset_index():
# those default names differ between pandas versions ('index'/'sentiment_tag'
# before 2.0, 'sentiment_tag'/'count' from 2.0 on), which made the
# 'sentiment_label' lookup below fail with a KeyError on newer pandas.
df_1 = (
    df['sentiment_tag']
    .value_counts()
    .rename_axis('sentiment_label')
    .reset_index(name='values')
)

# Pie chart
labels = df_1['sentiment_label']
sizes = df_1['values']

# Donut-style pie (hole=.3); the figure is also saved as a standalone HTML file.
fig1 = px.pie(df_1, values=sizes, names=labels, hole=.3, title='Sentiment tags distribution')
fig1.write_html("Sentiment tags distribution.html")

fig1.show()


### "Users who frequently provide reviews"

In [None]:
# Reload df_clean.csv; this replaces the earlier `df`, so the sentiment
# columns added above are not present on this frame.
df = pd.read_csv(filepath_or_buffer='df_clean.csv')

# Number of rows per UserId, then the ids that occur more than 100 times.
user_id_counts = df['UserId'].value_counts()
labels = user_id_counts.loc[user_id_counts.gt(100)].index
labels

In [None]:
import numpy as np

# Keep only the rows written by the frequent reviewers held in `labels`,
# average each user's Score and sort the users from highest to lowest mean.
# The aggregation is given as the string 'mean': pandas maps np.mean to the
# same built-in groupby mean, but passing the NumPy callable emits a
# FutureWarning on recent pandas versions.
df_filtered = (
    df.loc[df['UserId'].isin(labels)]
    .groupby(["UserId"])
    .agg(mean_score=('Score', 'mean'))
    .sort_values(by="mean_score", ascending=False)
    .reset_index()
)
df_filtered.head()

In [None]:
# Bar chart of the mean score per frequent reviewer.
# The figure is saved as a standalone HTML file and then displayed.
fig2 = px.bar(
    data_frame=df_filtered,
    x='UserId',
    y='mean_score',
    title="Users who frequently provides reviews",
)
fig2.write_html("Users who frequently provides reviews.html")
fig2.show()

### "Products average score"

In [None]:
# calculating average score for each product id
import numpy as np

# One row per ProductId with its mean Score, sorted from highest to lowest.
# The aggregation is given as the string 'mean': pandas maps np.mean to the
# same built-in groupby mean, but passing the NumPy callable emits a
# FutureWarning on recent pandas versions.
df_temp = (
    df.groupby(["ProductId"])
    .agg(mean_score=('Score', 'mean'))
    .sort_values(by="mean_score", ascending=False)
    .reset_index()
)
df_temp.head()

In [None]:
# rating tag creation
def condition(x):
    """Return the rating tag for a mean score ``x``.

    Exactly 5 gives "top rating". Otherwise the tag is chosen by the highest
    threshold that ``x`` reaches: 4 -> "4+", 3 -> "3+", 2 -> "2+",
    1 -> "least rating". Values below 1, and NaN, match no threshold and
    give ``None``.
    """
    if x == 5:
        return "top rating"
    # Thresholds are checked from highest to lowest; the first one reached wins.
    for floor, tag in ((4, "4+"), (3, "3+"), (2, "2+"), (1, "least rating")):
        if x >= floor:
            return tag
    return None
    
# Applying the conditions
# condition() is called on every product's mean_score and the returned tag
# is stored in a new 'rating' column of df_temp.
df_temp['rating'] = df_temp['mean_score'].apply(condition)

In [None]:
# Preview the first rows of df_temp, now including the 'rating' column.
df_temp.head()

In [None]:
#product id count with rating tags
# The column names are pinned explicitly with rename_axis()/reset_index(name=...)
# instead of renaming an 'index' column after to_frame().reset_index():
# from pandas 2.0 on that frame has the columns 'rating'/'count', so the old
# rename was a no-op and no 'rating_category' column existed for the chart.
# The result always has 'rating_category' (the tag) and 'rating' (its count).
values = (
    df_temp['rating']
    .value_counts()
    .rename_axis('rating_category')
    .reset_index(name='rating')
)
values

In [None]:
import matplotlib.pyplot as plt
# Pie chart
# Tag names and their product counts come from the `values` frame.
labels = values['rating_category']
sizes = values['rating']
# `values` is passed as the data frame: the chart previously received df_1,
# the unrelated sentiment-tag frame, which tied this cell to the sentiment
# section having been run and contributed nothing to the plotted data.
fig3 = px.pie(values, values=sizes, names=labels, hole=.3, title='Products average score distribution')
fig3.write_html("Products average score distribution.html")
fig3.show()