<a href="https://colab.research.google.com/github/Liza-IITP/Book-Recommendation-Based-on-Semantic-Similarity/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
df = pd.read_csv("/content/books_cleaned_with_categ.csv")
df.columns

Index(['isbn13', 'isbn10', 'title', 'authors', 'categories', 'thumbnail',
       'description', 'published_year', 'average_rating', 'num_pages',
       'ratings_count', 'titles_subtitles', 'tagged_description',
       'simple_categ_x'],
      dtype='object')

In [None]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
classifier("I love this!")

In [4]:
sentences = df['description'][0].split(".")
prediction = classifier(sentences)

In [5]:
sorted(prediction[0],key=lambda x: x['label'])

[{'label': 'anger', 'score': 0.009156372398138046},
 {'label': 'disgust', 'score': 0.002628477057442069},
 {'label': 'fear', 'score': 0.06816234439611435},
 {'label': 'joy', 'score': 0.047942597419023514},
 {'label': 'neutral', 'score': 0.14038586616516113},
 {'label': 'sadness', 'score': 0.002122163074091077},
 {'label': 'surprise', 'score': 0.7296022176742554}]

In [6]:
emotion_labels = ["anger","disgust","fear","joy","neutral","sadness","surprise"]

In [7]:
isbn = []
emotion_scores = {label : [] for label in emotion_labels}

In [8]:
import numpy as np

def calculate_max_emotion_scores(prediction):
    per_emotion_scores = {label: [] for label in emotion_labels}

    for sentence_pred in prediction:
      sorted_pred = sorted(sentence_pred ,key = lambda x:x["label"])

      for index,label in enumerate(emotion_labels) :
          per_emotion_scores[label].append(sorted_pred[index]["score"])

    return {label: float(np.max(scores)) for label, scores in per_emotion_scores.items()}


In [9]:
for i in range(5) :
  isbn.append(df["isbn13"][i])
  sentences = df["description"][i].split(".")
  prediction = classifier(sentences)
  max_scores = calculate_max_emotion_scores(prediction)
  for label in emotion_labels :
    emotion_scores[label].append(max_scores[label])

In [10]:
emotion_scores

{'anger': [0.0641336739063263,
  0.6126192212104797,
  0.0641336739063263,
  0.3514849543571472,
  0.08141248673200607],
 'disgust': [0.273592084646225,
  0.348284512758255,
  0.10400678217411041,
  0.15072233974933624,
  0.18449552357196808],
 'fear': [0.9281684160232544,
  0.9425276517868042,
  0.9723207950592041,
  0.3607054650783539,
  0.09504339098930359],
 'joy': [0.9327981472015381,
  0.7044220566749573,
  0.7672385573387146,
  0.25188079476356506,
  0.04056445509195328],
 'neutral': [0.6462154984474182,
  0.8879395127296448,
  0.5494765043258667,
  0.7326849102973938,
  0.8843895196914673],
 'sadness': [0.9671575427055359,
  0.11169024556875229,
  0.11169024556875229,
  0.11169024556875229,
  0.47588038444519043],
 'surprise': [0.7296022176742554,
  0.2525462210178375,
  0.07876549661159515,
  0.07876549661159515,
  0.07876549661159515]}

In [18]:
isbn = []
emotion_scores = {label : [] for label in emotion_labels}
from tqdm import tqdm
for i in range(len(df)) :
  isbn.append(df["isbn13"][i])
  sentences = df["description"][i].split(".")
  prediction = classifier(sentences)
  max_scores = calculate_max_emotion_scores(prediction)
  for label in emotion_labels :
    emotion_scores[label].append(max_scores[label])

In [14]:
emotionsDf = pd.DataFrame(emotion_scores)
emotionsDf["isbn13"] = isbn

In [None]:
emotionsDf.head()

In [24]:
df = df.drop(columns=[c for c in df.columns if c.endswith('_x') or c.endswith('_y')])

In [26]:
df.to_csv(
    "books_with_emotions.csv",
    index=False,
    encoding="utf-8",
    quoting=1  )
