In [None]:
import pandas as pd

# Load the book table produced by an earlier step; later cells read its
# "description" and "isbn13" columns.
books = pd.read_csv("data/books_with_categories.csv")

In [None]:
import torch
from transformers import pipeline

# Emotion classifier used for every description in this notebook.
# top_k=None is passed so the pipeline reports a score for each label rather
# than only the best one; downstream cells read one score per emotion label.
# The device is chosen at run time: GPU 0 when CUDA is available, otherwise
# -1 (CPU), so the notebook also runs on machines without a CUDA GPU instead
# of failing on a hard-coded device index.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    device=0 if torch.cuda.is_available() else -1,
)

In [None]:
# Smoke-test the classifier on a short example sentence.
classifier("I like this!")

In [None]:
# Show the description stored under index label 0 (the first row when the
# frame keeps the default index from read_csv).
books["description"][0]

In [None]:
# Classify that whole description as a single input.
classifier(books["description"][0])

In [None]:
# Classify the same description fragment by fragment, using "." as a rough
# sentence boundary; the classifier receives a list of strings here.
classifier(books['description'][0].split("."))

In [None]:
# Keep the fragments and their predictions in variables so individual
# fragment/prediction pairs can be inspected in the following cells.
sentences = books["description"][0].split(".")
predictions = classifier(sentences)

In [None]:
# First fragment of the split description.
sentences[0]

In [None]:
# Classifier output for sentences[0].
predictions[0]

In [None]:
# Fourth fragment of the split description.
sentences[3]

In [None]:
# Classifier output for sentences[3].
predictions[3]

In [None]:
# Order the first prediction's entries alphabetically by label name.
sorted(predictions[0], key=lambda x: x['label'])

In [None]:
import numpy as np

# Emotion classes tracked for every book. The order of this list also fixes
# the key order of emotion_scores below.
emotion_labels = [
  "anger",
  "disgust",
  "fear",
  "joy",
  "sadness",
  "surprise",
  "neutral",
]

# Accumulators filled by the scoring loops: isbn collects one value from the
# "isbn13" column per processed book, and emotion_scores collects one score
# per processed book under each label (each label gets its own list).
isbn = list()
emotion_scores = dict((label, []) for label in emotion_labels)

def calculate_max_emotion_scores(predictions, labels=None):
  """Return the highest score each emotion reaches across a set of predictions.

  Args:
    predictions: Iterable with one entry per classified text. Each entry is
      an iterable of dicts carrying a "label" key (emotion name) and a
      "score" key (numeric score), in any order.
    labels: Emotion names to report. Defaults to the module-level
      ``emotion_labels`` when omitted.

  Returns:
    dict mapping each requested label to the maximum score (as computed by
    ``np.max``) found for that label over all predictions, keyed in the
    order of ``labels``.

  Raises:
    KeyError: If a prediction has no entry for one of the requested labels.
    ValueError: If ``predictions`` is empty (``np.max`` of an empty list).
  """
  if labels is None:
    labels = emotion_labels
  per_emotion_scores = {label: [] for label in labels}
  for prediction in predictions:
    # Match scores to emotions by label name, not by position. The label
    # list is not in alphabetical order ("neutral" comes last), so sorting
    # the prediction alphabetically and pairing by index would hand the
    # neutral score to "sadness", the sadness score to "surprise", and the
    # surprise score to "neutral".
    score_by_label = {entry["label"]: entry["score"] for entry in prediction}
    for label in labels:
      per_emotion_scores[label].append(score_by_label[label])
  return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [None]:
# Trial run: score index labels 0-9 only, to check the accumulation logic
# before processing the whole table.
# NOTE(review): this appends to the shared isbn / emotion_scores lists, so
# running the cell more than once adds the same books again.
for i in range(10):
  isbn.append(books["isbn13"][i])
  # Split on "." as a rough sentence boundary and classify every fragment.
  sentences = books["description"][i].split(".")
  predictions = classifier(sentences)
  # Collapse the per-fragment predictions into one score per emotion.
  max_scores = calculate_max_emotion_scores(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_scores[label])

In [None]:
# Inspect the per-label score lists accumulated so far.
emotion_scores

In [None]:
from tqdm import tqdm

# Full run. The label list is restated with the same values as before, and
# the accumulators are reset to empty so results from the 10-book trial are
# discarded.
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

# NOTE(review): books[...][i] looks values up by index label; this equals the
# row position only while books keeps the default 0..n-1 index from read_csv.
for i in tqdm(range(len(books))):
  isbn.append(books["isbn13"][i])
  # NOTE(review): str.split(".") yields an empty trailing string when the
  # text ends with ".", and that fragment is classified too — confirm this
  # is intended. A non-string description (e.g. a missing value) would fail
  # here; presumably descriptions were cleaned upstream — verify.
  sentences = books["description"][i].split(".")
  predictions = classifier(sentences)
  # One score per emotion for this book: the maximum over its fragments.
  max_scores = calculate_max_emotion_scores(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_scores[label])

In [None]:
# One row per scored book: a column for each emotion label, followed by the
# "isbn13" column that is used as the join key afterwards.
emotions_df = pd.DataFrame(emotion_scores).assign(isbn13=isbn)
emotions_df

In [None]:
# Attach the emotion columns to the book table by joining on "isbn13"
# (default inner join).
# NOTE(review): `books` is overwritten, so re-running this cell merges the
# emotion columns into an already-merged frame — restart and run all instead.
books = books.merge(emotions_df, on="isbn13")
books.head()

In [None]:
# Persist the merged table; index=False keeps the row index out of the file.
books.to_csv("data/books_with_emotions.csv", index=False)