In [4]:
import pandas as pd
books = pd.read_csv("books_with_categories.csv")

In [6]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base",return_all_scores=True)
classifier("I love this!")

Device set to use mps:0


[[{'label': 'anger', 'score': 0.004419783595949411},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.00041385198710486293},
  {'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'neutral', 'score': 0.00576459476724267},
  {'label': 'sadness', 'score': 0.002092391485348344},
  {'label': 'surprise', 'score': 0.00852868054062128}]]

In [7]:
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base",top_k=None, device="mps")
classifier("I love this!")

Device set to use mps


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.00852868054062128},
  {'label': 'neutral', 'score': 0.00576459476724267},
  {'label': 'anger', 'score': 0.004419783595949411},
  {'label': 'sadness', 'score': 0.002092391485348344},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.00041385198710486293}]]

In [8]:
classifier(books["description"][0])

[[{'label': 'fear', 'score': 0.6548422574996948},
  {'label': 'neutral', 'score': 0.16985155642032623},
  {'label': 'sadness', 'score': 0.11640843003988266},
  {'label': 'surprise', 'score': 0.020700642839074135},
  {'label': 'disgust', 'score': 0.019100701436400414},
  {'label': 'joy', 'score': 0.015161211602389812},
  {'label': 'anger', 'score': 0.003935141488909721}]]

In [9]:
classifier(books["description"][0].split("."))

[[{'label': 'surprise', 'score': 0.7296021580696106},
  {'label': 'neutral', 'score': 0.14038625359535217},
  {'label': 'fear', 'score': 0.06816215068101883},
  {'label': 'joy', 'score': 0.0479423962533474},
  {'label': 'anger', 'score': 0.009156353771686554},
  {'label': 'disgust', 'score': 0.0026284793857485056},
  {'label': 'sadness', 'score': 0.0021221640054136515}],
 [{'label': 'neutral', 'score': 0.44937190413475037},
  {'label': 'disgust', 'score': 0.2735902965068817},
  {'label': 'joy', 'score': 0.1090828999876976},
  {'label': 'sadness', 'score': 0.09362740814685822},
  {'label': 'anger', 'score': 0.04047819972038269},
  {'label': 'surprise', 'score': 0.02697022259235382},
  {'label': 'fear', 'score': 0.00687906239181757}],
 [{'label': 'neutral', 'score': 0.6462162137031555},
  {'label': 'sadness', 'score': 0.24273328483104706},
  {'label': 'disgust', 'score': 0.043422579765319824},
  {'label': 'surprise', 'score': 0.028300493955612183},
  {'label': 'joy', 'score': 0.014211435

In [10]:
import numpy as np
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_predictions = sorted(prediction, key=lambda x: x["label"])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}



In [11]:
for i in range(10):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])
    
    

In [12]:
from tqdm import tqdm

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}


for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [09:01<00:00,  9.61it/s]  


In [13]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

In [14]:
emotions_df

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273590,0.928169,0.932798,0.646216,0.967158,0.729602,9780002005883
1,0.612619,0.348286,0.942528,0.704422,0.887940,0.111690,0.252544,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078765,9780006178736
3,0.351483,0.150722,0.360707,0.251881,0.732687,0.111690,0.078765,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.884389,0.475881,0.078765,9780006280934
...,...,...,...,...,...,...,...,...
5192,0.148208,0.030643,0.919165,0.255170,0.853723,0.980877,0.030656,9788172235222
5193,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765,9788173031014
5194,0.009997,0.009929,0.339217,0.947779,0.375755,0.066685,0.057625,9788179921623
5195,0.064134,0.104007,0.459270,0.759455,0.951104,0.368110,0.078765,9788185300535


In [15]:
books = pd.merge(books, emotions_df, on = "isbn13")

In [16]:
books.to_csv("books_with_emotions.csv", index=False)