In [7]:
import pandas as pd
import numpy as np

from tqdm import tqdm

from transformers import pipeline

In [3]:
books      = pd.read_csv("books_with_categories.csv")
classifier = pipeline("text-classification", model = "j-hartmann/emotion-english-distilroberta-base", top_k = None)

Device set to use cpu


In [4]:
classifier("I love this!")

[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528684265911579},
  {'label': 'neutral', 'score': 0.005764591973274946},
  {'label': 'anger', 'score': 0.004419785924255848},
  {'label': 'sadness', 'score': 0.002092393347993493},
  {'label': 'disgust', 'score': 0.001611992483958602},
  {'label': 'fear', 'score': 0.0004138524236623198}]]

In [6]:
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surpirse", "neutral"]
isbn           = []
emotion_scores = {label : [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
  per_emotion_scores = {label : [] for label in emotion_labels}
  for prediction in predictions:
    sorted_predictions = sorted(prediction, key = lambda x : x["label"])
    for index, label in enumerate(emotion_labels):
      per_emotion_scores[label].append(sorted_predictions[index]["score"])
  return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

for i in range(10):
  isbn.append(books["isbn13"][i])
  sentences   = books["description"][i].split(".")
  predictions = classifier(sentences)
  max_scores  = calculate_max_emotion_scores(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_scores[label])

print(emotion_scores)

{'anger': [np.float64(0.0641336739063263), np.float64(0.6126192212104797), np.float64(0.0641336739063263), np.float64(0.3514849543571472), np.float64(0.08141248673200607), np.float64(0.23222453892230988), np.float64(0.5381843447685242), np.float64(0.0641336739063263), np.float64(0.30066990852355957), np.float64(0.0641336739063263)], 'disgust': [np.float64(0.273592084646225), np.float64(0.348284512758255), np.float64(0.10400678217411041), np.float64(0.15072233974933624), np.float64(0.18449552357196808), np.float64(0.7271750569343567), np.float64(0.15585479140281677), np.float64(0.10400678217411041), np.float64(0.279481440782547), np.float64(0.17792588472366333)], 'fear': [np.float64(0.9281684160232544), np.float64(0.9425276517868042), np.float64(0.9723207950592041), np.float64(0.3607054650783539), np.float64(0.09504339098930359), np.float64(0.051362860947847366), np.float64(0.7474274039268494), np.float64(0.40449756383895874), np.float64(0.9155241250991821), np.float64(0.051362860947847

In [8]:
emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn           = []
emotion_scores = {label : [] for label in emotion_labels}

for i in tqdm (range(len(books))):
  isbn.append(books["isbn13"][i])
  sentences   = books["description"][i].split(".")
  predictions = classifier(sentences)
  max_scores  = calculate_max_emotion_scores(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_scores[label])

emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn
print(emotions_df.head())

100%|██████████| 5197/5197 [1:02:08<00:00,  1.39it/s]

      anger   disgust      fear       joy   sadness  surprise   neutral  \
0  0.064134  0.273592  0.928168  0.932798  0.646215  0.967158  0.729602   
1  0.612619  0.348285  0.942528  0.704422  0.887940  0.111690  0.252546   
2  0.064134  0.104007  0.972321  0.767239  0.549477  0.111690  0.078765   
3  0.351485  0.150722  0.360705  0.251881  0.732685  0.111690  0.078765   
4  0.081412  0.184496  0.095043  0.040564  0.884390  0.475880  0.078765   

          isbn13  
0  9780002005883  
1  9780002261982  
2  9780006178736  
3  9780006280897  
4  9780006280934  





In [9]:
books = pd.merge(books, emotions_df, on = "isbn13")
print(books)
books.to_csv("books_with_emotions.csv", index = False)

             isbn13      isbn10  \
0     9780002005883  0002005883   
1     9780002261982  0002261987   
2     9780006178736  0006178731   
3     9780006280897  0006280897   
4     9780006280934  0006280935   
...             ...         ...   
5192  9788172235222  8172235224   
5193  9788173031014  8173031010   
5194  9788179921623  817992162X   
5195  9788185300535  8185300534   
5196  9789027712059  9027712050   

                                                  title  \
0                                                Gilead   
1                                          Spider's Web   
2                                        Rage of angels   
3                                        The Four Loves   
4                                   The Problem of Pain   
...                                                 ...   
5192                                  Mistaken Identity   
5193                                Journey to the East   
5194  The Monk Who Sold His Ferrari: A Fable Abo