In [1]:
import pandas as pd

books = pd.read_csv("final_books.csv")

In [3]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k= None, device="mps")
classifier("I love programming!")

Device set to use mps


[[{'label': 'joy', 'score': 0.9126096963882446},
  {'label': 'sadness', 'score': 0.03860821574926376},
  {'label': 'surprise', 'score': 0.02735244669020176},
  {'label': 'anger', 'score': 0.007762046996504068},
  {'label': 'neutral', 'score': 0.006822151131927967},
  {'label': 'fear', 'score': 0.005535698030143976},
  {'label': 'disgust', 'score': 0.001309773069806397}]]

In [4]:
classifier(books["description"][0])

[[{'label': 'fear', 'score': 0.654841423034668},
  {'label': 'neutral', 'score': 0.16985200345516205},
  {'label': 'sadness', 'score': 0.11640875786542892},
  {'label': 'surprise', 'score': 0.02070065401494503},
  {'label': 'disgust', 'score': 0.019100766628980637},
  {'label': 'joy', 'score': 0.015161258168518543},
  {'label': 'anger', 'score': 0.003935154993087053}]]

In [5]:
classifier(books["description"][0].split("."))

[[{'label': 'surprise', 'score': 0.7296027541160583},
  {'label': 'neutral', 'score': 0.1403856724500656},
  {'label': 'fear', 'score': 0.06816212832927704},
  {'label': 'joy', 'score': 0.047942448407411575},
  {'label': 'anger', 'score': 0.009156345389783382},
  {'label': 'disgust', 'score': 0.0026284728664904833},
  {'label': 'sadness', 'score': 0.002122161677107215}],
 [{'label': 'neutral', 'score': 0.44937166571617126},
  {'label': 'disgust', 'score': 0.2735905349254608},
  {'label': 'joy', 'score': 0.10908280313014984},
  {'label': 'sadness', 'score': 0.09362749755382538},
  {'label': 'anger', 'score': 0.040478236973285675},
  {'label': 'surprise', 'score': 0.02697022631764412},
  {'label': 'fear', 'score': 0.006879068911075592}],
 [{'label': 'neutral', 'score': 0.6462157964706421},
  {'label': 'sadness', 'score': 0.24273350834846497},
  {'label': 'disgust', 'score': 0.0434226468205452},
  {'label': 'surprise', 'score': 0.028300516307353973},
  {'label': 'joy', 'score': 0.01421145

In [6]:
sentences = books["description"][0].split(".")
predictions = classifier(sentences)
sentences[0]

'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'

In [7]:
predictions[0]

[{'label': 'surprise', 'score': 0.7296027541160583},
 {'label': 'neutral', 'score': 0.1403856724500656},
 {'label': 'fear', 'score': 0.06816212832927704},
 {'label': 'joy', 'score': 0.047942448407411575},
 {'label': 'anger', 'score': 0.009156345389783382},
 {'label': 'disgust', 'score': 0.0026284728664904833},
 {'label': 'sadness', 'score': 0.002122161677107215}]

In [9]:
sorted(predictions[0], key=lambda x: x["label"])

[{'label': 'anger', 'score': 0.009156345389783382},
 {'label': 'disgust', 'score': 0.0026284728664904833},
 {'label': 'fear', 'score': 0.06816212832927704},
 {'label': 'joy', 'score': 0.047942448407411575},
 {'label': 'neutral', 'score': 0.1403856724500656},
 {'label': 'sadness', 'score': 0.002122161677107215},
 {'label': 'surprise', 'score': 0.7296027541160583}]

In [13]:
import numpy as np

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_predictions = sorted(prediction, key=lambda x: x["label"])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [14]:
for i in range(10):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

In [16]:
emotion_scores

{'anger': [np.float64(0.051973048597574234),
  np.float64(0.6126183867454529),
  np.float64(0.051973048597574234),
  np.float64(0.3514830768108368),
  np.float64(0.08141231536865234),
  np.float64(0.23222483694553375),
  np.float64(0.5381841063499451),
  np.float64(0.051973048597574234),
  np.float64(0.3006706237792969),
  np.float64(0.051973048597574234)],
 'disgust': [np.float64(0.2735905349254608),
  np.float64(0.348285436630249),
  np.float64(0.15766695141792297),
  np.float64(0.15766695141792297),
  np.float64(0.18449489772319794),
  np.float64(0.7271748185157776),
  np.float64(0.15766695141792297),
  np.float64(0.15766695141792297),
  np.float64(0.27948111295700073),
  np.float64(0.17792636156082153)],
 'fear': [np.float64(0.9281684160232544),
  np.float64(0.942527711391449),
  np.float64(0.9723208546638489),
  np.float64(0.3607070744037628),
  np.float64(0.09504328668117523),
  np.float64(0.038786713033914566),
  np.float64(0.747427761554718),
  np.float64(0.4044956862926483),
 

In [17]:
from tqdm import tqdm

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [05:55<00:00, 14.61it/s]


In [18]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.051973,0.273591,0.928168,0.932798,0.646216,0.967157,0.729603,9780002005883
1,0.612618,0.348285,0.942528,0.704422,0.88794,0.074825,0.252545,9780002261982
2,0.051973,0.157667,0.972321,0.767237,0.608933,0.074825,0.046931,9780006178736
3,0.351483,0.157667,0.360707,0.251881,0.732687,0.074825,0.046931,9780006280897
4,0.081412,0.184495,0.095043,0.035207,0.925904,0.475881,0.046931,9780006280934


In [19]:
books = pd.merge(books, emotions_df, on="isbn13")
books.to_csv("books_with_emotions.csv", index=False)