In [2]:
import pandas as pd 

books = pd.read_csv("books_with_cats.csv")

In [3]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None, device=0)
classifier('i want to laugh')

Device set to use cuda:0


[[{'label': 'joy', 'score': 0.9880262613296509},
  {'label': 'sadness', 'score': 0.005980018991976976},
  {'label': 'anger', 'score': 0.001999705797061324},
  {'label': 'surprise', 'score': 0.0016898228786885738},
  {'label': 'neutral', 'score': 0.001657860935665667},
  {'label': 'fear', 'score': 0.0004096422635484487},
  {'label': 'disgust', 'score': 0.00023663256433792412}]]

In [5]:
sentences = books["description"][0].split('.')
predictions = classifier(sentences)

In [6]:
predictions[0]

[{'label': 'surprise', 'score': 0.7296028733253479},
 {'label': 'neutral', 'score': 0.14038555324077606},
 {'label': 'fear', 'score': 0.06816219538450241},
 {'label': 'joy', 'score': 0.047942399978637695},
 {'label': 'anger', 'score': 0.009156350046396255},
 {'label': 'disgust', 'score': 0.002628474496304989},
 {'label': 'sadness', 'score': 0.002122160978615284}]

In [7]:
sorted(predictions[0], key=lambda x: x["label"])

[{'label': 'anger', 'score': 0.009156350046396255},
 {'label': 'disgust', 'score': 0.002628474496304989},
 {'label': 'fear', 'score': 0.06816219538450241},
 {'label': 'joy', 'score': 0.047942399978637695},
 {'label': 'neutral', 'score': 0.14038555324077606},
 {'label': 'sadness', 'score': 0.002122160978615284},
 {'label': 'surprise', 'score': 0.7296028733253479}]

In [14]:
import numpy as np 

# List of possible emotion labels
emotion_labels = ["anger","disgust","fear","joy","sadness","surprise","neutral"]

# List to store ISBNs (currently empty)
isbn = []

# Dictionary to store emotion scores for each label
emotion_scores = {label :[] for label in emotion_labels}

# Function to calculate the maximum score for each emotion across predictions
def calc_max_emotion_scores(predictions):
    # Create a dictionary to collect scores for each emotion label
    per_emotion_scores = {label: [] for label in emotion_labels}
    # Loop through each prediction (one per sentence)
    for prediction in predictions:
        # Sort predictions by label to align with emotion_labels order
        sorted_predictions = sorted(prediction, key=lambda x: x["label"])
        # For each emotion label, add the score to the corresponding list
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    # For each emotion, return the maximum score found across all sentences
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [17]:
from tqdm import tqdm

# List of possible emotion labels
emotion_labels = ["anger","disgust","fear","joy","sadness","surprise","neutral"]

# List to store ISBNs (currently empty)
isbn = []

# Dictionary to store emotion scores for each label
emotion_scores = {label :[] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])
    sentences = books["description"][i].split(".")
    predictions = classifier(sentences)
    max_scores = calc_max_emotion_scores(predictions)
    for label in emotion_labels:    
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [01:48<00:00, 47.75it/s]


In [19]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn
emotions_df.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.064134,0.273592,0.928168,0.932797,0.646217,0.967158,0.729603,9780002005883
1,0.612618,0.348285,0.942528,0.704422,0.887939,0.11169,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.11169,0.078765,9780006178736
3,0.351484,0.150723,0.360707,0.251881,0.732685,0.11169,0.078765,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.88439,0.475881,0.078765,9780006280934


In [20]:
books = pd.merge(books, emotions_df, on = "isbn13")

In [21]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,simple_categories,predicted_categories_x,predicted_categories_y,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Fiction,,,0.064134,0.273592,0.928168,0.932797,0.646217,0.967158,0.729603
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Fiction,Fiction,Fiction,0.612618,0.348285,0.942528,0.704422,0.887939,0.111690,0.252545
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Fiction,,,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078765
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,Nonfiction,Nonfiction,Nonfiction,0.351484,0.150723,0.360707,0.251881,0.732685,0.111690,0.078765
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,Nonfiction,Nonfiction,Nonfiction,0.081412,0.184495,0.095043,0.040564,0.884390,0.475881,0.078765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Fiction,Fiction,Fiction,0.148208,0.030643,0.919165,0.255170,0.853722,0.980877,0.030656
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Nonfiction,Nonfiction,Nonfiction,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,Fiction,Fiction,Fiction,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,Nonfiction,,,0.064134,0.104007,0.459269,0.759456,0.951104,0.368110,0.078765


In [22]:
books.to_csv("books_with_emotion.csv",index=False)