In [1]:
import pandas as pd

books = pd.read_csv('books_with_categories.csv')

In [2]:
from transformers import pipeline

classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base",
                      top_k=None,
                      device=0)
classifier('I love this!')

Device set to use cuda:0


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.008528684265911579},
  {'label': 'neutral', 'score': 0.0057645998895168304},
  {'label': 'anger', 'score': 0.004419790115207434},
  {'label': 'sadness', 'score': 0.002092393347993493},
  {'label': 'disgust', 'score': 0.0016119939973577857},
  {'label': 'fear', 'score': 0.0004138524236623198}]]

In [3]:
classifier(books['description'][0].split('.'))

[[{'label': 'surprise', 'score': 0.7296026349067688},
  {'label': 'neutral', 'score': 0.14038580656051636},
  {'label': 'fear', 'score': 0.06816219538450241},
  {'label': 'joy', 'score': 0.04794240742921829},
  {'label': 'anger', 'score': 0.009156355634331703},
  {'label': 'disgust', 'score': 0.0026284761261194944},
  {'label': 'sadness', 'score': 0.0021221614442765713}],
 [{'label': 'neutral', 'score': 0.449370801448822},
  {'label': 'disgust', 'score': 0.27359142899513245},
  {'label': 'joy', 'score': 0.10908301174640656},
  {'label': 'sadness', 'score': 0.09362722933292389},
  {'label': 'anger', 'score': 0.04047824814915657},
  {'label': 'surprise', 'score': 0.026970194652676582},
  {'label': 'fear', 'score': 0.006879045628011227}],
 [{'label': 'neutral', 'score': 0.6462159156799316},
  {'label': 'sadness', 'score': 0.24273337423801422},
  {'label': 'disgust', 'score': 0.04342271015048027},
  {'label': 'surprise', 'score': 0.02830049768090248},
  {'label': 'joy', 'score': 0.01421145

In [4]:
sentences = books['description'][0].split('.')
predictions = classifier(sentences)
sorted(predictions[0], key=lambda x: x['label'])

[{'label': 'anger', 'score': 0.009156355634331703},
 {'label': 'disgust', 'score': 0.0026284761261194944},
 {'label': 'fear', 'score': 0.06816219538450241},
 {'label': 'joy', 'score': 0.04794240742921829},
 {'label': 'neutral', 'score': 0.14038580656051636},
 {'label': 'sadness', 'score': 0.0021221614442765713},
 {'label': 'surprise', 'score': 0.7296026349067688}]

In [7]:
import numpy as np

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
isbn = []
emotion_scores = { label: [] for label in emotion_labels }

def calculate_max_emotion_score(predictions):
    per_emotion_scores = { label: [] for label in emotion_labels }
    for prediction in predictions:
        sorted_prediction = sorted(prediction, key=lambda x: x['label'])
        for index, label in enumerate(emotion_labels):
            per_emotion_scores[label].append(sorted_prediction[index]['score'])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}

In [10]:
from tqdm import tqdm

for i in tqdm(range(len(books))):
    isbn.append(books['isbn13'][i])
    sentences = books['description'][i].split('.')
    predictions = classifier(sentences)
    max_score = calculate_max_emotion_score(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_score[label])

100%|██████████| 5197/5197 [03:58<00:00, 21.81it/s]


In [12]:
emotion_df = pd.DataFrame(emotion_scores)
emotion_df['isbn13'] = isbn

In [13]:
emotion_df

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise,isbn13
0,0.064134,0.273591,0.928168,0.932798,0.646216,0.967157,0.729603,9780002005883
1,0.612619,0.348285,0.942528,0.704421,0.887939,0.111690,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078765,9780006178736
3,0.351483,0.150722,0.360707,0.251881,0.732686,0.111690,0.078765,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.884390,0.475881,0.078765,9780006280934
...,...,...,...,...,...,...,...,...
5202,0.148208,0.030643,0.919165,0.255170,0.853721,0.980877,0.030656,9788172235222
5203,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765,9788173031014
5204,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625,9788179921623
5205,0.064134,0.104007,0.459270,0.759456,0.951104,0.368111,0.078765,9788185300535


In [14]:
books = pd.merge(books, emotion_df, on='isbn13')

In [15]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,title_and_subtitle,tagged_description,simple_categories,anger,disgust,fear,joy,neutral,sadness,surprise
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.064134,0.273591,0.928168,0.932798,0.646216,0.967157,0.729603
1,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.064134,0.273591,0.928168,0.932798,0.646216,0.967157,0.729603
2,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web:A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.612619,0.348285,0.942528,0.704421,0.887939,0.111690,0.252545
3,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web:A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.612619,0.348285,0.942528,0.704421,0.887939,0.111690,0.252545
4,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5202,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.148208,0.030643,0.919165,0.255170,0.853721,0.980877,0.030656
5203,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.064134,0.114383,0.051363,0.400263,0.883198,0.111690,0.227765
5204,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.009997,0.009929,0.339218,0.947779,0.375755,0.066685,0.057625
5205,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that:Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...,Nonfiction,0.064134,0.104007,0.459270,0.759456,0.951104,0.368111,0.078765


In [16]:
books.to_csv('books_with_emotions.csv', index=False)