In [2]:
import pandas as pd

books = pd.read_csv('books_with_categories.csv')

In [3]:
from transformers import pipeline

classifier = pipeline("text-classification", 
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k = None,
                        device = "mps")

classifier("Hello")

Device set to use mps


[[{'label': 'neutral', 'score': 0.9381638765335083},
  {'label': 'surprise', 'score': 0.02487390860915184},
  {'label': 'fear', 'score': 0.011052035726606846},
  {'label': 'sadness', 'score': 0.008561934344470501},
  {'label': 'disgust', 'score': 0.008296279236674309},
  {'label': 'anger', 'score': 0.005248758010566235},
  {'label': 'joy', 'score': 0.0038032936863601208}]]

In [4]:
classifier(books["description"][0].split("."))

[[{'label': 'surprise', 'score': 0.7296023964881897},
  {'label': 'neutral', 'score': 0.14038589596748352},
  {'label': 'fear', 'score': 0.06816217303276062},
  {'label': 'joy', 'score': 0.04794251546263695},
  {'label': 'anger', 'score': 0.009156357496976852},
  {'label': 'disgust', 'score': 0.002628477755934},
  {'label': 'sadness', 'score': 0.0021221647039055824}],
 [{'label': 'neutral', 'score': 0.4493715167045593},
  {'label': 'disgust', 'score': 0.27359068393707275},
  {'label': 'joy', 'score': 0.10908294469118118},
  {'label': 'sadness', 'score': 0.09362741559743881},
  {'label': 'anger', 'score': 0.040478210896253586},
  {'label': 'surprise', 'score': 0.026970192790031433},
  {'label': 'fear', 'score': 0.006879056338220835}],
 [{'label': 'neutral', 'score': 0.6462160348892212},
  {'label': 'sadness', 'score': 0.24273338913917542},
  {'label': 'disgust', 'score': 0.04342262074351311},
  {'label': 'surprise', 'score': 0.028300566598773003},
  {'label': 'joy', 'score': 0.014211431

In [5]:
#predictions
sentences = books['description'][0].split('.')
predictions = classifier(sentences)
predictions

[[{'label': 'surprise', 'score': 0.7296023964881897},
  {'label': 'neutral', 'score': 0.14038589596748352},
  {'label': 'fear', 'score': 0.06816217303276062},
  {'label': 'joy', 'score': 0.04794251546263695},
  {'label': 'anger', 'score': 0.009156357496976852},
  {'label': 'disgust', 'score': 0.002628477755934},
  {'label': 'sadness', 'score': 0.0021221647039055824}],
 [{'label': 'neutral', 'score': 0.4493715167045593},
  {'label': 'disgust', 'score': 0.27359068393707275},
  {'label': 'joy', 'score': 0.10908294469118118},
  {'label': 'sadness', 'score': 0.09362741559743881},
  {'label': 'anger', 'score': 0.040478210896253586},
  {'label': 'surprise', 'score': 0.026970192790031433},
  {'label': 'fear', 'score': 0.006879056338220835}],
 [{'label': 'neutral', 'score': 0.6462160348892212},
  {'label': 'sadness', 'score': 0.24273338913917542},
  {'label': 'disgust', 'score': 0.04342262074351311},
  {'label': 'surprise', 'score': 0.028300566598773003},
  {'label': 'joy', 'score': 0.014211431

In [6]:
import numpy as np

emotion_labels = ["anger","disgust","fear", "joy", "sadness","surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}


def calculate_max_emotion_scores(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}
    for prediction in predictions:
        sorted_predictions = sorted(prediction, key=lambda x: ["label"])
        for index, label in enumerate(emotion_labels) :
            per_emotion_scores[label].append(sorted_predictions[index]["score"])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}



In [7]:
for i in range(10):
    sentences = books['description'][i].split('.')
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

emotion_scores

{'anger': [0.9671575427055359,
  0.9425276517868042,
  0.9723208546638489,
  0.732686460018158,
  0.8843896389007568,
  0.7271744608879089,
  0.8725653886795044,
  0.8202819228172302,
  0.9155239462852478,
  0.8603722453117371],
 'disgust': [0.27359068393707275,
  0.31934991478919983,
  0.11169011890888214,
  0.35148343443870544,
  0.2726134657859802,
  0.2719022333621979,
  0.3853578567504883,
  0.2921660244464874,
  0.27948132157325745,
  0.17792697250843048],
 'fear': [0.10908294469118118,
  0.19543583691120148,
  0.1040065586566925,
  0.15072256326675415,
  0.12224285304546356,
  0.11962244659662247,
  0.1852913796901703,
  0.2344881147146225,
  0.24911761283874512,
  0.1040065586566925],
 'joy': [0.09362741559743881,
  0.14334645867347717,
  0.0787653997540474,
  0.07967247068881989,
  0.09504333138465881,
  0.102390356361866,
  0.08459077775478363,
  0.0787653997540474,
  0.13561491668224335,
  0.0787653997540474],
 'sadness': [0.06413353234529495,
  0.07712996006011963,
  0.0641

In [8]:
from tqdm import tqdm

emotion_labels = ["anger","disgust","fear", "joy", "sadness","surprise", "neutral"]
isbn = []
emotion_scores = {label: [] for label in emotion_labels}

for i in tqdm(range(len(books))):
    isbn.append(books["isbn13"][i])
    sentences = books['description'][i].split('.')
    predictions = classifier(sentences)
    max_scores = calculate_max_emotion_scores(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

100%|██████████| 5197/5197 [04:43<00:00, 18.33it/s]


In [9]:
emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn
emotions_df

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,isbn13
0,0.967158,0.273591,0.109083,0.093627,0.064134,0.051363,0.040564,9780002005883
1,0.942528,0.319350,0.195436,0.143346,0.077130,0.051363,0.040564,9780002261982
2,0.972321,0.111690,0.104007,0.078765,0.064134,0.051363,0.040564,9780006178736
3,0.732686,0.351483,0.150723,0.079672,0.064134,0.051363,0.040564,9780006280897
4,0.884390,0.272613,0.122243,0.095043,0.064134,0.051363,0.040564,9780006280934
...,...,...,...,...,...,...,...,...
5192,0.980877,0.305738,0.148208,0.127783,0.043363,0.030656,0.009569,9788172235222
5193,0.883198,0.338892,0.227765,0.078765,0.064134,0.051363,0.040564,9788173031014
5194,0.947779,0.339217,0.141734,0.066685,0.057625,0.009929,0.009055,9788179921623
5195,0.951104,0.368110,0.214132,0.078765,0.064134,0.051363,0.040564,9788185300535


In [10]:
books = pd.merge(books, emotions_df, on="isbn13")
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,...,newtitle,tagged_description,simple_categories,anger,disgust,fear,joy,sadness,surprise,neutral
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,...,Gilead,9780002005883 A NOVEL THAT READERS and critics...,Fiction,0.967158,0.273591,0.109083,0.093627,0.064134,0.051363,0.040564
1,9780002261982,0002261987,Spider's Web A Novel A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,...,Spider's Web A Novel A Novel:| A Novel,9780002261982 A new 'Christie for Christmas' -...,Fiction,0.942528,0.319350,0.195436,0.143346,0.077130,0.051363,0.040564
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,...,Rage of angels,"9780006178736 A memorable, mesmerizing heroine...",Fiction,0.972321,0.111690,0.104007,0.078765,0.064134,0.051363,0.040564
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,...,The Four Loves,9780006280897 Lewis' work on the nature of lov...,Nonfiction,0.732686,0.351483,0.150723,0.079672,0.064134,0.051363,0.040564
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,...,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le...",Nonfiction,0.884390,0.272613,0.122243,0.095043,0.064134,0.051363,0.040564
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,...,Mistaken Identity,9788172235222 On A Train Journey Home To North...,Fiction,0.980877,0.305738,0.148208,0.127783,0.043363,0.030656,0.009569
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,...,Journey to the East,9788173031014 This book tells the tale of a ma...,Nonfiction,0.883198,0.338892,0.227765,0.078765,0.064134,0.051363,0.040564
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,...,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...,Fiction,0.947779,0.339217,0.141734,0.066685,0.057625,0.009929,0.009055
5195,9788185300535,8185300534,I Am that Talks with Sri Nisargadatta Maharaj ...,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,...,I Am that Talks with Sri Nisargadatta Maharaj ...,9788185300535 This collection of the timeless ...,Nonfiction,0.951104,0.368110,0.214132,0.078765,0.064134,0.051363,0.040564


In [11]:
books.to_csv('books_with_emotions.csv',index=False)