In [1]:
import pandas as pd
books = pd.read_csv('books_with_categories.csv')

In [2]:
from transformers import pipeline
pipe = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base",top_k=None)


Device set to use cpu


In [3]:
pipe("I will never get a gf and die single")

[[{'label': 'sadness', 'score': 0.8042255640029907},
  {'label': 'anger', 'score': 0.10395780205726624},
  {'label': 'neutral', 'score': 0.04898032918572426},
  {'label': 'disgust', 'score': 0.02261645533144474},
  {'label': 'joy', 'score': 0.010619841516017914},
  {'label': 'fear', 'score': 0.00816002581268549},
  {'label': 'surprise', 'score': 0.00143995916005224}]]

In [4]:
short_desc = books.loc[(books["description"].str.len() < 250) & (books["description"].str.len() > 50)].reset_index()


In [5]:
sd = short_desc["description"][0]

In [6]:
sd

'"The Citadel of the Autarch brings The Book of the New sun to its harrowing conclusion, as Severiain clashes in a final reckoning with the dread Autarch, fulfilling an ancient prophesy that will alter forever the realm known as Urth." -- Back cover.'

In [7]:
pipe(sd)

[[{'label': 'fear', 'score': 0.9862549304962158},
  {'label': 'sadness', 'score': 0.004601126071065664},
  {'label': 'neutral', 'score': 0.004379679914563894},
  {'label': 'disgust', 'score': 0.0013374342815950513},
  {'label': 'anger', 'score': 0.0011936438968405128},
  {'label': 'joy', 'score': 0.0011608409695327282},
  {'label': 'surprise', 'score': 0.0010723872110247612}]]

In [15]:
import numpy as np

emotion_labels = ["anger", "disgust", "fear", "joy", "sadness", "surprise"]

def get_top_mean_emotion(predictions):
    per_emotion_scores = {label: [] for label in emotion_labels}

    for prediction in predictions:

        if not isinstance(prediction, list):
            prediction = [prediction]

        score_map = {p["label"]: p["score"] for p in prediction}
        for label in emotion_labels:
            per_emotion_scores[label].append(score_map.get(label, 0.0))

    mean_scores = {label: np.mean(scores) for label, scores in per_emotion_scores.items()}
    return max(mean_scores, key=mean_scores.get)


In [16]:
isb = []
emotions = []

from tqdm import tqdm

for i in tqdm(range(len(books))):
    isb.append(books["isbn13"][i])
    sentences = [s.strip() for s in books["description"][i].split(".") if s.strip()]
    sentences = sentences[:9]
    predictions = []
    for sentence in sentences:
        res = pipe(sentence)

        if len(res) == 1 and isinstance(res[0], list):
            res = res[0]
        predictions.append(res)

    top_emotion = get_top_mean_emotion(predictions)
    emotions.append(top_emotion)

100%|██████████| 6586/6586 [12:56<00:00,  8.48it/s] 


In [17]:
emotion_df = pd.DataFrame({"isbn13": isb, "emotion": emotions})

In [18]:
emotion_df

Unnamed: 0,isbn13,emotion
0,9780687002825,fear
1,9780974320620,surprise
2,9780974320620,surprise
3,9781904271062,fear
4,9780743223130,surprise
...,...,...
6581,9780310243878,surprise
6582,9781591162025,surprise
6583,9780440414001,sadness
6584,9780618339556,disgust


In [19]:
books = pd.merge(books, emotion_df, on = "isbn13")

In [20]:
books

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,missing_description,age,words_in_description,title_and_subtitle,tagged_description,simple_categories,emotion
0,9780687002825,0687002826,Exclusion and Embrace,"A Theological Exploration of Identity, Otherne...",Miroslav Volf,Religion,http://books.google.com/books/content?id=Cqto7...,Life at the end of the twentieth century prese...,1996.0,4.27,306.0,1895.0,0,28.0,920,Exclusion and Embrace: A Theological Explorati...,9780687002825: Life at the end of the twentiet...,Nonfiction,fear
1,9780974320620,,Emotional Intelligence 2.0,,"Travis Bradberry, Jean Greaves, Patrick Lencioni",,https://images.gr-assets.com/books/1328765863m...,"""Emotional Intelligence 2.0 succinctly explain...",2003.0,3.81,895.0,53384.0,0,21.0,681,Emotional Intelligence 2.0,"9780974320620: ""Emotional Intelligence 2.0 suc...",Nonfiction,surprise
2,9780974320620,,Emotional Intelligence 2.0,,"Travis Bradberry, Jean Greaves, Patrick Lencioni",,https://images.gr-assets.com/books/1328765863m...,"""Emotional Intelligence 2.0 succinctly explain...",2003.0,3.81,895.0,53384.0,0,21.0,681,Emotional Intelligence 2.0,"9780974320620: ""Emotional Intelligence 2.0 suc...",Nonfiction,surprise
3,9780974320620,,Emotional Intelligence 2.0,,"Travis Bradberry, Jean Greaves, Patrick Lencioni",,https://images.gr-assets.com/books/1328765863m...,"""Emotional Intelligence 2.0 succinctly explain...",2003.0,3.81,895.0,53384.0,0,21.0,681,Emotional Intelligence 2.0,"9780974320620: ""Emotional Intelligence 2.0 suc...",Nonfiction,surprise
4,9780974320620,,Emotional Intelligence 2.0,,"Travis Bradberry, Jean Greaves, Patrick Lencioni",,https://images.gr-assets.com/books/1328765863m...,"""Emotional Intelligence 2.0 succinctly explain...",2003.0,3.81,895.0,53384.0,0,21.0,681,Emotional Intelligence 2.0,"9780974320620: ""Emotional Intelligence 2.0 suc...",Nonfiction,surprise
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7173,9780310243878,0310243874,Breach of Promise,,James Scott Bell,Fiction,http://books.google.com/books/content?id=RNpBD...,Actor and father Mark Gillen finds out how far...,2004.0,4.00,345.0,358.0,0,20.0,25,Breach of Promise,9780310243878: Actor and father Mark Gillen fi...,Fiction,surprise
7174,9781591162025,1591162025,Inuyasha Ani-Manga,,Rumiko Takahashi,Comics & Graphic Novels,http://books.google.com/books/content?id=mf55s...,A teenage girl travels back in time to medieva...,2004.0,4.36,206.0,5787.0,0,20.0,26,Inuyasha Ani-Manga,9781591162025: A teenage girl travels back in ...,Fiction,surprise
7175,9780440414001,0440414008,"See You Around, Sam!",,Lois Lowry,Juvenile Fiction,http://books.google.com/books/content?id=-oXbx...,"Sam Krupnik, mad at his mother because she won...",1998.0,3.68,128.0,235.0,0,26.0,25,"See You Around, Sam!","9780440414001: Sam Krupnik, mad at his mother ...",Children's Fiction,sadness
7176,9780618339556,0618339558,Fergus and the Night-Demon,An Irish Ghost Story,Jim Murphy,Juvenile Fiction,http://books.google.com/books/content?id=OKeoo...,"On his way to town to have some fun, a lazy bu...",2006.0,3.71,32.0,70.0,0,18.0,26,Fergus and the Night-Demon: An Irish Ghost Story,9780618339556: On his way to town to have some...,Children's Fiction,disgust


In [21]:
books.to_csv("books_with_emotions.csv", index = False)