In [6]:
import pandas as pd
books = pd.read_csv("/books_with_categories.csv")

In [7]:
#lets find a fine tuned model for our task
from transformers import pipeline
classifier = pipeline("text-classification",
                      model="j-hartmann/emotion-english-distilroberta-base",
                      top_k = None,
                      device = -1)
classifier("i am both polite and angry person")

Device set to use cpu


[[{'label': 'anger', 'score': 0.9544541835784912},
  {'label': 'joy', 'score': 0.036051928997039795},
  {'label': 'sadness', 'score': 0.0037840420845896006},
  {'label': 'disgust', 'score': 0.0020589889027178288},
  {'label': 'neutral', 'score': 0.0013774930266663432},
  {'label': 'fear', 'score': 0.001280306139960885},
  {'label': 'surprise', 'score': 0.0009930622763931751}]]

In [8]:
# now we need to decide which level of description we will apply to find the sentiment from the text
# 1 we can find overall sentiment of whole description using classifier
# 2.we can break them into individual sentences and them find

In [9]:
books["description"][0]

'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world ha

we can see that from the description that words in different sentences are totally differernt from one another .So applying classifier here and getting correct sentiment will be bit difficult.
Thats why we break them into discrete parts and then classify them

In [10]:
#see the exampke here that above description is full if mx sentences thats why model is prediction a score of 65% only
classifier(books["description"][0])

[[{'label': 'fear', 'score': 0.6548416614532471},
  {'label': 'neutral', 'score': 0.16985176503658295},
  {'label': 'sadness', 'score': 0.11640875041484833},
  {'label': 'surprise', 'score': 0.020700644701719284},
  {'label': 'disgust', 'score': 0.019100699573755264},
  {'label': 'joy', 'score': 0.0151612414047122},
  {'label': 'anger', 'score': 0.003935148473829031}]]

In [11]:
#break the description
classifier(books["description"][0].split("."))

[[{'label': 'surprise', 'score': 0.729602038860321},
  {'label': 'neutral', 'score': 0.14038637280464172},
  {'label': 'fear', 'score': 0.06816219538450241},
  {'label': 'joy', 'score': 0.047942470759153366},
  {'label': 'anger', 'score': 0.009156357496976852},
  {'label': 'disgust', 'score': 0.002628476358950138},
  {'label': 'sadness', 'score': 0.0021221644710749388}],
 [{'label': 'neutral', 'score': 0.44937166571617126},
  {'label': 'disgust', 'score': 0.273590624332428},
  {'label': 'joy', 'score': 0.10908307135105133},
  {'label': 'sadness', 'score': 0.09362711012363434},
  {'label': 'anger', 'score': 0.04047827422618866},
  {'label': 'surprise', 'score': 0.02697024680674076},
  {'label': 'fear', 'score': 0.006879054941236973}],
 [{'label': 'neutral', 'score': 0.6462171673774719},
  {'label': 'sadness', 'score': 0.2427321970462799},
  {'label': 'disgust', 'score': 0.04342259466648102},
  {'label': 'surprise', 'score': 0.02830052375793457},
  {'label': 'joy', 'score': 0.01421149168

In [12]:
sentences = books["description"][0].split(".")
predictions = classifier(sentences)

In [13]:
sentences[0]

'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'

In [14]:
predictions[0]

[{'label': 'surprise', 'score': 0.729602038860321},
 {'label': 'neutral', 'score': 0.14038637280464172},
 {'label': 'fear', 'score': 0.06816219538450241},
 {'label': 'joy', 'score': 0.047942470759153366},
 {'label': 'anger', 'score': 0.009156357496976852},
 {'label': 'disgust', 'score': 0.002628476358950138},
 {'label': 'sadness', 'score': 0.0021221644710749388}]

In [15]:
sentences[3]

' Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist'

In [16]:
predictions[3]

[{'label': 'fear', 'score': 0.9281687140464783},
 {'label': 'anger', 'score': 0.03219049796462059},
 {'label': 'neutral', 'score': 0.012808642350137234},
 {'label': 'sadness', 'score': 0.0087568499147892},
 {'label': 'surprise', 'score': 0.008597880601882935},
 {'label': 'disgust', 'score': 0.008431791327893734},
 {'label': 'joy', 'score': 0.0010455799056217074}]

In [17]:
sorted(predictions[0],key=lambda x: x["label"])

[{'label': 'anger', 'score': 0.009156357496976852},
 {'label': 'disgust', 'score': 0.002628476358950138},
 {'label': 'fear', 'score': 0.06816219538450241},
 {'label': 'joy', 'score': 0.047942470759153366},
 {'label': 'neutral', 'score': 0.14038637280464172},
 {'label': 'sadness', 'score': 0.0021221644710749388},
 {'label': 'surprise', 'score': 0.729602038860321}]

In [18]:
#lets now find the maximum probability for each description
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
isbn = []
emotion_score = {label: [] for label in emotion_labels}
def calculate_max_emotion_score(predictions):
  per_emotion_scores = {label: [] for label in emotion_labels}
  for prediction in predictions:
    sorted_predictions = sorted(prediction, key=lambda x: x["label"])
    for index, label in enumerate(emotion_labels):
      per_emotion_scores[label].append(sorted_predictions[index]["score"])
  return {label: max(scores) for label, scores in per_emotion_scores.items()}

In [19]:
for i in range(10):
  isbn.append(books["isbn13"][i])
  sentences = books["description"][i].split(".")
  predictions = classifier(sentences)
  max_scores = calculate_max_emotion_score(predictions)
  for label  in emotion_labels:
    emotion_score[label].append(max_scores[label])

In [20]:
emotion_score

{'anger': [0.06413359940052032,
  0.6126187443733215,
  0.06413359940052032,
  0.3514831066131592,
  0.08141248673200607,
  0.23222537338733673,
  0.5381839871406555,
  0.06413359940052032,
  0.3006693422794342,
  0.06413359940052032],
 'disgust': [0.273590624332428,
  0.3482849895954132,
  0.10400664061307907,
  0.15072253346443176,
  0.18449537456035614,
  0.7271742224693298,
  0.15585489571094513,
  0.10400664061307907,
  0.2794814109802246,
  0.1779269278049469],
 'fear': [0.9281687140464783,
  0.9425276517868042,
  0.9723208546638489,
  0.36070701479911804,
  0.09504341334104538,
  0.05136280134320259,
  0.747428297996521,
  0.4044966399669647,
  0.9155242443084717,
  0.05136280134320259],
 'joy': [0.9327975511550903,
  0.7044214010238647,
  0.7672374248504639,
  0.2518806457519531,
  0.04056441783905029,
  0.04337577521800995,
  0.8725654482841492,
  0.04056441783905029,
  0.04056441783905029,
  0.04056441783905029],
 'neutral': [0.6462171673774719,
  0.887939453125,
  0.54947680

In [21]:
from tqdm import tqdm
emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
isbn = []
emotion_score = {label: [] for label in emotion_labels}
for i in tqdm(range(1000)):
  isbn.append(books["isbn13"][i])
  sentences = books["description"][i].split(".")
  predictions = classifier(sentences)
  max_scores = calculate_max_emotion_score(predictions)
  for label  in emotion_labels:
    emotion_score[label].append(max_scores[label])

100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s]


In [26]:
emotion_df = pd.DataFrame(emotion_score)
emotion_df["isbn13"] = isbn

In [27]:
emotion_df

Unnamed: 0,anger,disgust,fear,joy,neutral,sadness,surprise,isbn13
0,0.064134,0.273591,0.928169,0.932798,0.646217,0.967158,0.729602,9780002005883
1,0.612619,0.348285,0.942528,0.704421,0.887939,0.111690,0.252545,9780002261982
2,0.064134,0.104007,0.972321,0.767237,0.549477,0.111690,0.078766,9780006178736
3,0.351483,0.150723,0.360707,0.251881,0.732685,0.111690,0.078766,9780006280897
4,0.081412,0.184495,0.095043,0.040564,0.884390,0.475880,0.078766,9780006280934
...,...,...,...,...,...,...,...,...
995,0.064134,0.104007,0.051363,0.058638,0.820086,0.111690,0.078766,9780195069051
996,0.058834,0.073313,0.393855,0.088075,0.744850,0.160302,0.101237,9780195101287
997,0.415775,0.503980,0.089842,0.519029,0.914986,0.111690,0.832983,9780195108965
998,0.064134,0.104007,0.129100,0.040564,0.887440,0.387930,0.078766,9780195117950


In [28]:
books = pd.merge(books,emotion_df,on="isbn13")

In [29]:
books.to_csv("books_with_emotions.csv",index=False)