In [14]:
import pandas as pd

books = pd.read_csv('/content/books_with_categories.csv')

In [15]:
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k = None, device = 'cuda')
classifier("I love this!")


Device set to use cuda


[[{'label': 'joy', 'score': 0.9771687984466553},
  {'label': 'surprise', 'score': 0.00852868054062128},
  {'label': 'neutral', 'score': 0.005764591973274946},
  {'label': 'anger', 'score': 0.004419785924255848},
  {'label': 'sadness', 'score': 0.0020923891570419073},
  {'label': 'disgust', 'score': 0.001611991785466671},
  {'label': 'fear', 'score': 0.00041385178337804973}]]

In [16]:
sentences = books["description"][0].split(".")
predictions =  classifier(sentences)

In [31]:
sorted(predictions[0], key = lambda x:x['label'])

[{'label': 'anger', 'score': 0.0028371906373649836},
 {'label': 'disgust', 'score': 0.003136999439448118},
 {'label': 'fear', 'score': 0.0011662750039249659},
 {'label': 'joy', 'score': 0.9585492610931396},
 {'label': 'neutral', 'score': 0.02825172245502472},
 {'label': 'sadness', 'score': 0.0029162841383367777},
 {'label': 'surprise', 'score': 0.0031422844622284174}]

In [18]:
import numpy as np

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']

In [19]:
isbn = []

emotion_scores = {label: [] for label in emotion_labels}

In [32]:
def calculate_max_emotion_score(predictions):
  per_emotion_scores = {label: [] for label in emotion_labels}
  for prediction in predictions:
    sorted_predictions = sorted(prediction, key = lambda x:x['label'])
    for index, label in enumerate(emotion_labels):
      per_emotion_scores[label].append(sorted_predictions[index]['score'])

  return {label: np.max(scores) for label, scores in per_emotion_scores.items()}


In [33]:
for i in range(10):
  isbn.append(books['isbn13'][i])
  sentences = books["description"][i].split(".")
  predictions =  classifier(sentences)
  max_emotion_scores = calculate_max_emotion_score(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_emotion_scores[label])

In [34]:
emotion_scores

{'anger': [0.009156348183751106,
  0.040478333830833435,
  0.01103188470005989,
  0.03219102695584297,
  0.0018428893527016044,
  0.005025018472224474,
  0.00482131028547883,
  0.0641336739063263,
  np.float64(0.0641336739063263),
  0.005966844502836466,
  0.009242720901966095,
  0.01639065518975258,
  0.023093044757843018,
  0.6126185059547424,
  0.07713033258914948,
  0.0641336739063263,
  0.0641336739063263,
  0.00872359424829483,
  0.005810628179460764,
  np.float64(0.6126185059547424),
  0.041301045566797256,
  0.013167046941816807,
  0.0641336739063263,
  np.float64(0.6126185059547424),
  0.016036203131079674,
  0.006699250545352697,
  0.35148391127586365,
  0.0641336739063263,
  np.float64(0.6126185059547424),
  0.013624371029436588,
  0.05883360654115677,
  0.08141247183084488,
  0.05883360654115677,
  0.026564864441752434,
  0.006978129036724567,
  0.0641336739063263,
  np.float64(0.6126185059547424),
  0.009158486500382423,
  0.215225949883461,
  0.2322249710559845,
  0.06413

In [35]:
from tqdm import tqdm

isbn = []
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
emotion_scores = {label: [] for label in emotion_labels}


for i in tqdm(range(len(books))):
  isbn.append(books['isbn13'][i])
  sentences = books["description"][i].split(".")
  predictions =  classifier(sentences)
  max_emotion_scores = calculate_max_emotion_score(predictions)
  for label in emotion_labels:
    emotion_scores[label].append(max_emotion_scores[label])

100%|██████████| 5595/5595 [02:27<00:00, 37.87it/s]


In [36]:
emotions_df =  pd.DataFrame(emotion_scores)
emotions_df['isbn13'] = isbn

In [37]:
books = pd.merge(books, emotions_df, on='isbn13')

In [38]:
books.to_csv('books_with_emotions.csv', index = False)

In [39]:
from google.colab import files
files.download('books_with_emotions.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>