In [None]:
import pandas as pd
import numpy as np
# Load the previously classified books table; the path is relative to the notebook's working directory.
books = pd.read_csv(filepath_or_buffer="../data/books_classified.csv")

In [None]:
from transformers import pipeline

# Load the emotion classifier; the pipeline bundles the model and its tokenizer.
# top_k=None asks for a score for every label instead of only the best one.
# truncation=True clips inputs longer than the model's maximum sequence length,
# so a single over-long sentence cannot raise inside the model and abort a batch run.
model_name = "j-hartmann/emotion-english-distilroberta-base"
classifier = pipeline(
    "text-classification",
    model=model_name,
    top_k=None,
    truncation=True,
)

In [None]:
# Sanity check: run the classifier on one sample sentence and display the raw output.
classifier("i work for very low pay")

In [None]:
# Emotion labels reported for every book (one output column per label).
# NOTE(review): 'love' and 'hope' may not be among the classes the loaded model
# emits; if so, their scores will always stay 0.0 - confirm against the model's
# label set before relying on those two columns.
labels = [
    "anger",
    "disgust",
    "fear",
    "joy",
    "neutral",
    "sadness",
    "surprise",
    "love",
    "hope",
]


In [None]:
def classify_emotions(description, neutral_max=0.35, emotion_labels=None, predict=None):
    """Average per-sentence emotion scores for one piece of text.

    The text is split on "." and every non-empty piece is scored separately;
    the scores are then averaged per label across the pieces.

    Args:
        description: Text to score. Anything that is not a ``str`` (for
            example the NaN pandas uses for a missing cell) is treated as
            empty text instead of raising.
        neutral_max: Upper bound applied to the averaged "neutral" score.
        emotion_labels: Labels to report. Defaults to the notebook-level
            ``labels`` list.
        predict: Callable that takes one sentence and returns a list whose
            first item is a list of ``{"label": ..., "score": ...}`` dicts.
            Defaults to the notebook-level ``classifier``.

    Returns:
        dict mapping each reported label to its mean score rounded to three
        decimals. Every value is 0.0 when there is nothing to score.
    """
    # Resolve the notebook globals lazily so callers can inject their own.
    if emotion_labels is None:
        emotion_labels = labels
    if predict is None:
        predict = classifier

    totals = {label: 0.0 for label in emotion_labels}

    # Missing descriptions arrive from pandas as float NaN; calling .split on
    # them would raise AttributeError, so report all-zero scores instead.
    if not isinstance(description, str):
        return dict(totals)

    sentences = [piece.strip() for piece in description.split(".")]
    sentences = [piece for piece in sentences if piece]

    for sentence in sentences:
        for pred in predict(sentence)[0]:
            # Skip labels we were not asked to report rather than raising
            # KeyError when the model's label set differs from ours.
            if pred["label"] in totals:
                totals[pred["label"]] += pred["score"]

    count = len(sentences)
    result = {}
    for label, total in totals.items():
        avg_score = total / count if count else 0.0
        if label == "neutral":
            # Cap neutral at neutral_max.
            avg_score = min(avg_score, neutral_max)
        result[label] = round(avg_score, 3)

    return result

In [None]:
# Score every description, then expand the per-row dicts into one column per label.
# Building the frame in a single constructor call avoids the slow row-wise
# `.apply(pd.Series)` expansion.
emotion_df = pd.DataFrame(
    books["description"].apply(classify_emotions).tolist(),
    index=books.index,
)
# Drop emotion columns left over from an earlier run of this cell so that
# re-running it replaces them instead of appending duplicate column names.
books = pd.concat(
    [books.drop(columns=emotion_df.columns, errors="ignore"), emotion_df],
    axis=1,
)


In [None]:
# Persist the enriched table without the index column. The path is relative,
# so the file is written to the notebook's current working directory.
books.to_csv(path_or_buf="books_with_emotions.csv", index=False)