# Classify each scene by the emotion it evokes

In [3]:
import pandas as pd
import json
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np

### Load the trained model and its tokenizer

In [4]:
# Both the tokenizer and the sequence-classification model are loaded from
# the same location, so the path is named once and reused.
classifier_dir = 'classifier'
tokenizer = AutoTokenizer.from_pretrained(classifier_dir)
model = AutoModelForSequenceClassification.from_pretrained(classifier_dir)

In [5]:
# Numeric score assigned to each emotion name; these are the values that
# classify() returns for a scene.
emotion_mapping = dict(joy=6, love=5, surprise=4, sadness=3, anger=2, fear=1)

# Emotion names indexed by the class id predicted by the classifier.
# NOTE(review): presumably this order matches the id -> label order the model
# was trained with -- verify against the model's config.
classifier_labels = [
    'sadness',   # 0
    'joy',       # 1
    'love',      # 2
    'anger',     # 3
    'fear',      # 4
    'surprise',  # 5
]

In [6]:
def classify(scenes):
    """Predict an emotion score for every scene in ``scenes``.

    The scenes are tokenized as one batch (padded to the longest scene and
    truncated to 510 tokens) and run through the module-level ``model``. For
    each scene the highest-scoring class id is translated to an emotion name
    via ``classifier_labels`` and then to its numeric score via
    ``emotion_mapping``.

    Args:
        scenes: A sequence of scene texts, or a single scene string (treated
            as a batch of one).

    Returns:
        list[int]: One ``emotion_mapping`` value per scene, in input order.
        An empty input yields an empty list.
    """
    if isinstance(scenes, str):
        scenes = [scenes]
    scenes = list(scenes)
    if not scenes:
        # Nothing to classify; skip the tokenizer/model call entirely.
        return []

    # Local import: torch is not imported at the top of this notebook.
    import torch

    encoded = tokenizer(
        scenes,
        padding=True,
        truncation=True,
        max_length=510,
        return_tensors="pt",
    )
    # Forward the whole encoding (not just input_ids) so the attention mask
    # is applied and padding tokens do not influence the prediction.
    with torch.no_grad():  # inference only, no autograd graph needed
        logits = model(**encoded).logits
    predicted_ids = logits.argmax(-1).tolist()

    # Each element of predicted_ids already IS the class id; it must index
    # classifier_labels directly, not be reused as a position in the list.
    return [emotion_mapping[classifier_labels[class_id]] for class_id in predicted_ids]

In [7]:
# Read the screenplay dataset into a DataFrame.
scripts_path = 'screenplay datasets/scripts.json'
df = pd.read_json(scripts_path)

In [9]:
# Hand each row's `scenes` value to classify() and store the returned list of
# emotion scores in a new `emotions` column.
scene_emotions = df['scenes'].map(classify)
df['emotions'] = scene_emotions

Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors


# Save the classified dataset as both CSV and JSON files

In [12]:
# Write the classified DataFrame out as CSV.
# NOTE(review): the file name has no '.' before "csv" -- confirm whether
# 'scriptemotions.csv' was intended before renaming, since other code may
# read this exact path.
emotions_csv_path = 'screenplay datasets/scriptemotionscsv'
df.to_csv(emotions_csv_path)

In [13]:
# Write the classified DataFrame out as JSON.
# NOTE(review): the file name has no '.' before "json" -- confirm whether a
# '.json' extension was intended before renaming, since other code may read
# this exact path.
emotions_json_path = 'screenplay datasets/scriptemotionjson'
df.to_json(emotions_json_path)