In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
import Levenshtein
from sklearn.metrics.pairwise import polynomial_kernel
import gradio as gr
from langdetect import detect
from Bio.Align import PairwiseAligner
from scipy.spatial import distance
import jaro
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
import codecs



# Import the Model

In [3]:
# Load the tokenizer and the sequence-classification model, both from the
# 'classifier' identifier (presumably a local directory beside the notebook —
# TODO confirm). `classify` uses these two module-level objects.
tokenizer = AutoTokenizer.from_pretrained('classifier')
model = AutoModelForSequenceClassification.from_pretrained('classifier')

### Vector mapping for Euclidean Distance

In [4]:
# Emotion code -> 3-d coordinate used when comparing films by Euclidean distance.
vector_mapping = {
    1: [0, 0, 0],
    2: [0, 2, 0],
    3: [1, 1, 1],
    4: [2, 1, 0],
    5: [4, 1, 0],
    6: [5, 1, 0],
}


def mapping(emotions):
    """Translate a sequence of emotion codes into their 3-d vectors.

    Parameters
    ----------
    emotions : iterable
        Emotion codes; each must be a key of ``vector_mapping``.

    Returns
    -------
    list
        One entry per code, in input order. The entries are the very list
        objects stored in ``vector_mapping`` (no copies are made).

    Raises
    ------
    KeyError
        If a code is not present in ``vector_mapping``.
    """
    return [vector_mapping[code] for code in emotions]

### Function to Detect the language of the script

In [5]:
def language(scenes):
    """Guess the language of a screenplay from its first scene.

    Parameters
    ----------
    scenes : sequence of str
        The scenes of one screenplay; only ``scenes[0]`` is inspected.

    Returns
    -------
    str
        Whatever ``detect`` returns for the first scene, or ``'unknown'`` when
        there is no first scene or detection fails.
    """
    # An empty or missing scene list has no first scene to inspect.
    try:
        first_scene = scenes[0]
    except (IndexError, KeyError, TypeError):
        return 'unknown'

    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate while detection failures stay best-effort.
    try:
        return detect(first_scene)
    except Exception:
        return 'unknown'

### Import the screenplay dataset and remove the French movies

In [6]:
# Read the screenplay dataset and tag every row with the detected language.
df = pd.read_json('screenplay datasets/scriptemotionjson.json')
df['lang'] = df['scenes'].apply(language)

# Keep every row not detected as French and renumber the rows from 0, so that
# positional results (e.g. from argsort) can be used directly as index labels.
df = df[df['lang'] != 'fr'].reset_index(drop=True)

# Pre-compute the 3-d vector form of each emotion sequence.
df['3d_vectors'] = df['emotions'].apply(mapping)

## Map the emotions to colors that represent them

In [7]:
from PIL import Image, ImageDraw

# RGB colour associated with each emotion name.
color_mapping = {
    'joy': (248, 255, 149),
    'love': (255, 199, 234),
    'surprise': (166, 255, 150),
    'sadness': (12, 53, 106),
    'anger': (223, 46, 56),
    'fear': (53, 124, 60),
}

# Integer code associated with each emotion name.
emotion_mapping = {
    'joy': 6,
    'love': 5,
    'surprise': 4,
    'sadness': 3,
    'anger': 2,
    'fear': 1,
}

# The same colours, keyed by the emotion code rendered as a string
# ('6' -> joy's colour, ..., '1' -> fear's colour).
code_mapping = {
    str(code): color_mapping[name] for name, code in emotion_mapping.items()
}

# Emotion names in the order used to decode classifier output indices.
classifier_labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

In [8]:
def string(e):
    """Concatenate the ``str()`` form of every element of *e*, with no separator."""
    return ''.join(map(str, e))

## Function to convert the vector into an image representing the emotional pattern in the film

In [9]:
def emo_barcode(idx):
    """Render the emotion barcode of the dataset film at row *idx*.

    Parameters
    ----------
    idx : int
        Index label of the film in ``df``.

    Returns
    -------
    PIL.Image.Image
        The image produced by ``emo_barcode_file`` for ``df['emotions'][idx]``.
    """
    # The drawing code used to be duplicated here line for line; delegate so
    # the rendering logic lives in one place.
    return emo_barcode_file(df['emotions'][idx])

def emo_barcode_file(emotion_list, width=700, height=300, segments=100):
    """Draw an emotion sequence as a row of coloured vertical stripes.

    Parameters
    ----------
    emotion_list : sequence
        Emotion codes; ``str(code)`` must be a key of ``code_mapping``.
        Only the first *segments* entries are drawn.
    width, height : int, optional
        Size of the image in pixels (defaults 700 x 300).
    segments : int, optional
        Maximum number of stripes (default 100). Each stripe is
        ``width // segments`` pixels wide.

    Returns
    -------
    PIL.Image.Image
        RGB image on a white background. If fewer than *segments* codes are
        supplied, the remaining area stays white instead of raising
        ``IndexError``.

    Raises
    ------
    KeyError
        If a code has no entry in ``code_mapping``.
    """
    image = Image.new('RGB', (width, height), 'white')
    draw = ImageDraw.Draw(image)
    # Guard against a zero-width stripe when segments > width.
    stripe = max(1, width // segments)
    for position, code in enumerate(emotion_list[:segments]):
        left = stripe * position
        # Pillow's rectangle box includes both end points, so subtract 1 to
        # keep each stripe inside its own slot and inside the image.
        draw.rectangle(
            [left, 0, left + stripe - 1, height - 1],
            fill=code_mapping[str(code)],
        )
    return image

# Functions to calculate similarity measures

In [10]:
def cosine(idx):
    """Rank dataset films by summed windowed cosine distance to a query.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code
        sequence (e.g. the labels of an uploaded screenplay).

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five closest films, closest first. When
        *idx* is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['emotions'][idx] if is_index else idx

    scores = []
    for row in range(df.shape[0]):
        e2 = df['emotions'][row]
        score = 0
        # Compare positions 0..99 in consecutive windows of five.
        for start in range(0, 100, 5):
            score += distance.cosine(e1[start:start + 5], e2[start:start + 5])
        scores.append(score)

    ranked = np.argsort(np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first
        # (ties at distance 0 could put another film there).
        ranked = ranked[ranked != idx]
    # For a new screenplay the best match is a genuine result, so nothing is
    # skipped in that case.
    return ranked[:5]

def euclid(idx):
    """Rank dataset films by summed Euclidean distance between 3-d emotion vectors.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code
        sequence, which is converted with ``mapping`` first.

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five closest films, closest first. When
        *idx* is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['3d_vectors'][idx] if is_index else mapping(idx)

    scores = []
    for row in range(df.shape[0]):
        e2 = df['3d_vectors'][row]
        score = 0
        # Point-by-point distance over positions 0..99.
        for position in range(0, 100):
            score += distance.euclidean(e1[position], e2[position])
        scores.append(score)

    ranked = np.argsort(np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first.
        ranked = ranked[ranked != idx]
    # For a new screenplay the best match must not be skipped.
    return ranked[:5]

def jaro_metric(idx):
    """Rank dataset films by summed windowed Jaro similarity to a query.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code sequence.

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five most similar films, most similar
        first (the same ordering as the distance-based measures). When *idx*
        is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['emotions'][idx] if is_index else idx

    scores = []
    for row in range(df.shape[0]):
        e2 = df['emotions'][row]
        score = 0
        # Compare positions 0..99 in consecutive windows of five, each window
        # rendered as a string of codes.
        for start in range(0, 100, 5):
            score += jaro.jaro_metric(
                string(e1[start:start + 5]), string(e2[start:start + 5])
            )
        scores.append(score)

    # Higher means more similar, so sort on the negated scores to get the
    # best match first.
    ranked = np.argsort(-np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first.
        ranked = ranked[ranked != idx]
    return ranked[:5]

def hamming(idx):
    """Rank dataset films by summed windowed Hamming distance to a query.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code sequence.

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five closest films, closest first. When
        *idx* is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['emotions'][idx] if is_index else idx

    scores = []
    for row in range(df.shape[0]):
        e2 = df['emotions'][row]
        score = 0
        # Compare positions 0..99 in consecutive windows of five.
        for start in range(0, 100, 5):
            score += distance.hamming(e1[start:start + 5], e2[start:start + 5])
        scores.append(score)

    ranked = np.argsort(np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first.
        ranked = ranked[ranked != idx]
    # For a new screenplay the best match must not be skipped.
    return ranked[:5]

def needleman(idx):
    """Rank dataset films by summed windowed global-alignment score to a query.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code sequence.

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five best-aligning films, best first.
        When *idx* is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['emotions'][idx] if is_index else idx

    # The aligner must be instantiated; calling `align` on the class itself
    # does not work. `score` returns the numeric alignment score directly.
    aligner = PairwiseAligner()
    aligner.mode = 'global'

    # Emotion codes are passed as int32 arrays so the default letter alphabet
    # is not involved. The query windows do not change per film, so they are
    # built once.
    windows = range(0, 100, 5)
    query_windows = [np.array(e1[start:start + 5], dtype=np.int32) for start in windows]

    scores = []
    for row in range(df.shape[0]):
        e2 = df['emotions'][row]
        score = 0
        for query_window, start in zip(query_windows, windows):
            target_window = np.array(e2[start:start + 5], dtype=np.int32)
            score += aligner.score(query_window, target_window)
        scores.append(score)

    # An alignment score is a similarity: higher is better, so sort descending.
    ranked = np.argsort(-np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first.
        ranked = ranked[ranked != idx]
    return ranked[:5]

def Leven(idx):
    """Rank dataset films by summed windowed Levenshtein distance to a query.

    Parameters
    ----------
    idx : int or sequence
        Either the index label of a film in ``df`` or an emotion-code sequence.

    Returns
    -------
    numpy.ndarray
        Row positions of the (up to) five closest films, closest first. When
        *idx* is a dataset index the film itself is left out.
    """
    # isinstance (not type(...) == int) so NumPy integers are accepted too.
    is_index = isinstance(idx, (int, np.integer))
    e1 = df['emotions'][idx] if is_index else idx

    scores = []
    for row in range(df.shape[0]):
        e2 = df['emotions'][row]
        score = 0
        # Compare positions 0..99 in consecutive windows of five. Windows are
        # rendered as strings, as in jaro_metric; with single-digit codes each
        # code is one character, so the edit distance is unchanged.
        for start in range(0, 100, 5):
            score += Levenshtein.distance(
                string(e1[start:start + 5]), string(e2[start:start + 5])
            )
        scores.append(score)

    ranked = np.argsort(np.array(scores), kind='stable')
    if is_index:
        # Drop the query film explicitly rather than assuming it sorts first.
        ranked = ranked[ranked != idx]
    # For a new screenplay the best match must not be skipped.
    return ranked[:5]

# Function to split a screenplay into segments and classify the emotion of each segment

In [11]:
def classify(file):
    """Split a screenplay text file into 100 segments and label each with an emotion code.

    Parameters
    ----------
    file : str or object with a ``name`` attribute
        Path of a UTF-8 text file, or a file-like wrapper whose ``name`` is
        that path.

    Returns
    -------
    list of int
        100 emotion codes (values of ``emotion_mapping``), one per segment.

    Raises
    ------
    OSError, UnicodeDecodeError
        If the file cannot be opened or is not valid UTF-8. Previously the
        error was only printed and the function then failed on the undefined
        ``text`` variable.
    """
    # Both accepted input kinds reduce to a path, so read it in one place.
    path = file if isinstance(file, str) else file.name
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    # Split the screenplay into 100 equally sized word segments.
    # NOTE: up to 99 trailing words are left out, and a text with fewer than
    # 100 words yields 100 empty segments.
    words = text.split()
    segment_length = len(words) // 100
    scenes = [
        ' '.join(words[segment_length * i:segment_length * (i + 1)])
        for i in range(100)
    ]

    # Classify the segments. The whole encoding is passed on (not just
    # input_ids) so the attention mask tells the model to ignore padding.
    inputs = tokenizer(scenes, padding=True, truncation=True, return_tensors="pt")
    predictions = model(**inputs).logits.argmax(-1).tolist()

    # Model output index -> emotion name -> emotion code.
    return [emotion_mapping[classifier_labels[i]] for i in predictions]


# Deployment in Gradio App

In [12]:
similarity_measures = ["Cosine Similarity", "Euclidean Distance", "Jaro Metric", "Hamming Metric", "Levenshtein distance"]
def output(idx,measure):
    """Gradio callback: show a dataset film and its five most similar films.

    Parameters
    ----------
    idx : int
        Position of the selected film in the dropdown (its index in ``df``).
    measure : int
        Position of the selected entry in ``similarity_measures``.

    Returns
    -------
    tuple
        ``(title, barcode)`` of the selected film followed by a
        ``(title, barcode)`` pair for each of the five nearest films,
        flattened into one tuple.

    Raises
    ------
    gr.Error
        If either dropdown has no selection.
    """
    # An unselected dropdown delivers None; without this guard `a_list` would
    # never be assigned and the callback would die with UnboundLocalError.
    if idx is None or measure is None:
        raise gr.Error("Please choose both a movie and a similarity measure.")

    # Same order as `similarity_measures`, so the dropdown index selects the function.
    rankers = (cosine, euclid, jaro_metric, hamming, Leven)
    a_list = rankers[measure](idx)

    results = [df['title'][idx], emo_barcode(idx)]
    for neighbour in a_list[:5]:
        results.append(df['title'][neighbour])
        results.append(emo_barcode(neighbour))
    return tuple(results)

def output2(file,measure):
    """Gradio callback: classify an uploaded screenplay and show its five most similar films.

    Parameters
    ----------
    file : str or object with a ``name`` attribute
        The uploaded screenplay, passed on to ``classify``.
    measure : int
        Position of the selected entry in ``similarity_measures``.

    Returns
    -------
    tuple
        The barcode of the uploaded screenplay followed by a
        ``(title, barcode)`` pair for each of the five nearest films,
        flattened into one tuple.

    Raises
    ------
    gr.Error
        If no file was uploaded or no similarity measure was selected.
    """
    # A missing upload or unselected dropdown delivers None; without this
    # guard the callback would fail with an unrelated error.
    if file is None or measure is None:
        raise gr.Error("Please upload a file and choose a similarity measure.")

    labels = classify(file)

    # Same order as `similarity_measures`, so the dropdown index selects the function.
    rankers = (cosine, euclid, jaro_metric, hamming, Leven)
    a_list = rankers[measure](labels)

    results = [emo_barcode_file(labels)]
    for neighbour in a_list[:5]:
        results.append(df['title'][neighbour])
        results.append(emo_barcode(neighbour))
    return tuple(results)

    

# Build the two-tab Gradio interface. Components are created in display order;
# the variables created here are wired to the callbacks at the bottom of the block.
with gr.Blocks() as demo:
    # Tab 1: pick a film from the dataset and a similarity measure.
    with gr.Tab("Analyse Existing Screenplays"):
        # type='index' makes both dropdowns pass the selected position, not the text.
        movie_name = gr.Dropdown(choices=df['title'].to_list(),type='index', label='Choose your movie')
        measure = gr.Dropdown(choices=similarity_measures,type='index', label="Select a similarity measure")
        button = gr.Button('Submit')
        # Selected film: title and barcode.
        selected_movie = gr.Textbox()
        barcode = gr.Image(label='Emotion Sequence')
        # Five result slots, each a title textbox followed by its barcode image.
        n1 = gr.Textbox()
        b1 = gr.Image(label='Emotion Sequence')
        n2 = gr.Textbox()
        b2 = gr.Image(label='Emotion Sequence')
        n3 = gr.Textbox()
        b3 = gr.Image(label='Emotion Sequence')
        n4 = gr.Textbox()
        b4 = gr.Image(label='Emotion Sequence')
        n5 = gr.Textbox()
        b5 = gr.Image(label='Emotion Sequence')
    # Tab 2: upload a screenplay text file and pick a similarity measure.
    with gr.Tab("Analyse new Screenplay"):
        # NOTE(review): type="file" may not be accepted by newer Gradio releases
        # (which may expect 'filepath' or 'binary') — confirm against the installed version.
        file = gr.File(label="Submit you file", file_types=["text"], type="file")
        measure2 = gr.Dropdown(choices=similarity_measures,type='index', label="Select a similarity measure")
        file_button = gr.Button("Submit file")
        # Barcode of the uploaded screenplay.
        barcode2 = gr.Image(label='Emotion Sequence')
        # Five result slots, each a title textbox followed by its barcode image.
        x1 = gr.Textbox()
        y1 = gr.Image(label='Emotion Sequence')
        x2 = gr.Textbox()
        y2 = gr.Image(label='Emotion Sequence')
        x3 = gr.Textbox()
        y3 = gr.Image(label='Emotion Sequence')
        x4 = gr.Textbox()
        y4 = gr.Image(label='Emotion Sequence')
        x5 = gr.Textbox()
        y5 = gr.Image(label='Emotion Sequence')

    # Wire the buttons to the callbacks; the order of `outputs` must match the
    # order of the values each callback returns.
    button.click(fn=output,inputs=[movie_name,measure],outputs=[selected_movie,barcode,n1,b1,n2,b2,n3,b3,n4,b4,n5,b5])
    file_button.click(fn=output2,inputs=[file,measure2],outputs=[barcode2,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5])


    

# Start the Gradio app; with no arguments it is served on a local URL only.
demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


