### Doc

This module of the system tracks how the emotions of a movie change as it progresses. The change is measured scene by scene: using the Natural Language Understanding (NLU) APIs provided by IBM Watson, the emotion of each scene in the movie's script is analysed. A plot is then produced that shows the trend of the common emotions per scene. <br>

##### Input

The movie, and the emotion(s) to be described (optional)

##### Output

A plot of the trend of each emotion over the course of the movie

In [None]:
import json
import os

import numpy as np
import pandas as pd
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
  import Features, EntitiesOptions, KeywordsOptions, EmotionOptions

# Watson NLU client used by scene_by_scene to score each scene's emotions.
# NOTE(security): service credentials should not live in source control.
# WATSON_NLU_USERNAME / WATSON_NLU_PASSWORD take precedence when set; the
# literals remain only as a fallback so existing runs keep working and should
# be rotated and removed.
natural_language_understanding = NaturalLanguageUnderstandingV1(
  username=os.environ.get('WATSON_NLU_USERNAME',
                          '06320980-d791-4920-834b-3c5368522608'),
  password=os.environ.get('WATSON_NLU_PASSWORD', 'UKVXqFnwnNnZ'),
  version='2018-03-16')


### Function to Read a PDF Script as Text

In [None]:
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO

def convert_pdf_to_txt(path):
    """Extract the text of a PDF file and return it as a single string.

    Each page yielded by ``PDFPage.get_pages`` is processed by a pdfminer
    ``TextConverter`` that writes into an in-memory buffer; the buffer's
    contents are returned once all pages have been processed.

    Args:
        path: Filesystem path of the PDF to read (opened in binary mode).

    Returns:
        The text accumulated from the processed pages.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    device = TextConverter(rsrcmgr, retstr, codec='ascii', laparams=LAParams())
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # ``with`` releases the PDF handle even when a page fails to parse.
        with open(path, 'rb') as fp:
            pages = PDFPage.get_pages(
                fp,
                set(),
                maxpages=0,
                password="",
                caching=True,
                check_extractable=True,
            )
            for page in pages:
                interpreter.process_page(page)
        # Read the buffer before the ``finally`` clause closes it.
        return retstr.getvalue()
    finally:
        device.close()
        retstr.close()

In [None]:
# Maps each script file name to the marker string its extracted text is split
# on to separate scenes (trailing spaces in the markers are significant).
dic = {
    'Haider.pdf': 'CUT TO',
    'Highway.pdf': 'Sc # ',
    'JabWeMet.pdf': 'Sc # ',
    'Kaminey.pdf': 'CUT TO',
    'Maqbool.pdf': 'SC. ',
    'Masaan.pdf': 'CUT TO',
    'NEERJA.pdf': 'CUT TO',
    'Pink.pdf': 'CUT TO',
    'Queen.pdf': 'CUT TO',
    'Raman Raghav 2_0.pdf': 'CUT TO',
    'Rang De Basanti Script - Film Companion-min.pdf': 'CUT TO',
    'Rockstar.pdf': 'Sc # ',
}


In [None]:
# Script files to process, in the order they are iterated.
movienames = [
    'Haider.pdf',
    'Highway.pdf',
    'JabWeMet.pdf',
    'Kaminey.pdf',
    'Maqbool.pdf',
    'Masaan.pdf',
    'NEERJA.pdf',
    'Pink.pdf',
    'Queen.pdf',
    'Raman Raghav 2_0.pdf',
    'Rang De Basanti Script - Film Companion-min.pdf',
    'Rockstar.pdf',
]

In [None]:
# Emotion column names compared by emotionRelater, in iteration order.
emotions = [
    'anger',
    'disgust',
    'fear',
    'joy',
    'sadness',
]

In [None]:
class scene_by_scene():
    """Per-scene emotion scores for one movie script.

    Constructing an instance extracts the script's text, splits it on the
    movie's scene delimiter (looked up in ``dic``) and sends every piece to
    the Watson NLU client. The scores are stored in ``main_table``, a
    DataFrame indexed by scene number with one column per emotion.
    """

    # Fixed column order so the table does not depend on the key order of the
    # service response.
    _EMOTION_COLUMNS = ['anger', 'disgust', 'fear', 'joy', 'sadness']

    def __init__(self, moviename):
        """Analyse every scene of ``moviename`` and build ``main_table``.

        Args:
            moviename: Script file name; must be a key of ``dic`` and exist
                under ``../Bollywood-Data-master/scripts-data/``.
        """
        self.moviename = moviename
        path = '../Bollywood-Data-master/scripts-data/' + moviename
        text = convert_pdf_to_txt(path)
        delimiter = dic[moviename]

        # Rows are collected in a list and turned into a DataFrame once:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # table on every call.
        rows = []
        for i, txt in enumerate(text.split(delimiter)):
            try:
                response = natural_language_understanding.analyze(
                    text=txt,
                    features=Features(
                        emotion=EmotionOptions())).get_result()
                row = dict(response['emotion']['document']['emotion'])
                print(i)
            except Exception as err:
                # Best effort: a scene that cannot be analysed is kept as an
                # all-zero row, but the failure is reported instead of being
                # swallowed. KeyboardInterrupt/SystemExit now propagate.
                print('scene', i, 'could not be analysed:', err)
                row = {name: 0 for name in self._EMOTION_COLUMNS}
            row['scene'] = i
            rows.append(row)

        self.main_table = self._rows_to_table(rows)

    @classmethod
    def _rows_to_table(cls, rows):
        """Build the scene-indexed emotion table from a list of row dicts."""
        table = pd.DataFrame(rows, columns=cls._EMOTION_COLUMNS + ['scene'])
        table['scene'] = table['scene'].astype(int)
        return table.set_index('scene')

    def plot_emotion(self, emotion=None):
        """Plot the selected emotion columns against the scene index.

        Args:
            emotion: Column name or list of column names to plot; all five
                emotions when omitted.

        Returns:
            Whatever ``DataFrame.plot`` returns for the selection.
        """
        if emotion is None:
            emotion = list(self._EMOTION_COLUMNS)
        return self.main_table[emotion].plot(figsize=(20, 15))

    def ret_dataframe(self):
        """Return the scene-indexed emotion table."""
        return self.main_table
    
    

In [None]:
def emotionRelater(x, y, window=20, emotion_names=None):
    """Score how similarly two movies' emotions evolve over their first scenes.

    For every emotion, positions ``1 .. window - 1`` of the two tables are
    compared with ``np.corrcoef``; each coefficient is printed and their sum
    is returned.

    Args:
        x: Table indexable by emotion name (e.g. a DataFrame with one column
            per emotion).
        y: Second table with the same emotion columns.
        window: Exclusive end of the positional slice taken from each column.
            Position 0 is always skipped. Defaults to 20.
        emotion_names: Emotion columns to compare; the module-level
            ``emotions`` list when omitted.

    Returns:
        The sum of the per-emotion correlation coefficients. The result is
        NaN when a coefficient is undefined (e.g. a constant column).
    """
    if emotion_names is None:
        emotion_names = emotions
    #We can downsample too if required
    coefficients = []
    for emotionname in emotion_names:
        x1 = np.asarray(x[emotionname], dtype=float)[1:window]
        x2 = np.asarray(y[emotionname], dtype=float)[1:window]
        # np.corrcoef needs equally long inputs; scripts shorter than the
        # window can differ in length, so compare only the shared prefix.
        common = min(len(x1), len(x2))
        relation = np.corrcoef(x1[:common], x2[:common])
        relativeIndex = relation[1][0]
        print(relativeIndex)
        coefficients.append(relativeIndex)

    return np.sum(coefficients)

In [None]:
def read_dataframe(moviename1, folder='../ScriptsDF/'):
    """Load a movie's saved emotion table from CSV.

    The export loop in this file writes each table as ``<moviename>.csv``,
    so when ``<folder>/<moviename1>`` does not exist but the ``.csv`` name
    does, the latter is read instead.

    Args:
        moviename1: File name of the table, with or without the ``.csv``
            suffix added on export.
        folder: Directory holding the saved tables.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    path = os.path.join(folder, moviename1)
    if not os.path.exists(path) and os.path.exists(path + '.csv'):
        path += '.csv'
    movie_dataframe1 = pd.read_csv(path)
    return movie_dataframe1

In [None]:
# Score every other movie against ``moviename1`` and rank them.
# NOTE(review): ``moviename1`` is not assigned in this cell; it must already
# be defined in the session before this cell runs.
movie_dataframe1 = read_dataframe(moviename1)
arr = {}
for moviename in movienames:
    if moviename == moviename1:
        continue
    movie_dataframe2 = read_dataframe(moviename)
    arr[moviename] = emotionRelater(movie_dataframe1, movie_dataframe2)

# Rank by score, highest first. A plain sorted(arr) only orders the movie
# names alphabetically and ignores the scores computed above.
sorted_names = sorted(arr, key=arr.get, reverse=True)
print(sorted_names)


In [None]:
# Summed per-emotion correlation for the two tables currently bound to
# movie_dataframe1 and movie_dataframe2.
emotionRelater(movie_dataframe1,movie_dataframe2)

In [None]:
# Build the per-scene emotion table for Pink.pdf (extracts the PDF text and
# queries the NLU client once per scene).
x = scene_by_scene('Pink.pdf')

In [None]:
# Build the per-scene emotion table for Queen.pdf in the same way.
y = scene_by_scene('Queen.pdf')

In [None]:
# Plot the default set of five emotions for the movie held in x.
x.plot_emotion()

In [None]:
# Interactive check of x's type; the result is not stored.
type(x)

In [None]:
# Take the scene-indexed emotion tables out of the two analysed movies.
movie_dataframe1 = x.ret_dataframe()
movie_dataframe2 = y.ret_dataframe()

In [None]:
# Display the first movie's emotion table.
movie_dataframe1

In [None]:
# Display the second movie's emotion table.
movie_dataframe2

In [None]:
# Analyse every script and save its emotion table as <parfolder><name>.csv.
parfolder= '../ScriptsDF/'

# Create the output folder up front: otherwise to_csv fails only after the
# first movie's per-scene analysis has already been done.
os.makedirs(parfolder, exist_ok=True)

for moviename in movienames:
    x = scene_by_scene(moviename)
    df = x.ret_dataframe()
    path = parfolder+moviename+'.csv'
    df.to_csv(path)
    