In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob

In [None]:
# Read the processed scripts table and the metadata table from the data/ folder.
scripts, meta = (
    pd.read_csv(csv_path)
    for csv_path in ('data/processed_scripts.csv', 'data/metadata.csv')
)

In [None]:
def getSubjectivity(text):
    """Return the TextBlob subjectivity score of ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
  
def getPolarity(text):
    """Return the TextBlob polarity score of ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
  
# Create two new columns: subjectivity & polarity.
# Rows without dialog are dropped first. The explicit .copy() makes `scripts` an
# independent frame, so the column assignments below cannot trigger
# SettingWithCopyWarning (which assigning onto a boolean-mask slice can do).
scripts = scripts[scripts['Dialog'].notna()].copy()
scripts['subjectivity'] = scripts['Dialog'].apply(getSubjectivity)
scripts['polarity'] = scripts['Dialog'].apply(getPolarity)

# Order the lines by episode key and renumber the index 0..n-1.
scripts = scripts.sort_values('S-E', ignore_index=True)

scripts

In [None]:
# Summary statistics of the numeric columns, including the subjectivity and polarity scores.
scripts.describe()

From these summary statistics, we can see that, on average, the show's dialogue is roughly neutral in polarity (neither positive nor negative).

In [None]:
# Per-episode mean subjectivity and polarity, computed in one grouped pass
# rather than re-filtering the whole frame twice for every episode.
# groupby sorts its keys by default, so the result is already ordered by episode.
episode_means = scripts.groupby('S-E')[['subjectivity', 'polarity']].mean()

# Keep the {episode: [subjectivity, polarity]} layout that the following cells index into.
data = {
    episode: [subjectivity, polarity]
    for episode, subjectivity, polarity in episode_means.itertuples(name=None)
}

In [None]:
# Split the {episode: [subjectivity, polarity]} mapping into three parallel lists.
ep = [*data]
sub = [scores[0] for scores in data.values()]
pol = [scores[1] for scores in data.values()]

In [None]:
# Report the range of the per-episode subjectivity values.
print("Maximum Subjectivity: ", max(sub))
print("Minimum Subjectivity: ", min(sub))

# Extra-wide canvas; the episode labels are rotated vertically along the x-axis.
fig, ax = plt.subplots(figsize=(42, 6))
ax.plot(ep, sub)
ax.tick_params(axis="x", labelrotation=90)
ax.set_xlabel("Episode")
ax.set_ylabel("Subjectivity")
ax.set_title("Subjectivity vs. Episode")
plt.show()

In [None]:
# Report the range of the per-episode polarity values.
print("Maximum Polarity: ", max(pol))
print("Minimum Polarity: ", min(pol))

# Extra-wide canvas; the episode labels are rotated vertically along the x-axis.
fig, ax = plt.subplots(figsize=(42, 6))
ax.plot(ep, pol)
ax.tick_params(axis="x", labelrotation=90)
ax.set_xlabel("Episode")
ax.set_ylabel("Polarity")
ax.set_title("Polarity vs. Episode")
plt.show()

In [None]:
# Weighted polarity and subjectivity

def words_per_line(text):
    """Count the whitespace-separated tokens in ``text``."""
    tokens = text.split()
    return len(tokens)

# The number of words in each line is the weight applied to that line's scores.
scripts['word_count'] = scripts['Dialog'].map(words_per_line)

# Both operands come from the same frame, so the element-wise product lines up row for row.
scripts['weighted_sub'] = scripts['subjectivity'].mul(scripts['word_count'])
scripts['weighted_pol'] = scripts['polarity'].mul(scripts['word_count'])

scripts

In [None]:
# Word-count-weighted mean per episode: sum(score * words) / sum(words).
# One grouped sum replaces the four full-frame filters the per-episode loop needed;
# groupby sorts its keys by default, so the result is already ordered by episode.
episode_totals = scripts.groupby('S-E')[['weighted_sub', 'weighted_pol', 'word_count']].sum()
weighted_means = episode_totals[['weighted_sub', 'weighted_pol']].div(
    episode_totals['word_count'], axis=0
)

# Keep the {episode: [subjectivity, polarity]} layout that the following cells index into.
data = {
    episode: [subjectivity, polarity]
    for episode, subjectivity, polarity in weighted_means.itertuples(name=None)
}

In [None]:
# Split the {episode: [subjectivity, polarity]} mapping into three parallel lists.
ep = [*data]
sub = [scores[0] for scores in data.values()]
pol = [scores[1] for scores in data.values()]

In [None]:
# Report the range of the word-count-weighted per-episode subjectivity values.
print("Maximum Weighted Subjectivity: ", max(sub))
print("Minimum Weighted Subjectivity: ", min(sub))

# The axis label and title say "Weighted" so this figure is distinguishable
# from the unweighted subjectivity plot earlier in the notebook.
fig, ax = plt.subplots(figsize=(42, 6))
ax.plot(ep, sub)
ax.tick_params(axis="x", labelrotation=90)
ax.set_xlabel("Episode")
ax.set_ylabel("Weighted Subjectivity")
ax.set_title("Weighted Subjectivity vs. Episode")
plt.show()

In [None]:
# Report the range of the word-count-weighted per-episode polarity values.
print("Maximum Weighted Polarity: ", max(pol))
print("Minimum Weighted Polarity: ", min(pol))

# The axis label and title say "Weighted" so this figure is distinguishable
# from the unweighted polarity plot earlier in the notebook.
fig, ax = plt.subplots(figsize=(42, 6))
ax.plot(ep, pol)
ax.tick_params(axis="x", labelrotation=90)
ax.set_xlabel("Episode")
ax.set_ylabel("Weighted Polarity")
ax.set_title("Weighted Polarity vs. Episode")
plt.show()

In [None]:
# Attach the per-episode sentiment aggregates to the metadata table.
# NOTE(review): these assignments are positional, so this assumes `meta` has exactly
# one row per episode, in the same sorted 'S-E' order as `ep` — confirm against metadata.csv.
meta['mean_pol'] = pol
meta['mean_sub'] = sub

# Per-episode spread of the line-level scores, reindexed to the order of `ep`.
# NOTE(review): assumes the intended statistic is the sample standard deviation
# (pandas default, ddof=1) of the unweighted per-line scores — confirm.
# Episodes with a single line get NaN.
episode_std = scripts.groupby('S-E')[['polarity', 'subjectivity']].std().reindex(ep)
stdpol = episode_std['polarity'].tolist()
stdsub = episode_std['subjectivity'].tolist()
meta['std_pol'] = stdpol
meta['std_sub'] = stdsub