In [1]:
import pandas as pd 
import numpy as np 
import nltk 

In [2]:
example_data = {"character":["Tony Stark", "Captain America", "Thor"], "dialogue":["I hate apples", "I love apples", "I feel neutral about apples"]}
example_df = pd.DataFrame(example_data)
example_df.head()

Unnamed: 0,character,dialogue
0,Tony Stark,I hate apples
1,Captain America,I love apples
2,Thor,I feel neutral about apples


In [3]:
example_df.dtypes

character    object
dialogue     object
dtype: object

In [4]:
from nltk.tokenize import word_tokenize
nltk.download('punkt') 

[nltk_data] Downloading package punkt to /Users/emilyyu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
tokenized_tony = word_tokenize(example_df.loc[0,"dialogue"])
print(tokenized_tony)

['I', 'hate', 'apples']


In [6]:
example_df["tokened dialogue"] = example_df["dialogue"].apply(word_tokenize)
example_df.head()

Unnamed: 0,character,dialogue,tokened dialogue
0,Tony Stark,I hate apples,"[I, hate, apples]"
1,Captain America,I love apples,"[I, love, apples]"
2,Thor,I feel neutral about apples,"[I, feel, neutral, about, apples]"


In [7]:
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
print(stop_words)

{'that', 'few', 'against', 'where', 'her', 'itself', 'those', 'when', 'i', 'only', 'same', 'if', "wasn't", 'are', 'he', 'can', 'you', 'himself', 'ours', 'as', 'its', "shouldn't", 'up', "isn't", 'above', 'from', 'such', "couldn't", 'whom', 'didn', 'am', 'our', 've', 'through', 'so', 'in', 'o', 'a', 'just', 'doesn', "mustn't", 'both', 'hasn', 'had', 'here', 'they', 'into', 'theirs', 'own', 'my', 'off', 'out', 'there', 'but', 'more', 'some', 'each', 's', "didn't", 'being', 'once', "you'll", 'does', 'down', 'couldn', 'shan', 'how', 'don', 'mustn', 'again', 'wasn', 'isn', 'now', 'your', 'with', 'too', 'nor', 'do', 'hers', 'at', 'below', "you've", 'of', 'no', 'll', 'then', 'wouldn', "you're", 'mightn', 'the', 'yourself', 'aren', "shan't", "should've", 'him', 'who', 'them', 'have', 'was', "that'll", 'by', "she's", 'having', 'm', 'their', 'over', 'be', "wouldn't", "doesn't", 'we', "aren't", 'between', 'shouldn', 'because', 'other', 'to', "mightn't", "hadn't", 'his', 'after', "don't", "it's", '

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/emilyyu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
example_df['tokened dialogue'] = example_df['tokened dialogue'].apply(lambda x: [item for item in x if item not in stop_words])
example_df.head()

Unnamed: 0,character,dialogue,tokened dialogue
0,Tony Stark,I hate apples,"[I, hate, apples]"
1,Captain America,I love apples,"[I, love, apples]"
2,Thor,I feel neutral about apples,"[I, feel, neutral, apples]"


In [9]:
from nltk.stem import PorterStemmer

ps = PorterStemmer()

In [10]:
example_df['tokened_stemmed_dialogue'] = example_df['tokened dialogue'].apply(lambda x: [ps.stem(y) for y in x])
example_df.head()

Unnamed: 0,character,dialogue,tokened dialogue,tokened_stemmed_dialogue
0,Tony Stark,I hate apples,"[I, hate, apples]","[i, hate, appl]"
1,Captain America,I love apples,"[I, love, apples]","[i, love, appl]"
2,Thor,I feel neutral about apples,"[I, feel, neutral, apples]","[i, feel, neutral, appl]"


In [11]:
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer 
analyser = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/emilyyu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [12]:
analyser.polarity_scores("I hate apples")

{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.5719}

In [13]:
analyser.polarity_scores("I like apples")

{'neg': 0.0, 'neu': 0.286, 'pos': 0.714, 'compound': 0.3612}

In [14]:
analyser.polarity_scores("I feel neutral about apples")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [15]:
analyser.polarity_scores(example_df.loc[0, "dialogue"])

{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.5719}

In [16]:
analyser.polarity_scores("I hate apple")

{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.5719}

In [17]:
scores = analyser.polarity_scores("I hate apples")
print(scores)

{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.5719}


In [18]:
type(scores)

dict

In [19]:
score_df = pd.DataFrame(scores, index = [0])
score_df.head()

Unnamed: 0,neg,neu,pos,compound
0,0.787,0.213,0.0,-0.5719


In [20]:
print(scores["compound"])

-0.5719
