# Initial Setup

In [1]:
from datetime import datetime as dt
import pandas as pd
from nltk import download
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re

In [2]:
download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/sean/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Data and Text Cleaning

In [3]:
headlines = pd.read_csv('headlines.csv', index_col='Date', parse_dates=['Date'], date_parser=lambda date: dt.strptime(date,'%Y-%m-%d').date()).sort_index()
headlines.head()

Unnamed: 0_level_0,News
Date,Unnamed: 1_level_1
2008-06-08,"b'Marriage, they said, was reduced to the stat..."
2008-06-08,b'Nim Chimpsky: The tragedy of the chimp who t...
2008-06-08,"b""Canada: Beware slippery slope' to censorship..."
2008-06-08,b'EU Vice-President Luisa Morgantini and the I...
2008-06-08,"b""Israeli minister: Israel will attack Iran if..."


In [4]:
REPLACE_NO_SPACE = re.compile("b[\"\']|[\'\"]")
headlines['News'] = headlines['News'].str.replace(REPLACE_NO_SPACE, '')
headlines.head()

Unnamed: 0_level_0,News
Date,Unnamed: 1_level_1
2008-06-08,"Marriage, they said, was reduced to the status..."
2008-06-08,Nim Chimpsky: The tragedy of the chimp who tho...
2008-06-08,"Canada: Beware slippery slope to censorship, h..."
2008-06-08,EU Vice-President Luisa Morgantini and the Iri...
2008-06-08,Israeli minister: Israel will attack Iran if i...


# Sentiment Analysis using NLTK's VADER

In [5]:
sid = SentimentIntensityAnalyzer()
scores = headlines['News'].map(lambda news: sid.polarity_scores(news))
for col in ['neg', 'neu', 'pos']:
    headlines[col] = scores.map(lambda score: score[col])
headlines.head()

Unnamed: 0_level_0,News,neg,neu,pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-06-08,"Marriage, they said, was reduced to the status...",0.169,0.831,0.0
2008-06-08,Nim Chimpsky: The tragedy of the chimp who tho...,0.196,0.804,0.0
2008-06-08,"Canada: Beware slippery slope to censorship, h...",0.0,1.0,0.0
2008-06-08,EU Vice-President Luisa Morgantini and the Iri...,0.091,0.909,0.0
2008-06-08,Israeli minister: Israel will attack Iran if i...,0.188,0.666,0.146


In [6]:
headlines['overall'] = headlines['pos'] - headlines['neg']
headlines.head()

Unnamed: 0_level_0,News,neg,neu,pos,overall
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-06-08,"Marriage, they said, was reduced to the status...",0.169,0.831,0.0,-0.169
2008-06-08,Nim Chimpsky: The tragedy of the chimp who tho...,0.196,0.804,0.0,-0.196
2008-06-08,"Canada: Beware slippery slope to censorship, h...",0.0,1.0,0.0,0.0
2008-06-08,EU Vice-President Luisa Morgantini and the Iri...,0.091,0.909,0.0,-0.091
2008-06-08,Israeli minister: Israel will attack Iran if i...,0.188,0.666,0.146,-0.042


In [7]:
headlines[:'2009-06-01'].mean()

neg        0.180892
neu        0.756273
pos        0.062723
overall   -0.118170
dtype: float64

In [8]:
headlines['2009-06-01':].mean()

neg        0.160466
neu        0.778824
pos        0.060709
overall   -0.099757
dtype: float64