# Install VADER
[vaderSentiment](https://github.com/cjhutto/vaderSentiment)

`>pip install vaderSentiment`

In [71]:
import numpy as np
import pandas as pd
import json
from datetime import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Trump Approval Rating
Trump approval rating data gathered from [FiveThirtyEight](https://github.com/fivethirtyeight/data/tree/master/trump-approval-ratings), under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).

In [103]:
df = pd.read_csv('https://projects.fivethirtyeight.com/trump-approval-data/approval_topline.csv')

In [104]:
# change 'timestamp' data type to datetime.date
for index,row in df.iterrows():
    # pad date number with 0 instead of whitespace
    date_string = row['timestamp'].replace('  ', ' 0')
    df.at[index,'timestamp'] = datetime.strptime(date_string, '%H:%M:%S %d %b %Y').date()
df.head()

Unnamed: 0,president,subgroup,modeldate,approve_estimate,approve_hi,approve_lo,disapprove_estimate,disapprove_hi,disapprove_lo,timestamp
0,Donald Trump,Voters,3/30/2018,41.3702,45.29608,37.44432,53.74371,57.38545,50.10198,2018-03-30
1,Donald Trump,Adults,3/30/2018,39.94889,43.50609,36.39169,53.5807,57.92394,49.23745,2018-03-30
2,Donald Trump,All polls,3/30/2018,40.54322,44.73767,36.34878,53.42228,57.44325,49.4013,2018-03-30
3,Donald Trump,Voters,3/29/2018,41.06978,44.98177,37.15779,53.94359,57.66401,50.22318,2018-03-29
4,Donald Trump,Adults,3/29/2018,39.58919,43.18702,35.99136,53.80489,58.26278,49.34699,2018-03-29


We want the daily approval estimate from all polls.

In [115]:
approval_estimates = df[df['subgroup'] == 'All polls'][['timestamp','approve_estimate']]
approval_estimates.head()

Unnamed: 0,timestamp,approve_estimate
2,2018-03-30,40.54322
5,2018-03-29,40.21669
6,2018-03-28,40.19111
11,2018-03-27,40.46638
14,2018-03-26,41.09504


# Sentiment analysis of Trump's tweets
[Trump tweet analysis by NPR apps](https://github.com/nprapps/trump-tweet-analysis/blob/master/trump-tweets.ipynb)

[What We Learned About The Mood Of Trump's Tweets](https://www.npr.org/2017/04/30/526106612)

The code below downloads a `.zip` of Trump tweets from an [archive](https://github.com/bpb27/trump_tweet_data_archive) updated every hour.

In [20]:
import requests
import zipfile
import io

response = requests.get('https://github.com/bpb27/trump_tweet_data_archive/raw/master/condensed_2018.json.zip')
z = zipfile.ZipFile(io.BytesIO(response.content))
z.extractall('data')

Load the JSON data into a DataFrame.

In [93]:
with open('data/condensed_2018.json') as f:
    jsondata = json.load(f)

tweets = pd.read_json('data/condensed_2018.json')
tweets.head()

Unnamed: 0,created_at,favorite_count,id_str,in_reply_to_user_id_str,is_retweet,retweet_count,source,text
0,2018-03-29 20:06:37,77534,979449770640015360,,False,20072,Twitter for iPhone,Washington spent trillions building up foreign...
1,2018-03-29 19:58:35,54539,979447750529110016,,False,13529,Twitter for iPhone,"JOBS, JOBS, JOBS! Unemployment claims have fal..."
2,2018-03-29 19:44:06,56138,979444104923840512,,False,12922,Twitter for iPhone,We are going to REBUILD our crumbling infrastr...
3,2018-03-29 11:57:38,112077,979326715272065024,,False,29771,Twitter for iPhone,I have stated my concerns with Amazon long bef...
4,2018-03-28 21:31:54,41557,979108846408003584,25073877.0,False,8840,Twitter for iPhone,"....In the interim, Hon. Robert Wilkie of DOD ..."


For each tweet, run sentiment analysis on the text. Descriptions of the sentiment scores can be found [here](https://github.com/cjhutto/vaderSentiment#about-the-scoring).

In [102]:
analyzer = SentimentIntensityAnalyzer()

for index,row in tweets.iterrows():
    text = row['text']
    vs = analyzer.polarity_scores(text)
    tweets.at[index,'neg'] = vs['neg']
    tweets.at[index,'neu'] = vs['neu']
    tweets.at[index,'pos'] = vs['pos']
    tweets.at[index,'compound'] = vs['compound']
    
tweets.head()

Unnamed: 0,created_at,favorite_count,id_str,in_reply_to_user_id_str,is_retweet,retweet_count,source,text,neg,neu,pos,compound
0,2018-03-29 20:06:37,77534,979449770640015360,,False,20072,Twitter for iPhone,Washington spent trillions building up foreign...,0.051,0.862,0.087,0.3647
1,2018-03-29 19:58:35,54539,979447750529110016,,False,13529,Twitter for iPhone,"JOBS, JOBS, JOBS! Unemployment claims have fal...",0.297,0.552,0.151,-0.4559
2,2018-03-29 19:44:06,56138,979444104923840512,,False,12922,Twitter for iPhone,We are going to REBUILD our crumbling infrastr...,0.04,0.772,0.188,0.8516
3,2018-03-29 11:57:38,112077,979326715272065024,,False,29771,Twitter for iPhone,I have stated my concerns with Amazon long bef...,0.117,0.85,0.034,-0.4962
4,2018-03-28 21:31:54,41557,979108846408003584,25073877.0,False,8840,Twitter for iPhone,"....In the interim, Hon. Robert Wilkie of DOD ...",0.0,0.747,0.253,0.8697


In [127]:
# TODO: add approval estimates
for index,row in tweets.iterrows():
    date = row['created_at'].date()
    approval = approval_estimates[approval_estimates['timestamp'] == date].iloc[0]['approve_estimate']

IndexError: single positional indexer is out-of-bounds