**Import libraries**

In [129]:
from datetime import date, timedelta, datetime
from getpass import getpass
from operator import itemgetter

import pandas as pd
import plotly.graph_objects as go
import requests
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

**Try out Vader Sentiment Analyzer**

In [2]:
# Build one VADER SentimentIntensityAnalyzer and keep it at module level;
# sentiment_analyzer_scores() reads this name on every call.
analyser = SentimentIntensityAnalyzer()

In [3]:
def sentiment_analyzer_scores(sentence):
    """Return the VADER polarity scores for ``sentence``.

    Thin wrapper that delegates to the module-level ``analyser`` and hands
    back whatever ``polarity_scores`` produces (the example cells that follow
    show a dict with 'neg', 'neu', 'pos' and 'compound' entries).
    """
    return analyser.polarity_scores(sentence)

In [4]:
# Example 1: score a lukewarm sentence.
sentiment_analyzer_scores("Today is an okay day.")

{'neg': 0.0, 'neu': 0.678, 'pos': 0.322, 'compound': 0.2263}

In [5]:
# Example 2: score an enthusiastic sentence with an exclamation mark.
sentiment_analyzer_scores("Today is an amazing day!")

{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'compound': 0.6239}

In [6]:
# Example 3: score the enthusiastic sentence followed by a second, flatter clause.
sentiment_analyzer_scores("Today is an amazing day! Vader is working but it took a while")

{'neg': 0.0, 'neu': 0.803, 'pos': 0.197, 'compound': 0.4003}

In [7]:
# Example 4: score a negatively worded sentence.
sentiment_analyzer_scores("Today kind of sucked.")

{'neg': 0.523, 'neu': 0.477, 'pos': 0.0, 'compound': -0.5095}

**Pull in Live News Data from News API**
<p>News API only displays news up to a month old.</p>

In [111]:
# Earliest publication date to request. The look-back is 28 days (not 30) so the
# start date stays inside the roughly one-month history News API exposes.
NEWS_LOOKBACK_DAYS = 28
news_start_date = date.today() - timedelta(days=NEWS_LOOKBACK_DAYS)
print(news_start_date)

2020-02-10


In [112]:
# Endpoint only: the query goes through `params` so requests does the URL
# encoding, and HTTPS keeps the request (and the key) out of plaintext transit.
url = 'https://newsapi.org/v2/top-headlines'

# NOTE(review): the News API docs list `from` and `sortBy` under /v2/everything,
# not /v2/top-headlines. They are kept so the request matches the original;
# confirm whether they have any effect on this endpoint.
query_params = {
    #'q': input('company: '),
    'country': 'us',
    'category': 'business',
    'from': str(news_start_date),
    'sortBy': 'popularity',
    'pageSize': 100,
}

response = requests.get(
    url,
    params=query_params,
    # getpass() does not echo what is typed, so the key is not written into the
    # notebook's saved output; sending it as a header keeps it out of the URL.
    headers={'X-Api-Key': getpass('api key: ')},
    timeout=30,
)
# Surface HTTP failures (bad key, rate limit, ...) here rather than silently
# printing "Total Results: None" and failing later.
response.raise_for_status()
print('Total Results: ' + str(response.json().get('totalResults')))

api key: [REDACTED]
Total Results: 70


**Perform Analysis on the News Title**

In [88]:
# Fall back to an empty list when the payload has no 'articles' entry (or it is
# null), so the loop below is a no-op instead of raising TypeError.
news = response.json().get('articles') or []

# Neutral placeholder used when an article has no text to score.
na_score = {'neg': 0, 'neu': 0, 'pos': 0, 'compound': 0}

# Attach a sentiment score for both the title and the description of each article.
for item in news:
    for text_key, score_key in (('title', 'title_score'), ('description', 'desc_score')):
        text = item.get(text_key)
        if text is None:
            # Give each article its own copy so rows never share one dict object.
            item[score_key] = dict(na_score)
        else:
            item[score_key] = sentiment_analyzer_scores(text)

In [89]:
news_df = pd.DataFrame(news)

# Safety net: any score cell that is not a dict (e.g. NaN) becomes a neutral
# all-zero score. Series.fillna(value=<dict>) cannot do this: it reads the dict
# as an index-label -> fill-value mapping rather than as the value to insert,
# and inplace=True on a column selection is chained assignment.
for score_col in ('title_score', 'desc_score'):
    # The columns are absent when no articles were returned.
    if score_col in news_df.columns:
        news_df[score_col] = news_df[score_col].map(
            lambda cell: cell if isinstance(cell, dict) else dict(na_score)
        )

print('row and column: ' + str(news_df.shape))
news_df.head()

row and column: (70, 10)


Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,title_score,desc_score
0,"{'id': 'bloomberg', 'name': 'Bloomberg'}",,Apple’s Cook Offers Work From Home This Week t...,,https://www.bloomberg.com/tosv2.html?vid=&uuid...,,2020-03-08T20:23:41Z,"To continue, please click the box below to let...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0, 'neu': 0, 'pos': 0, 'compound': 0}"
1,"{'id': 'reuters', 'name': 'Reuters'}",Megan Davies,"Investor fears rise over recession, bear marke...","The words ""bear market"" and ""recession"" are be...",https://www.reuters.com/article/us-health-coro...,https://s3.reutersmedia.net/resources/r/?m=02&...,2020-03-08T20:16:18Z,NEW YORK (Reuters) - The words “bear market” a...,"{'neg': 0.337, 'neu': 0.663, 'pos': 0.0, 'comp...","{'neg': 0.142, 'neu': 0.743, 'pos': 0.115, 'co..."
2,"{'id': None, 'name': 'Bitcoinist.com'}",,"Bitcoin Price Consolidating, is a Big Move Abo...","Bitcoin price continues to look bearish, yet a...",https://bitcoinist.com/bitcoin-price-consolida...,https://bitcoinist.com/wp-content/uploads/2020...,2020-03-08T19:03:03Z,"Bitcoin price continues to look bearish, yet a...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.782, 'pos': 0.218, 'comp..."
3,"{'id': 'fox-news', 'name': 'Fox News'}",Michael Hollan,Family accidentally orders 12 years' worth of ...,There’s nothing worse than not having enough t...,https://www.foxnews.com/lifestyle/12-years-toi...,https://static.foxnews.com/foxnews.com/content...,2020-03-08T18:53:56Z,There’s nothing worse than not having enough t...,"{'neg': 0.18, 'neu': 0.677, 'pos': 0.143, 'com...","{'neg': 0.0, 'neu': 0.801, 'pos': 0.199, 'comp..."
4,"{'id': None, 'name': 'Oregonlive.com'}",Laura Gunderson | The Oregonian/OregonLive,New coronavirus case at OHSU Hospital in Portl...,OSHU informed its employees over the weekend t...,https://www.oregonlive.com/coronavirus/2020/03...,https://www.oregonlive.com/resizer/PoxhmM45EGw...,2020-03-08T18:53:02Z,This is breaking news and will be updated.\r\n...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp..."


In [90]:
#other ways to add title score object to news


#add title score way 2 -----------------------:

# def add_title_scoore():
#     for item in news:
#         score = {'title_score' : sentiment_analyzer_scores(item['title'])}
#     return news
# add_title_scoore()


#add title score way 3 -----------------------:

# result = [dict(item, scoreeeeeeeeeeeeeee=sentiment_analyzer_scores(item['title'])) for item in news]
# print(result)


#this works append static object -------------:

# for item in news:
#     item.update({'scooooooooooooooooore': 'baaaaaaaaaaaaaaaaaad'})
# print(news)

**Plot Title Sentiment Score**

In [93]:
def graph_sentiment(text, score, graph_title):
    """Show a stacked bar chart of sentiment components.

    Args:
        text: x-axis values, one per scored item.
        score: iterable of score dicts; the 'neg', 'neu', 'pos' and
            'compound' entries of each dict are plotted.
        graph_title: title placed on the figure.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # (score key, legend label, extra Bar styling) for each trace, in draw order.
    trace_specs = (
        ('neg', 'Negative', {'marker_color': '#EE7674'}),
        ('neu', 'Neutral', {'marker_color': '#247BA0'}),
        ('pos', 'Positive', {}),
        ('compound', 'Compound', {}),
    )

    fig = go.Figure()
    for key, label, styling in trace_specs:
        heights = [entry.get(key) for entry in score]
        fig.add_trace(go.Bar(x=text, y=heights, name=label, **styling))

    fig.update_layout(barmode='stack', title=graph_title)
    fig.show()


# Plot the per-article scores computed from the headline text.
graph_sentiment(news_df['title'], news_df['title_score'], 'Title Sentiment')

In [94]:
# Plot the per-article scores computed from the description text.
graph_sentiment(news_df['description'], news_df['desc_score'], 'Description Sentiment')

***How to interpret Compound Values?*** 
<p>The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive). This is the most useful metric if you want a single unidimensional measure of sentiment for a given sentence. Calling it a 'normalized, weighted composite score' is accurate.</p>

**Next Step: Populate the score on a daily basis**
<p> Use the mean of the headline score for each day </p>

In [139]:
#extract date and scores
news_score_df = news_df[['publishedAt', 'title_score', 'desc_score']].copy()

#remove time from datetime
news_score_df['publishedAt'] = news_score_df['publishedAt'].str.split('T').str[0]

news_score_df.head()

Unnamed: 0,publishedAt,title_score,desc_score
0,2020-03-08,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0, 'neu': 0, 'pos': 0, 'compound': 0}"
1,2020-03-08,"{'neg': 0.337, 'neu': 0.663, 'pos': 0.0, 'comp...","{'neg': 0.142, 'neu': 0.743, 'pos': 0.115, 'co..."
2,2020-03-08,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.782, 'pos': 0.218, 'comp..."
3,2020-03-08,"{'neg': 0.18, 'neu': 0.677, 'pos': 0.143, 'com...","{'neg': 0.0, 'neu': 0.801, 'pos': 0.199, 'comp..."
4,2020-03-08,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp..."


**Next Step: Correlate the score with the daily movement of the S&P and Dow Jones**

<p>Daily average Title score vs S&P <br />
   Daily average Description score vs S&P <br />
   Daily average Title score vs DJ <br />
   Daily average Description score vs DJ <br /></p>

**Next Step: Compare the correlations**

**Other to-do: add pagination to the API call to pull up to 20 pages**