**Import libraries**

In [1]:
import pandas as pd
import numpy as np
from datetime import date, timedelta, datetime
from operator import itemgetter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import requests
import plotly.graph_objects as go
from pandas_datareader import data as pdr

#yahoo finance and pandadata reader override
import yfinance as yf
yf.pdr_override()


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



**Try out Vader Sentiment Analyzer**

In [2]:
#call analyser object
analyser = SentimentIntensityAnalyzer()

In [3]:
def sentiment_analyzer_scores(sentence):
    score = analyser.polarity_scores(sentence)
    return score

In [4]:
sentiment_analyzer_scores("Today is an okay day.")

{'neg': 0.0, 'neu': 0.678, 'pos': 0.322, 'compound': 0.2263}

In [5]:
sentiment_analyzer_scores("Today is an amazing day!")

{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'compound': 0.6239}

In [6]:
sentiment_analyzer_scores("Today is an amazing day! Vader is working but it took a while")

{'neg': 0.0, 'neu': 0.803, 'pos': 0.197, 'compound': 0.4003}

In [7]:
sentiment_analyzer_scores("Today kind of sucked.")

{'neg': 0.523, 'neu': 0.477, 'pos': 0.0, 'compound': -0.5095}

**Pull in Live News Data from News API**
<p>News API only display news up to a month old</p>

In [23]:
#enter company and api key
company = input('company: ')
stock_ticker = input('stock ticker: ')
api_key = input('api key: ')

company: shopify
stock ticker: shop
api key: 798c7707bbe8404199296c0521a99f61


<h4 style='background:#ffbdb3'>Major limitation of News API: Free version has max of 100 results per call; 1 month old max; 500 request per day</h4>
<h4>To get around this, we will make a request for each day, and compile the first 100 most popular results for each day</h4>

In [24]:
#get dates, 7 days from today
today = date.today() - timedelta(0)
numdays = 15
date_list = [today - timedelta(days=x) for x in range(numdays)]

In [25]:
#try return news for 1 day, make 7 requests, sort by most popular results
total_res = []

for x in range(0, numdays):  
    url = ('http://newsapi.org/v2/everything?'
           'q=' + company + '&'
           'from=' + str(date_list[x]) + '&'
           'to=' + str(date_list[x]) + '&'
           'language=en&'
           'sortBy=popularity&'
           'pageSize=100&'
           'apiKey=' + api_key)
    response = requests.get(url)
    total_res.append(response.json())


#print('Total Results: ' + str(response.json().get('totalResults')))

#print(*total_res, sep = '\n')   

In [26]:
#put all 'articles' in 1 list, then remove the nested list
articles = []
for x in range(0, len(total_res)):
    articles.append(total_res[x]['articles'])

news = []
def removeNesting(nestedList):
    for i in nestedList:
        if type(i) == list:
            removeNesting(i)
        else: 
            news.append(i)
            
removeNesting(articles)
print(news)



**Perform Analysis on the News Title**

In [27]:
#news = response.json().get('articles')

#add title score:
na_score = {'neg': 0, 'neu': 0, 'pos': 0, 'compound': 0}

for item in news:
    if item['title'] is None:
        item.update({'title_score': na_score})
    else:
        item.update({'title_score': sentiment_analyzer_scores(item['title'])})
    if item['description'] is None:
        item.update({'desc_score': na_score})
    else:
        item.update({'desc_score': sentiment_analyzer_scores(item['description'])})

In [28]:
news_df = pd.DataFrame(news)
news_df.sort_values(by=['publishedAt'])

#replace all NaN score with 0 

print('row and column: ' + str(news_df.shape))
news_df.head(30)

row and column: (310, 10)


Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,title_score,desc_score
0,"{'id': None, 'name': 'Forbes.com'}","Shahar Ziv, Contributor, Shahar Ziv, Contribut...",Spread Financial Empathy Instead Of Coronavirus,Each of us has a role to play not only in limi...,https://www.forbes.com/sites/shaharziv/2020/03...,https://thumbor.forbes.com/thumbor/fit-in/1200...,2020-03-18T12:11:00Z,The virus will bankrupt more people than it ki...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.943, 'pos': 0.057, 'comp..."
1,"{'id': None, 'name': 'Fool.ca'}",,2 TSX Tech Stocks To Consider As the COVID-19 ...,Here's why Canadian investors can consider tec...,https://www.fool.ca/2020/03/18/2-tsx-tech-stoc...,https://yw553ftvhw1iqegz29fycoyw-wpengine.netd...,2020-03-18T12:10:18Z,Most stocks have fallen off the cliff as the g...,"{'neg': 0.333, 'neu': 0.667, 'pos': 0.0, 'comp...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
2,"{'id': 'the-times-of-india', 'name': 'The Time...",Aishwarya Dharni,Global Recycling Day: Bottled Water Company Ma...,Busting the myth that plastic PET bottles are ...,https://www.indiatimes.com/trending/social-rel...,https://im.indiatimes.in/content/2020/Mar/PETb...,2020-03-18T08:25:35Z,Plastic PET bottles don't have to be worthless...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.061, 'neu': 0.818, 'pos': 0.122, 'co..."
3,"{'id': None, 'name': 'Prnewswire.com'}",,"Washington, DC, Vermont, and Colorado Lead the...","SCARBOROUGH, Maine, March 18, 2020 /PRNewswire...",https://www.prnewswire.com/news-releases/washi...,https://mma.prnewswire.com/media/1134882/map_m...,2020-03-18T12:38:00Z,"SCARBOROUGH, Maine, March 18, 2020 /PRNewswire...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 0.941, 'pos': 0.059, 'comp..."
4,"{'id': None, 'name': 'Dappered.com'}",vaxxy,Best Ecommerce Extensions,We provide best plugins for platforms like - M...,https://threads.dappered.com/showthread.php/26...,,2020-03-18T05:50:21Z,,"{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'comp...","{'neg': 0.0, 'neu': 0.782, 'pos': 0.218, 'comp..."
5,"{'id': 'techcrunch', 'name': 'TechCrunch'}",Greg Kumparak,All the companies from Y Combinator’s W20 Demo...,Y Combinator’s Demo Day was a bit different th...,http://techcrunch.com/2020/03/17/all-the-compa...,https://techcrunch.com/wp-content/uploads/2019...,2020-03-17T19:42:34Z,Y Combinator’s Demo Day was a bit different th...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
6,"{'id': None, 'name': 'Entrepreneur.com'}",Kanika Tolver,Why Your Best Move Might Be to Ditch Your Day ...,If working for someone else isn't making you a...,https://www.entrepreneur.com/article/345077,https://assets.entrepreneur.com/content/3x2/20...,2020-03-17T16:30:00Z,"March\r\n17, 2020\r\n7 min read\r\nOpinions ex...","{'neg': 0.0, 'neu': 0.659, 'pos': 0.341, 'comp...","{'neg': 0.0, 'neu': 0.871, 'pos': 0.129, 'comp..."
7,"{'id': 'business-insider', 'name': 'Business I...",Lisa Eadicicco,Facebook is reportedly giving every employee a...,"Facebook is giving a $1,000 bonus to employees...",https://www.businessinsider.com/facebook-gives...,https://i.insider.com/5e70f020c48540301e19b3e4...,2020-03-17T16:10:23Z,"Facebook is giving employees a $1,000 bonus to...","{'neg': 0.0, 'neu': 0.636, 'pos': 0.364, 'comp...","{'neg': 0.0, 'neu': 0.856, 'pos': 0.144, 'comp..."
8,"{'id': None, 'name': 'Iwillteachyoutoberich.com'}",Lars Lofgren,How to Make Money on YouTube in 2020,Folks are making serious money on YouTube. Bef...,https://www.iwillteachyoutoberich.com/blog/mak...,https://cdn.iwillteachyoutoberich.com/wp-conte...,2020-03-17T19:24:33Z,Folks are making serious money on YouTube.\r\n...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.033, 'neu': 0.967, 'pos': 0.0, 'comp..."
9,"{'id': None, 'name': 'Yahoo.com'}","Greg Kumparak, Kirsten Korosec, Josh Constine,...",All the companies from Y Combinator's W20 Demo...,Y Combinator's Demo Day was a bit different th...,https://finance.yahoo.com/news/companies-y-com...,https://s.yimg.com/uu/api/res/1.2/bAGN.7jhQ8by...,2020-03-17T19:42:34Z,"As concerns grew over the spread of COVID-19, ...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


In [29]:
#other ways to add title score object to news


#add title score way 2 -----------------------:

# def add_title_scoore():
#     for item in news:
#         score = {'title_score' : sentiment_analyzer_scores(item['title'])}
#     return news
# add_title_scoore()


#add title score way 3 -----------------------:

# result = [dict(item, scoreeeeeeeeeeeeeee=sentiment_analyzer_scores(item['title'])) for item in news]
# print(result)


#this works append static object -------------:

# for item in news:
#     item.update({'scooooooooooooooooore': 'baaaaaaaaaaaaaaaaaad'})
# print(news)

**Plot Title Sentiment Score**

In [30]:
def graph_sentiment(text, score, graph_title):
    x = text
    y = [d.get('neg') for d in score]
    y1 = [d.get('neu') for d in score]
    y2 = [d.get('pos') for d in score]
    y3 = [d.get('compound') for d in score]

    fig = go.Figure(go.Bar(x=x, y=y, name='Negative', marker_color='#EE7674'))
    fig.add_trace(go.Bar(x=x, y=y1, name='Neutral', marker_color='#247BA0'))
    fig.add_trace(go.Bar(x=x, y=y2, name='Positive'))
    fig.add_trace(go.Bar(x=x, y=y3, name='Compound'))

    fig.update_layout(title=graph_title)
    fig.show()


graph_sentiment(news_df['title'], news_df['title_score'], 'Title Sentiment')

In [31]:
graph_sentiment(news_df['description'], news_df['desc_score'], 'Description Sentiment')

***How to interpret Compound Values?*** 
<p>The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive). This is the most useful metric if you want a single unidimensional measure of sentiment for a given sentence. Calling it a 'normalized, weighted composite score' is accurate.</p>

**Next Step: Populate the score on a daily basis**
<p> Use the mean of the headline score for each day </p>

In [32]:
#extract date and scores
news_score_df = news_df[['publishedAt', 'title_score', 'desc_score']].copy()

#remove time from datetime
news_score_df['publishedAt'] = news_score_df['publishedAt'].str.split('T').str[0]

#populate each score in nested title_score and desc_score into own column
news_score_df = pd.concat([news_score_df, 
                 pd.DataFrame((d for i, d in news_score_df['title_score'].iteritems()))], 
                 axis=1)

news_score_df = pd.concat([news_score_df, 
                 pd.DataFrame((d for i, d in news_score_df['desc_score'].iteritems()))], 
                 axis=1)


news_score_df.columns = ['publishedAt', 'title_score', 'desc_score', 
                         't_neg', 't_neu', 't_pos', 't_compound',
                         'd_neg', 'd_neu', 'd_pos', 'd_compound']

daily_score_df = news_score_df.groupby('publishedAt', as_index=True)[['t_neg', 't_neu', 't_pos', 't_compound',
                         'd_neg', 'd_neu', 'd_pos', 'd_compound']].mean().reset_index()

daily_score_df.head(30)



Unnamed: 0,publishedAt,t_neg,t_neu,t_pos,t_compound,d_neg,d_neu,d_pos,d_compound
0,2020-03-04,0.068867,0.845133,0.086033,0.028113,0.0198,0.847067,0.1331,0.385507
1,2020-03-05,0.04219,0.871952,0.085905,0.094581,0.060286,0.873619,0.066095,0.042367
2,2020-03-06,0.086037,0.827815,0.086111,-0.004811,0.051333,0.817704,0.094,0.225219
3,2020-03-07,0.054286,0.845571,0.100143,0.1093,0.040857,0.876571,0.082143,0.212829
4,2020-03-08,0.130778,0.769111,0.100111,-0.114589,0.045,0.832889,0.122,0.263278
5,2020-03-09,0.085727,0.859364,0.054909,-0.081873,0.028909,0.854364,0.116818,0.269091
6,2020-03-10,0.10919,0.829738,0.061071,-0.017979,0.068571,0.867429,0.063952,-0.025652
7,2020-03-11,0.076273,0.846091,0.077636,-0.022133,0.065303,0.843636,0.091121,0.122264
8,2020-03-12,0.033467,0.889533,0.077,0.111597,0.049867,0.8881,0.062033,0.032673
9,2020-03-13,0.061043,0.879609,0.059348,0.017717,0.047565,0.789957,0.075478,0.082504


In [33]:
def daily_score_graph(graph_title, x, y1, y2, y3, y4):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x, y=y1,
                        mode='lines+markers',
                        name='neg',
                        line=dict(color='#EE7674')))
    fig.add_trace(go.Scatter(x=x, y=y2,
                        mode='lines+markers',
                        name='neu',
                        line=dict(color='#247BA0')))
    fig.add_trace(go.Scatter(x=x, y=y3,
                        mode='lines+markers', 
                        name='pos'))
    fig.add_trace(go.Scatter(x=x, y=y4,
                        mode='lines+markers', 
                        name='compound',
                        line=dict(width=4)))

    fig.update_layout(title=graph_title)
    fig.show()

daily_score_graph('Daily Title Score', daily_score_df['publishedAt'], 
                  daily_score_df['t_neg'], daily_score_df['t_neu'], daily_score_df['t_pos'], daily_score_df['t_compound'])

In [34]:
daily_score_graph('Daily Description Score', daily_score_df['publishedAt'], 
                  daily_score_df['d_neg'], daily_score_df['d_neu'], daily_score_df['d_pos'], daily_score_df['d_compound'])


**Next Step: Pull Daily price of S&P and Dow Jones**

reference: https://pypi.org/project/yfinance/

In [38]:
#plot daily close for last 7 days
def stock_graph():
    x = stock_data_df['Date']
    y = stock_data_df['Adj Close']

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x, y=y,
                        mode='lines+markers',
                        name='Daily Close',
                        line=dict(color='#EE7674')))

    graph_title = str(company).upper() + ' Daily Close'
    fig.update_layout(title=graph_title)
    fig.show()

stock_graph()

#add label for day of week
#remove weekends

In [50]:
stock_data = pdr.get_data_yahoo(stock_ticker, start=date_list[-1], end=date_list[0])
stock_data_df = pd.DataFrame(stock_data).reset_index()

stock_data_df['Close PC'] = stock_data_df['Close'].pct_change()

print('Start Date: ' + str(date_list[-1]))
print('End Date: ' + str(date_list[0]))
stock_data_df.head(15)

[*********************100%***********************]  1 of 1 completed
Start Date: 2020-03-04
End Date: 2020-03-18


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Close PC
0,2020-03-04,489.0,514.0,486.0,512.22998,512.22998,2694500,
1,2020-03-05,495.0,512.375,491.25,498.220001,498.220001,2460200,-0.027351
2,2020-03-06,480.0,491.200012,459.147003,472.070007,472.070007,2913500,-0.052487
3,2020-03-09,430.820007,446.079987,416.51001,418.320007,418.320007,2986600,-0.11386
4,2020-03-10,445.0,450.5,420.630005,450.339996,450.339996,2864100,0.076544
5,2020-03-11,435.25,442.0,406.329987,422.309998,422.309998,2812600,-0.062242
6,2020-03-12,371.0,403.570007,366.100006,377.779999,377.779999,4419400,-0.105444
7,2020-03-13,404.0,406.420013,363.01001,390.899994,390.899994,3314400,0.034729
8,2020-03-16,339.0,362.0,321.5,322.290009,322.290009,4179900,-0.175518
9,2020-03-17,330.019989,358.0,307.670013,355.089996,355.089996,4233800,0.101772


In [46]:
def pct(data, n):
    return data.pct_change(n)

pct(stock_data_df['Close'], 1)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-04,489.0,514.0,486.0,512.22998,512.22998,2694500
2020-03-05,495.0,512.375,491.25,498.220001,498.220001,2460200
2020-03-06,480.0,491.200012,459.147003,472.070007,472.070007,2913500
2020-03-09,430.820007,446.079987,416.51001,418.320007,418.320007,2986600
2020-03-10,445.0,450.5,420.630005,450.339996,450.339996,2864100
2020-03-11,435.25,442.0,406.329987,422.309998,422.309998,2812600
2020-03-12,371.0,403.570007,366.100006,377.779999,377.779999,4419400
2020-03-13,404.0,406.420013,363.01001,390.899994,390.899994,3314400
2020-03-16,339.0,362.0,321.5,322.290009,322.290009,4179900
2020-03-17,330.019989,358.0,307.670013,355.089996,355.089996,4233800


**Next Step: Correlate the score of the daily movement of S&P and Dow Jones**

<p>Daily average Title score vs S&P <br />
   Daily average Description score vs S&P <br />
   Daily average Title score vs DJ <br />
   Daily average Description score vs DJ <br /></p>

**Next Step: Compare the correlations**

**Other to do: add pages to API call to pull up to 20 pages**