In [49]:
"""
Created on Sun Feb 21 2021
@author: Sahand-j
"""

import os
from datetime import date
from urllib.request import urlopen, Request

import pandas as pd
import pandas_datareader,datetime
import pandas_datareader.data as web
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from bs4 import BeautifulSoup
from nltk.sentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine
import yahoo_fin.stock_info as si

In [50]:
# Comma-separated tickers for the initial scrape that seeds the master sentiment table.
s = 'voo,vti,jpm,iipr,vt,vxus,tgt,dfs,schd,dgro,nobl,schb,spy,nsc,sdy,gm,unp,qqq,dis,land,aapl,stor,ko'
# Comma-separated tickers scraped later as the "new day" batch.
s2 = 'tsla,amat,avgo'

In [51]:
def format_ticker_list(ticker_list):
    """
    Turn a comma-separated ticker string into a list of upper-cased symbols.

    Whitespace around each symbol is removed and empty entries (from an empty
    string, a trailing comma or a doubled comma) are dropped, so callers never
    receive a blank ticker.

    :param ticker_list(str): tickers separated by commas, e.g. 'voo, vti,jpm'
    :@return(list): upper-cased ticker symbols in their original order
    """
    symbols = (piece.strip() for piece in ticker_list.split(','))
    return [symbol.upper() for symbol in symbols if symbol]

In [52]:
"""
    Scrapes stock news headlines from finviz.com
    :@return(dict): returns dictionary of stock tickers and their associated news headlines for available dates
    """

def stock_headline_scraper_dict(ticker_list):
    """
    Download the finviz.com quote page of every ticker and pull out its news table.

    :param ticker_list: iterable of ticker symbol strings
    :@return(dict): upper-cased ticker -> the element with id 'news-table' found on
        that ticker's page (None when the page contains no such element)
    """
    website_url = 'https://finviz.com/quote.ashx?t='
    request_headers = {'user-agent': 'sentiment-analysis-app'}
    news_tables_dict = {}

    for ticker in ticker_list:
        # URL for each stock, requested with an explicit user-agent header
        request = Request(url=website_url + ticker, headers=request_headers)

        # the with-block closes the HTTP response once the page has been parsed
        with urlopen(request) as response:
            # naming the parser explicitly keeps the result independent of
            # which optional parsers (lxml, html5lib) happen to be installed
            html = BeautifulSoup(response, 'html.parser')

        # the body that contains all the news article links
        news_tables_dict[ticker.upper()] = html.find(id='news-table')

    return news_tables_dict
    

In [53]:
"""
    takes in dictionary of stock tickers and their associated headlines
    :@return(Dataframe): returns DataFrame of stock tickers, their associated news headlines, and sentiment score
    """

def _parse_news_rows(ticker, news_table):
    """Return [ticker, title, date, time] records for the 'tr' rows of one news table."""
    records = []
    # rows that carry only a time reuse the most recent date seen in this table;
    # starting from None keeps a date from leaking in from the previous ticker
    current_date = None

    for row in news_table.find_all('tr'):
        # rows without a headline link or a timestamp cell cannot be parsed
        if row.a is None or row.td is None:
            continue

        # split() with no separator also discards padding / non-breaking spaces
        stamp = row.td.text.split()

        if len(stamp) > 1:
            # has date info, before time
            current_date, time = stamp[0], stamp[1]
        else:
            # no date information
            time = stamp[0] if stamp else ''

        records.append([ticker, row.a.text, current_date, time])
    return records


def stock_sentiment_df(news_tables_dict):
    """
    Score every scraped headline with VADER and return the non-neutral ones.

    :param news_tables_dict(dict): ticker -> news table element as produced by
        stock_headline_scraper_dict; tickers whose table is None are skipped
    :@return(Dataframe): columns 'date', 'ticker', 'comp_score', 'title', 'updated';
        rows whose compound score is exactly 0 are removed. An empty Dataframe with
        those columns is returned when no headline could be parsed.
    """
    result_columns = ['date', 'ticker', 'comp_score', 'title', 'updated']

    parsed_data = []
    for ticker, news_table in news_tables_dict.items():
        if news_table is None:
            continue
        parsed_data.extend(_parse_news_rows(ticker, news_table))

    if not parsed_data:
        return pd.DataFrame(columns=result_columns)

    # built once, after all tickers have been collected
    df = pd.DataFrame(parsed_data, columns=['ticker', 'title', 'date', 'time'])

    # compound score for each article title
    vader = SentimentIntensityAnalyzer()
    df['comp_score'] = df['title'].apply(lambda title: vader.polarity_scores(title)['compound'])

    # keep the first 7 characters of the time text, then normalise to HH:MM:SS
    df['time'] = pd.to_datetime(df['time'].str[:7]).dt.strftime('%H:%M:%S')
    df['date'] = pd.to_datetime(df['date']).dt.date

    df['updated'] = pd.to_datetime('now')

    # filter neutral news out and keep the columns of interest; copy() so callers
    # can assign to the result without touching a slice of the intermediate frame
    return df.loc[df['comp_score'] != 0, result_columns].copy()


In [54]:
# The database URL is read from the STOCKS_DATABASE_URL environment variable when it
# is set, so real credentials do not have to be stored in the notebook; without it
# the original local development URL is used.
engine = create_engine(
    os.environ.get(
        'STOCKS_DATABASE_URL',
        'postgresql://postgres:postgres@localhost:5432/Stocks',
    )
)

In [55]:
#has datetime index
# Scrape and score the headlines for the initial ticker list, convert the
# 'date' column to datetimes and use it as the index.
df = (
    stock_sentiment_df(stock_headline_scraper_dict(format_ticker_list(s)))
    .assign(date=lambda frame: pd.to_datetime(frame.date))
    .set_index('date')
)

df.head(2)

Unnamed: 0_level_0,ticker,comp_score,title,updated
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-06,VOO,0.3818,Whats Behind ETF Issuer Growth Gap,2021-02-24 02:48:36.363511
2021-01-04,VOO,0.5106,Record ETF Assets Growth In 2020,2021-02-24 02:48:36.363511


In [56]:
#make this also updatable with new daily stock data
#df.to_sql('stock_sentiments_data', engine, if_exists='replace')

In [57]:
#loading stock data
def stock_prices_dict(ticker_list, window_fraction=.15):
    """
    Load the price history of every ticker and add rolling / return columns.

    Each frame comes from si.get_data(ticker) and gains four columns:
    'rolling_mean' and 'rolling_std' of 'adjclose' over a window of
    round(len(frame) * window_fraction) rows, 'cumel_return' (cumulative product of
    1 + the one-period percentage change of 'adjclose') and 'updated' (load timestamp).

    :param ticker_list: iterable of ticker symbol strings
    :param window_fraction(float): rolling window length as a fraction of the number
        of rows in the history; must be greater than 0. Defaults to .15.
    :@return(dict): returns dictionary of stock ticker with Dataframe values with stock historic price data
    :raises ValueError: if window_fraction is not greater than 0
    """
    if not window_fraction > 0:
        raise ValueError('window_fraction must be greater than 0')

    dict_of_dfs = {}
    for ticker in ticker_list:
        prices = si.get_data(ticker)

        # one window size shared by the rolling mean and the rolling std
        window = round(len(prices) * window_fraction)
        adjclose_rolling = prices['adjclose'].rolling(window)

        prices['rolling_mean'] = adjclose_rolling.mean()
        prices['rolling_std'] = adjclose_rolling.std()
        prices['cumel_return'] = (1 + prices['adjclose'].pct_change(1)).cumprod()
        prices['updated'] = pd.to_datetime('now')

        dict_of_dfs[ticker.upper()] = prices
    return dict_of_dfs


In [59]:
#datetime index
#stock_prices_dict(format_ticker_list(s)).get('AVGO').head(2)

In [60]:
# Store every headline scraped so far in the 'main_stock_sentiment_data' table,
# which serves as the master data. if_exists='fail' is passed so an existing
# table is not overwritten.
df.to_sql(
    name='main_stock_sentiment_data',
    con=engine,
    if_exists='fail',
)

In [61]:
#new day to reitterate information. adding new info 
df2 = stock_sentiment_df(stock_headline_scraper_dict(format_ticker_list(s2)))

check = engine.has_table('new_data_with_old')
print(check)

if check == True:
    engine.execute('DROP TABLE new_data_with_old CASCADE;')
    df2.to_sql('new_data_with_old', engine, if_exists='replace')
else:
    df2.to_sql('new_data_with_old', engine, if_exists='replace')
    

    
# SQL that creates new_sentiment_data_view: the rows of new_data_with_old for which
# the left join finds no row in main_stock_sentiment_data with the same date, ticker
# and title, i.e. the headlines that are not in the master table yet.
view_query = '''

create view new_sentiment_data_view as
select
new_data_with_old.date,
new_data_with_old.ticker,
new_data_with_old.comp_score,
new_data_with_old.title,
new_data_with_old.updated

from new_data_with_old
left join main_stock_sentiment_data ON
new_data_with_old.date = main_stock_sentiment_data.date AND
new_data_with_old.ticker = main_stock_sentiment_data.ticker AND
new_data_with_old.title = main_stock_sentiment_data.title
WHERE main_stock_sentiment_data.date IS null;

'''



# SQL that appends every row of new_sentiment_data_view to main_stock_sentiment_data.
# 'select *' relies on the view's column order matching the table's column order.
add_new_vals_to_senti_table_query = '''
insert into main_stock_sentiment_data
select *
from new_sentiment_data_view;'''

False


In [62]:
# Run the 'create view new_sentiment_data_view' statement held in view_query.
engine.execute(view_query);

In [63]:
# Run the insert that copies the rows of new_sentiment_data_view into main_stock_sentiment_data.
engine.execute(add_new_vals_to_senti_table_query);