In [128]:
import pandas as pd
import numpy as np
import time
from datetime import *
import matplotlib.pyplot as plt

import nltk
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment import SentimentIntensityAnalyzer

import psycopg2 as pg2
from sqlalchemy import create_engine

In [129]:
#s = ['vym','hd','tsla','amat','dfs','aapl']
#s = ['tsla']
string = 'tsla,amat,avgo,voo,vti,jpm,iipr,vt,vxus,tgt,dfs,schd,dgro,nobl,schb,spy,nsc,sdy,gm,unp,qqq,dis,land,aapl,stor,ko'

In [130]:
s = string.split(',')

In [131]:
def stock_news_headline_parser_scraper(stock_ticker_list):
    
    website_url = 'https://finviz.com/quote.ashx?t='
    news_tables = {}

    for ticker in stock_ticker_list:

        #URL for each stock
        url = website_url + ticker

        #requesting url for each ticker
        response = urlopen(Request(url=url, headers={'user-agent': 'sentiment-analysis-app'}))

        #html parser, using bs4. downloaded the html
        html = BeautifulSoup(response,'html')

        #the body that contains all the news article links
        news_table_html_body = html.find(id = 'news-table')

        #each stock is in dictionary with value corresponding to news table
        news_tables.update({ ticker.upper() : news_table_html_body })



    parsed_data = []

    #itterating over key and value pairs. itterating over a dict
    for ticker, news_tables in news_tables.items():

        # news_tables.find_all('tr') is bs4 list of all articles headlins
        for row in news_tables.find_all('tr'):

            #title is in acnchor tag 'a', retrieving that from bs4 obj row
            title = row.a.text

            #time stamps have td tags
            timestamp = row.td.text

            #no date information
            if(len(timestamp.split(' ')) == 1):
                time = timestamp.split(' ')[0]

            #has date info, before time
            else:
                date = timestamp.split(' ')[0]
                time = timestamp.split(' ')[1]

            parsed_data.append([ticker,title,date,time])
        
        df = pd.DataFrame(parsed_data,columns=['Ticker','Title','Date', 'Time'])
        vader = SentimentIntensityAnalyzer()
        
        #compund score for each article title
        df['Compound_score'] = df['Title'].apply(lambda title : vader.polarity_scores(title)['compound'] )

        for i in df.index:
            df.at[i, 'Time'] = df['Time'][i][0:7]
        
        #converting string time col to datetime obj   
        df['Time'] = pd.to_datetime(df['Time']).dt.strftime('%H:%M:%S')
        df['Date'] = pd.to_datetime(df.Date).dt.date
        
        df = df.set_index('Date')
        
        df['Updated'] = pd.to_datetime('now')
        
        #filterign nuetral news out of df
        df = df[df.Compound_score != 0]

    
    return df[['Time','Ticker','Compound_score','Title','Updated']]

In [132]:
new_df = stock_news_headline_parser_scraper(s)

#new_df

In [133]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/Stock_sentiment_analysis_data')
new_df.to_sql('my_stock_sentiment_data', engine,if_exists='append')

In [134]:
query = '''select my_stock_sentiment_data."Date",my_stock_sentiment_data."Ticker", 
round(cast(avg(my_stock_sentiment_data."Compound_score") as numeric),2)
from my_stock_sentiment_data
group by my_stock_sentiment_data."Date", my_stock_sentiment_data."Ticker"
order by my_stock_sentiment_data."Ticker";'''

In [135]:
from_sql_df = pd.read_sql_query(query,con=engine,)

In [136]:
from_sql_df

Unnamed: 0,Date,Ticker,round
0,2021-02-17,AAPL,-0.07
1,2021-02-16,AAPL,-0.29
2,2021-02-19,AAPL,0.34
3,2021-02-20,AAPL,0.15
4,2021-02-18,AAPL,0.07
...,...,...,...
904,2021-01-04,VXUS,0.51
905,2019-02-27,VXUS,-0.60
906,2020-03-03,VXUS,-0.36
907,2018-01-25,VXUS,0.36


In [137]:
#from_sql_df.plot(kind = 'bar',figsize=(15,10))
