In [373]:
"""
Created on Sun Feb 21 2021
@author: Sahand-j
"""

import pandas as pd
import pandas_datareader,datetime
import pandas_datareader.data as web
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date 

import nltk
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine

import yahoo_fin.stock_info as si

In [374]:
# Comma-separated ticker symbols; the cells below split this string on ','.
# The commented-out assignment is the longer multi-ticker list, the active one a single ticker.
#s = 'tsla,amat,avgo,voo,vti,jpm,iipr,vt,vxus,tgt,dfs,schd,dgro,nobl,schb,spy,nsc,sdy,gm,unp,qqq,dis,land,aapl,stor,ko'
s = 'tsla'

In [375]:
def stock_prices_df(ticker_list, db_url='postgresql://postgres:postgres@localhost:5432/Stocks'):

    """
    Downloads the price history of every ticker and stores it in the
    'stocks_daily_prices' table.

    The histories of all tickers are combined first and written in a single
    call. Checking for the table inside the per-ticker loop would let only the
    first ticker through: once it has created the table, every later ticker
    would be skipped.

    :@param ticker_list(string): comma-separated ticker symbols, e.g. 'tsla,aapl'
    :@param db_url(string): SQLAlchemy connection URL of the target database
    :@return(string): returns string confirming query execution
    """

    engine = create_engine(db_url)
    price_columns = ['ticker', 'open', 'high', 'low', 'close', 'adjclose', 'volume']

    frames = []
    for symbol in ticker_list.split(','):
        symbol = symbol.strip()
        if not symbol:
            # Tolerate stray commas / surrounding blanks in the ticker string.
            continue

        history = si.get_data(symbol)

        # Basic formatting
        history = history[price_columns].fillna(0)
        history['updated'] = pd.to_datetime('now')

        # The unnamed index returned by get_data becomes the 'date' column for the db
        history = history.reset_index().rename(columns={'index': 'date'})
        frames.append(history)

    if frames:
        prices = pd.concat(frames, ignore_index=True)

        # Write the data into the database, but only when the table is missing,
        # so an existing table is never overwritten.
        check = engine.has_table('stocks_daily_prices')
        print(check)
        if not check:
            prices.to_sql('stocks_daily_prices', engine, if_exists='replace')

    # Optional follow-up (disabled): create a primary key on the table
    #   ALTER TABLE stocks_daily_prices ADD PRIMARY KEY (ticker,date);

    return 'query succesful'


In [376]:
#sentiment analysis data
"""
    Scrapes the finviz news headlines of each stock ticker, scores every headline
    with VADER and inserts the non-neutral scores into the database
    :@return(string): returns string confirming table creation
    """

def stock_news_headline_scraper(ticker_list, db_url='postgresql://postgres:postgres@localhost:5432/Stocks'):
    """
    Scrapes the finviz news headlines of every ticker, scores each headline
    with VADER and stores the non-neutral ones in the 'stock_sentiments' table.

    The headlines of all tickers are collected first and written in a single
    call, so that every requested ticker ends up in the table rather than only
    the first one.

    :@param ticker_list(string): comma-separated ticker symbols, e.g. 'tsla,aapl'
    :@param db_url(string): SQLAlchemy connection URL of the target database
    :@return(string): returns string confirming table creation
    """

    website_url = 'https://finviz.com/quote.ashx?t='
    engine = create_engine(db_url)

    # Each stock is a dictionary key whose value is the corresponding news table
    news_tables = {}
    for ticker in ticker_list.split(','):
        ticker = ticker.strip()
        if not ticker:
            # Tolerate stray commas / surrounding blanks in the ticker string.
            continue

        request = Request(url=website_url + ticker,
                          headers={'user-agent': 'sentiment-analysis-app'})

        # The context manager closes the HTTP response once the page is parsed.
        with urlopen(request) as response:
            # 'html.parser' names the standard-library parser explicitly.
            html = BeautifulSoup(response, 'html.parser')

        # The element that contains all the news article links
        news_tables[ticker.upper()] = html.find(id='news-table')

    parsed_data = []
    for ticker, news_table in news_tables.items():
        if news_table is None:
            # The page has no news table for this ticker.
            continue

        # Rows carrying only a time reuse the date of the closest earlier row.
        # Reset per ticker so a date never leaks from one stock to the next.
        article_date = None

        for row in news_table.find_all('tr'):
            # Skip rows that lack a headline link or a timestamp cell.
            if row.a is None or row.td is None:
                continue

            title = row.a.text

            # The cell holds either "<date> <time>" or just "<time>".
            stamp_parts = row.td.text.split()
            if len(stamp_parts) > 1:
                article_date = stamp_parts[0]

            parsed_data.append([ticker, title, article_date])

    df = pd.DataFrame(parsed_data, columns=['ticker', 'title', 'date'])

    # Compound score for each article title
    vader = SentimentIntensityAnalyzer()
    df['comp_score'] = df['title'].apply(lambda title: vader.polarity_scores(title)['compound'])

    df['date'] = pd.to_datetime(df['date']).dt.date
    df['updated'] = pd.to_datetime('now')

    # Filtering neutral news out of df
    df = df[df.comp_score != 0]

    # The time of day is not part of the stored columns, so it is not parsed.
    df = df[['date', 'ticker', 'comp_score', 'title', 'updated']]

    # Write the data into the database, but only when the table is missing,
    # so an existing table is never overwritten.
    check = engine.has_table('stock_sentiments')
    print(check)
    if not check:
        df.to_sql('stock_sentiments', engine, if_exists='replace')

    # Optional follow-up (disabled): create a primary key on the table
    #   ALTER TABLE stock_sentiments ADD PRIMARY KEY (ticker, date, time);
    # The stored frame has no 'time' column, so this key would need adjusting.

    return 'sentiment table created successfully'



In [377]:
# Load the price history of the configured tickers; prints whether the table already existed.
stock_prices_df(s)

False


'query succesful'

In [378]:
# Scrape and score the headlines of the configured tickers; prints whether the table already existed.
stock_news_headline_scraper(s)

False


'sentiment table created successfully'

In [379]:
# Must be executed second: this view selects from grouped_sentiment, so that view has to exist first.
# Joins the per-day sentiment with the daily prices on (date, ticker).
# NOTE(review): grouped_sentiment.avg relies on the database naming the unaliased
# avg(comp_score) column "avg" - confirm for the target database.
merged_price_sent_view = '''create view joined_sentiment_stock_price as
select grouped_sentiment.date, grouped_sentiment.avg, stocks_daily_prices.ticker,
stocks_daily_prices.adjclose,stocks_daily_prices.volume,stocks_daily_prices.high,stocks_daily_prices.low
from grouped_sentiment
join stocks_daily_prices
on grouped_sentiment.date = stocks_daily_prices.date
and grouped_sentiment.ticker = stocks_daily_prices.ticker
order by grouped_sentiment.date desc;'''

In [380]:
# Must be executed first: joined_sentiment_stock_price is built on top of this view.
# Averages the compound sentiment score per (date, ticker) from stock_sentiments.
avg_comp_per_day_view = '''create view grouped_sentiment as
select date, ticker, avg(comp_score)
from stock_sentiments
group by date,ticker
order by date desc;'''

In [381]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/Stocks')

check1 = engine.has_table('joined_sentiment_stock_price')
check2 = engine.has_table('grouped_sentiment')

# Each view is guarded by its OWN existence check. grouped_sentiment is created
# first because joined_sentiment_stock_price selects from it.
print(check2)
if not check2:
    engine.execute(avg_comp_per_day_view)
print(check1)
if not check1:
    engine.execute(merged_price_sent_view)
    
    


False
False


In [423]:
# Splitting the db portfolio data based on stocks.
# dict_of_dfs maps every upper-cased ticker to its price history extended with a
# rolling mean, a rolling standard deviation and the cumulative return, to be
# analysed together with the sentiment data.

stock_price_cent_df = pd.read_sql_query('select * from joined_sentiment_stock_price;', engine)
daily_price_info_df = pd.read_sql_query('select * from stocks_daily_prices;', engine)

dict_of_dfs = {}
for i in s.split(','):
    # .copy() gives an independent frame; adding columns to a filtered slice of
    # daily_price_info_df would raise pandas' SettingWithCopyWarning.
    temp_df = daily_price_info_df[daily_price_info_df['ticker'] == i.upper()].copy()

    # The rolling window spans 15% of the rows available for this ticker.
    window = round(len(temp_df)*.15)
    temp_df['rolling_mean'] = temp_df['adjclose'].rolling(window).mean()
    temp_df['rolling_std'] = temp_df['adjclose'].rolling(window).std()
    temp_df['cumel_return'] = (1 + temp_df['adjclose'].pct_change(1)).cumprod()

    dict_of_dfs.update({i.upper() : temp_df})

# dict_of_dfs.get('TSLA') returns the frame of a single ticker
dict_of_dfs

{'TSLA':       index       date ticker        open        high         low       close  \
 0         0 2010-06-29   TSLA    3.800000    5.000000    3.508000    4.778000   
 1         1 2010-06-30   TSLA    5.158000    6.084000    4.660000    4.766000   
 2         2 2010-07-01   TSLA    5.000000    5.184000    4.054000    4.392000   
 3         3 2010-07-02   TSLA    4.600000    4.620000    3.742000    3.840000   
 4         4 2010-07-06   TSLA    4.000000    4.000000    3.166000    3.222000   
 ...     ...        ...    ...         ...         ...         ...         ...   
 2676   2676 2021-02-16   TSLA  818.000000  821.000000  792.440002  796.219971   
 2677   2677 2021-02-17   TSLA  779.090027  799.840027  762.010010  798.150024   
 2678   2678 2021-02-18   TSLA  780.900024  794.690002  776.270020  787.380005   
 2679   2679 2021-02-19   TSLA  795.000000  796.789978  777.369995  781.299988   
 2680   2680 2021-02-22   TSLA  762.640015  768.500000  710.500000  714.500000   
 
      

In [420]:
#daily_price_info_df[daily_price_info_df['ticker']=='TSLA']

In [383]:
# Appends the price rows (missing values replaced by 0) to the 'stock_price_calc' table.
# NOTE(review): this writes daily_price_info_df, i.e. the rows read from
# stocks_daily_prices, not the frames with rolling statistics kept in
# dict_of_dfs - confirm this is intended.
# NOTE(review): if_exists='append' adds the same rows again every time this cell runs.
daily_price_info_df.fillna(0).to_sql('stock_price_calc', engine, if_exists='append')

In [385]:
# Disabled correlation heatmap, kept as an inert string (the trailing ';' hides its echo).
# NOTE(review): `tsla` is not defined in the cells shown here - define it before re-enabling.
'''corrMatrix = tsla.corr()
sns.heatmap(corrMatrix, annot=True)
plt.show()''';