In [88]:
import os
import time
import warnings
from datetime import *  # NOTE: rebinds `time` to datetime.time; keep after `import time` so existing bindings do not change
from urllib.parse import quote_plus
from urllib.request import urlopen, Request

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import psycopg2 as pg2
from bs4 import BeautifulSoup
from nltk.sentiment import SentimentIntensityAnalyzer
from sqlalchemy import create_engine

In [89]:
# Ticker symbols handed to the headline scraper below.
# For a quick single-ticker run, swap in: s = ['tsla']
s = [
    'vym',
    'hd',
    'tsla',
    'amat',
    'dfs',
    'aapl',
]

In [90]:
def _fetch_news_table(ticker):
    """Download the Finviz quote page for ``ticker`` and return its news table.

    Returns the element with ``id='news-table'``, or ``None`` when the page
    contains no such element.
    """
    url = 'https://finviz.com/quote.ashx?t=' + ticker
    request = Request(url=url, headers={'user-agent': 'sentiment-analysis-app'})

    # The context manager closes the HTTP response once parsing is done.
    with urlopen(request) as response:
        # NOTE(review): 'html' lets bs4 pick whichever HTML parser is
        # installed; pin 'html.parser' or 'lxml' if parsing must be identical
        # across machines.
        html = BeautifulSoup(response, 'html')

    return html.find(id='news-table')


def _parse_news_rows(ticker, news_table):
    """Convert one ticker's news table into ``[ticker, title, date, time]`` rows.

    A row's timestamp cell holds either ``"<date> <time>"`` or only
    ``"<time>"``; in the latter case the most recent date seen for this same
    ticker is reused. The date is ``None`` until the table supplies one.
    """
    rows = []
    current_date = None  # reset per ticker so dates never leak between tickers

    for row in news_table.find_all('tr'):
        # Rows without a headline link or a timestamp cell carry no article.
        if row.a is None or row.td is None:
            continue

        # split() with no argument also discards non-breaking spaces and
        # newlines around the timestamp.
        tokens = row.td.text.split()
        if not tokens:
            continue

        if len(tokens) == 1:
            clock = tokens[0]
        else:
            current_date, clock = tokens[0], tokens[1]

        rows.append([ticker, row.a.text, current_date, clock])

    return rows


def stock_news_headline_parser_scraper(stock_ticker_list):
    """Scrape Finviz headlines for the given tickers and score their sentiment.

    Parameters
    ----------
    stock_ticker_list : iterable of str
        Ticker symbols; each is appended to the Finviz quote URL and stored
        upper-cased in the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` with columns ``Time``, ``Ticker``,
        ``Compound_score``, ``Title`` and ``Updated``. Headlines whose VADER
        compound score is exactly 0 are dropped. The frame is empty (same
        columns and index name) when no headline could be parsed.

    Notes
    -----
    Tickers whose page has no ``news-table`` element are skipped with a
    warning instead of raising.
    """
    output_columns = ['Time', 'Ticker', 'Compound_score', 'Title', 'Updated']

    # Keyed by upper-cased ticker, so a repeated symbol is only parsed once.
    news_tables = {}
    for ticker in stock_ticker_list:
        news_table = _fetch_news_table(ticker)
        if news_table is None:
            warnings.warn(f'no news table found for ticker {ticker!r}; skipping')
            continue
        news_tables[ticker.upper()] = news_table

    parsed_data = []
    for ticker, news_table in news_tables.items():
        parsed_data.extend(_parse_news_rows(ticker, news_table))

    # Nothing scraped: return an empty, correctly shaped frame rather than
    # failing on an unbound ``df``.
    if not parsed_data:
        empty = pd.DataFrame(columns=['Date'] + output_columns).set_index('Date')
        return empty[output_columns]

    # Build and score the frame once, after every ticker has been parsed.
    df = pd.DataFrame(parsed_data, columns=['Ticker', 'Title', 'Date', 'Time'])

    # Compound score for each article title.
    vader = SentimentIntensityAnalyzer()
    df['Compound_score'] = df['Title'].apply(
        lambda title: vader.polarity_scores(title)['compound']
    )

    # Keep only the first 7 characters of the time string, then normalise it
    # to a 24-hour 'HH:MM:SS' string.
    df['Time'] = pd.to_datetime(df['Time'].str[:7]).dt.strftime('%H:%M:%S')
    df['Date'] = pd.to_datetime(df['Date']).dt.date

    df = df.set_index('Date')
    df['Updated'] = pd.to_datetime('now')

    # Filter neutral news out of the frame.
    df = df[df['Compound_score'] != 0]

    return df[output_columns]

In [91]:
# Scrape and score the headlines for every ticker in `s`.
new_df = stock_news_headline_parser_scraper(stock_ticker_list=s)

# Bare last expression so the notebook renders the resulting frame.
new_df

Unnamed: 0_level_0,Time,Ticker,Compound_score,Title,Updated
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-20,10:11:00,VYM,0.6369,These are the 3 best dividend yield investment...,2021-02-20 23:56:59.446049
2020-07-07,10:59:00,VYM,0.5267,5 ETF Ideas for a Winning Portfolio in the Sec...,2021-02-20 23:56:59.446049
2020-06-12,11:00:00,VYM,0.4019,Understanding Smart Beta ETFs' Indexes,2021-02-20 23:56:59.446049
2020-03-11,14:39:00,VYM,0.6249,8 Great Vanguard ETFs for a Low-Cost Core,2021-02-20 23:56:59.446049
2020-01-28,08:00:00,VYM,0.3400,"Has the First ""Correction Since October"" Start...",2021-02-20 23:56:59.446049
...,...,...,...,...,...
2021-02-17,01:40:00,AAPL,-0.2500,"Texas Freeze, Buffett Cuts Apple Stake, Retail...",2021-02-20 23:56:59.446049
2021-02-17,01:12:00,AAPL,0.5267,"Apple, Spotify, Others Pay $424M In Music Lice...",2021-02-20 23:56:59.446049
2021-02-16,22:46:00,AAPL,-0.2960,Warren Buffett's Berkshire Cuts Apple Stake An...,2021-02-20 23:56:59.446049
2021-02-16,19:00:00,AAPL,-0.2960,Epic Games files EU antitrust complaint agains...,2021-02-20 23:56:59.446049


In [92]:
# Database credentials are read from the standard libpq environment variables
# when set; the fallbacks keep the notebook's previous behaviour.
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')
db_name = 'Stock_sentiment_analysis_data'

# Saving to database: append the scored headlines to the existing table.
# quote_plus() keeps the URL valid if the credentials contain characters
# such as '@' or '/'.
engine = create_engine(
    f'postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}'
    f'@localhost:5432/{db_name}'
)
# NOTE(review): the table name looks like a typo of 'stock_news_updated'; it
# is left unchanged because if_exists='append' targets a table that may
# already exist under this name. Confirm before renaming.
new_df.to_sql('stock_news_upated', engine, if_exists='append')

# Raw psycopg2 connection; no statements are executed on it yet.
conn = pg2.connect(database=db_name, user=db_user, password=db_password)
try:
    cur = conn.cursor()
    cur.close()
finally:
    # Always release the connection, even if cursor creation fails.
    conn.close()