In [1]:
# libraries for webscraping, parsing and getting stock data
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import yfinance as yf
import ssl
import sys
import yfinance as yf
import time
import time as my_time

# for plotting and data manipulation
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly
import plotly.express as px

# NLTK VADER for sentiment analysis
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
ssl._create_default_https_context = ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('vader_lexicon')

fil_1 = "/Users/ferdinandhgjesdahl/Documents/testsentimentanalysis/test10.txt"
sys.stdout = open(fil_1, 'a')

tickers_dict = {'AMZN': 5, 'TSLA': 1, 'GOOG': 3, 'META': 3, 'KO': 10, 'PEP': 5,  # amazon, tesla, google, meta, coke, pepsi
                'BA': 5, 'XOM': 5, 'CVX': 4, 'UNH': 1, 'JNJ': 3, 'JPM': 3, # boeing, exxon mobil, chevron, united health, johnson&johnson, jp morgan
                'BAC': 5, 'C': 5, 'SPG': 10, 'AAPL': 6, 'MSFT': 5, 'WMT': 6, # bank of america, citigroup, simon property group, apple, microsoft, walmart
                'LMT': 2, 'PFE': 10, 'MMM': 3, 'CRWD': 3, 'WBD': 20, 'DIS': 8, # lockheed martin, pfizer, 3M, crowdstrike, warner bros, disney
                'AIG': 5, 'BRK-B': 4, 'DDOG': 3, 'SLB': 16, 'SONY': 5, 'PLD': 5, # american international group, berkshire hathaway, datadog, schlumberger, sony, prologis
                'AMD': 5, 'ISRG': 3, 'INTC': 5} # world fuel services, advanced micro devices, intuitive surgical, intel

tickers = tickers_dict.keys()
number_of_shares = tickers_dict.values()

##### Scrape the Date, Time and News Headlines Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

##### Initialize Sentiment Analyzer
vader = SentimentIntensityAnalyzer()

while True:
    # Scrape the Date, Time and News Headlines Data
    for ticker in tickers:
       my_time.sleep(1)
       print(ticker)
       url = finwiz_url + ticker
       req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
       response = urlopen(req)
       html = BeautifulSoup(response)
       news_table = html.find(id='news-table')
       news_tables[ticker] = news_table

   
    parsed_news = []
    # Iterate through the news
    for file_name, news_table in news_tables.items():
        # Iterate through all tr tags in 'news_table'
        for x in news_table.findAll('tr'):
            # read the text from each tr tag into text
            # get text from a only
            text = x.a.get_text()
            # splite text in the td tag into a list
            date_scrape = x.td.text.split()
            # if the length of 'date_scrape' is 1, load 'time' as the only element
            if len(date_scrape) == 1:
                time = date_scrape[0]

            # else load 'date' as the 1st element and 'time' as the second
            else:
                date = date_scrape[0]
                time = date_scrape[1]
            # Extract the ticker from the file name, get the string up to the 1st '_'
            ticker = file_name.split('_')[0]

            # Append ticker, date, time and headline as a list to the 'parsed_news' list
            parsed_news.append([ticker, date, time, text])

    ##### Perform Sentiment Analysis with Vader
    # Set column names
    columns = ['ticker', 'date', 'time', 'headline']
    # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
    parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)

    # Iterate through the headlines and get the polarity scores using vader
    scores = parsed_and_scored_news['headline'].apply(vader.polarity_scores).tolist()
    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)

    # Join the DataFrames of the news and the list of dicts
    parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')
    # Convert the date column from string to datetime
    parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news.date).dt.date

    ##### Get Stock Price, Industry, and Sector
    def get_stock_price_industry_and_sector(ticker):
        stock = yf.Ticker(ticker)
        price = stock.history(period='1d')['Close'].iloc[-1]
        return price

    mean_scores = parsed_and_scored_news.groupby('ticker')[['compound', 'neg', 'neu', 'pos']].mean()
    sectors = []
    industries = []
    prices = []
    percent_changes = []

    def get_daily_percent_change(ticker):
        stock = yf.Ticker(ticker)
        hist = stock.history(period='1d')
        open_price = hist['Open'][0]
        close_price = hist['Close'][-1]
        percent_change = ((close_price - open_price) / open_price) * 100  # calculate percentage change
        return percent_change

    for ticker in tickers:
        tickerdata = yf.Ticker(ticker)
        prices.append(get_stock_price_industry_and_sector(ticker))
        sectors.append(tickerdata.info['sector'])
        industries.append(tickerdata.info['industry'])
        percent_changes.append(get_daily_percent_change(ticker))

    # dictionary {'column name': list of values for column} to be converted to dataframe
    d = {'Sector': sectors, 'Industry': industries, 'Price': prices, 'No. of Shares': number_of_shares, 'stock price changed': percent_changes}
    # create dataframe from
    df_info = pd.DataFrame(data=d, index=tickers)
    df = mean_scores.join(df_info)
    df = df.rename(columns={"compound": "Sentiment Score", "neg": "Negative", "neu": "Neutral", "pos": "Positive",
                            "stock price changed": 'stock price changed'})
    df = df.reset_index()

    fig = px.treemap(df, path=[px.Constant("Sectors"), 'Sector', 'Industry', 'ticker'],
                     color='Sentiment Score',
                     hover_data=['Price', 'Negative', 'Neutral', 'Positive', 'Sentiment Score', 'stock price changed'],
                     color_continuous_scale=['#FF0000', "#000000", '#00FF00'],
                     color_continuous_midpoint=0)
    fig.data[0].customdata = df[
        ['Price', 'Negative', 'Neutral', 'Positive', 'Sentiment Score', 'stock price changed']].round(3).values.tolist()
    fig.data[0].texttemplate = "%{label}<br>%{customdata[4]}"
    fig.update_traces(textposition="middle center")
    fig.update_layout(margin=dict(t=30, l=10, r=10, b=10), font_size=20)

    plotly.offline.plot(fig, filename='stock_sentiment.html')  # this writes the plot into a html file and opens it
    fig.show()

    my_time.sleep(3600)  # Legg til en tidsoverskridelse på 5 minutter (300 sekunder)

[nltk_data] Error loading vader_lexicon: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/ferdinandhgjesdahl/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
  parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news.date).dt.date
