# Import Custom and General Packages

In [2]:
from data_google_news import get_google_news, get_google_business_news
from data_get_sp500 import get_sp500_details
from data_reddit_wallstreetbets import get_hot_reddit_comments, get_top_reddit_comments
from FinBert_NLP import get_FinBERT_sentiment

from transformers import AutoTokenizer, AutoModelForSequenceClassification

import pandas as pd
import numpy as np
import time
import warnings
warnings.simplefilter(action='ignore')

In [3]:
# Take the timestamp exactly once so the value shown to the reader is the same
# one stored in `current_datetime` (two separate strftime calls can straddle a
# second boundary and disagree).
current_datetime = time.strftime("%Y%m%d-%H%M%S")
print("Current datetime:\t", current_datetime)

Current datetime:	 20230120-132918


# Initialize and Load the FinBERT Pre-Trained Model

In [4]:
# Single checkpoint id shared by the tokenizer and the classifier so the two
# cannot drift apart.
FINBERT_CHECKPOINT = "ProsusAI/finbert"

# NOTE(review): neither `tokenizer` nor `model` is referenced again in the cells
# visible here (get_FinBERT_sentiment is called without them) — confirm whether
# this explicit load is still needed.
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

# Get S&P 500 Company Names and Tickers

In [5]:
# Fetch the S&P 500 constituents via the project helper and preview the first
# ten rows; the preview shows a `Symbol` (ticker) and a `Security` (company
# name) column.
sp500_df = get_sp500_details()
sp500_df.head(10)

Unnamed: 0,Symbol,Security
0,MMM,3M
1,AOS,A. O. Smith
2,ABT,Abbott
3,ABBV,AbbVie
4,ACN,Accenture
5,ATVI,Activision Blizzard
6,ADM,ADM
7,ADBE,Adobe Inc.
8,ADP,ADP
9,AAP,Advance Auto Parts


# Sentiment Analysis for USA Economy

## Get Google News Data for Overall Economy

In [6]:
%%capture
# %%capture (which must stay the first line of the cell) suppresses anything
# the fetch writes to stdout/stderr so it does not flood the notebook.
economy_news_df = get_google_business_news()

In [7]:
# Preview the fetched articles; the `text` column is what gets scored below.
economy_news_df.head(5)

Unnamed: 0,title,description,published date,url,publisher,text
1,"Stocks close higher Friday, Nasdaq notches thi...","Stocks close higher Friday, Nasdaq notches thi...","Fri, 20 Jan 2023 21:17:00 GMT",https://www.cnbc.com/2023/01/19/stock-futures-...,"{'href': 'https://www.cnbc.com', 'title': 'CNBC'}",Stocks rallied on Friday to finish the week st...
2,Customs officials are seizing eggs at the U.S....,Customs officials are seizing eggs at the U.S....,"Fri, 20 Jan 2023 16:02:27 GMT",https://finance.yahoo.com/news/customs-officia...,"{'href': 'https://finance.yahoo.com', 'title':...",U.S. customs officials are cracking down on eg...
4,"Fed Rate Increases Have ‘A Ways to Go,’ Top Of...","Fed Rate Increases Have ‘A Ways to Go,’ Top Of...","Fri, 20 Jan 2023 20:14:25 GMT",https://www.nytimes.com/2023/01/20/business/ec...,"{'href': 'https://www.nytimes.com', 'title': '...","Christopher Waller, a Federal Reserve governor..."
6,Microsoft Hosts Sting Concert Before Laying Of...,Microsoft Hosts Sting Concert Before Laying Of...,"Fri, 20 Jan 2023 14:46:00 GMT",https://gizmodo.com/microsoft-sting-layoffs-te...,"{'href': 'https://gizmodo.com', 'title': 'Gizm...",Layoffs are plaguing the tech industry left an...
7,Bed Bath & Beyond beefs up legal team ahead of...,Bed Bath & Beyond beefs up legal team ahead of...,"Fri, 20 Jan 2023 17:11:42 GMT",https://www.cnbc.com/2023/01/20/bed-bath-weigh...,"{'href': 'https://www.cnbc.com', 'title': 'CNBC'}","A ""Store Closing"" banner on a Bed Bath & Beyon..."


In [8]:
# Score every article's text with the project FinBERT helper. Judging by the
# preview, the result has one row per text with Positive / Negative / Neutral
# scores.
economy_text = economy_news_df['text'].tolist()
economy_sentiment = get_FinBERT_sentiment(economy_text)
economy_sentiment.head(5)

Unnamed: 0,Text,Positive,Negative,Neutral
0,(CNN) — When Pizza Hut decided to bring back i...,0.088229,0.019342,0.892429
1,Tesla (TSLA) CEO Elon Musk is expected to be c...,0.01283,0.933882,0.053289
2,ChatGPT Stole Your Work. So What Are You Going...,0.029096,0.077994,0.89291
3,"A ""Store Closing"" banner on a Bed Bath & Beyon...",0.14424,0.106963,0.748797
4,"Elizabeth Holmes, the founder of disgraced bio...",0.041273,0.426115,0.532612


Based on Google News, overall coverage of the US economy skews negative, which indicates bearish sentiment about the economy.

In [9]:
# Average each sentiment score across all scored economy articles.
economy_sentiment[["Positive","Negative", "Neutral"]].mean()

Positive    0.148488
Negative    0.517718
Neutral     0.333795
dtype: float64

# Sentiment Analysis for Individual Stocks

## Get Reddit wallstreetbets data

In [11]:
# Pull comments from the "hot" and "top" r/wallstreetbets listings via the
# project helpers. NOTE(review): the argument 5 is presumably a post/comment
# limit — confirm against the helper and consider a named constant.
reddit_hot_df = get_hot_reddit_comments(5)
reddit_top_df = get_top_reddit_comments(5)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. pd.concat
# stacks the same rows in the same order (top first, then hot) and, like
# append, keeps each frame's original index labels.
reddit_WSB = pd.concat([reddit_top_df, reddit_hot_df])

In [12]:
# Preview the combined top + hot comments (a single `comment` column).
reddit_WSB.head(5)

Unnamed: 0,comment
0,Someone needs to open a GameStop across the ny...
1,Bro wtf... 2021 is crazy.
2,This is the hype we need for next week!\n\nWE ...
3,This is easily one of the greatest things to h...
4,Just when you think this sub can't be any dumb...


## Company Level Sentiment Analysis

In [13]:
# Keep only the company names.
# NOTE(review): this rebinds `sp500_df` from a DataFrame to a plain list, so
# re-running this cell on its own fails (a list cannot be indexed with
# 'Security'); a separate name for the list would make the cell idempotent.
sp500_df = sp500_df['Security'].tolist()

In [14]:
# Replace the full company list with a small hand-picked set; the sentiment
# loop below iterates over whatever `sp500_df` holds at this point.
# NOTE(review): this discards the list built in the previous cell — remove this
# cell to run over every S&P 500 company.
sp500_df = ['Google', 'Tesla', 'Microsoft', 'Amazon']

In [15]:
%%capture
sentiment_df = pd.DataFrame()
for i in range(0,len(sp500_df)):
    company_name = sp500_df[i]
    news = get_google_news(company_name)
    text = news['text'].tolist()
    company_sentiment_google = get_FinBERT_sentiment(text)
    company_sentiment_google['Source'] ='Google News'
    company_sentiment_google['Company'] = company_name
    sentiment_df = sentiment_df.append(company_sentiment_google)

    try:
        reddit_WSB_text = reddit_WSB.loc[reddit_WSB['comment'].str.contains(company_name, case=False)]['comment'].tolist()
        company_sentiment_reddit_WSB = get_FinBERT_sentiment(reddit_WSB_text)
        company_sentiment_reddit_WSB['Source'] ='Reddit WSB'
        company_sentiment_reddit_WSB['Company'] = company_name
        sentiment_df = sentiment_df.append(company_sentiment_reddit_WSB)
    except:
        print('No relevant Reddit Comments')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [16]:
# Preview the combined table: FinBERT scores plus the `Source` and `Company`
# tags added in the loop above.
sentiment_df.head(5)

Unnamed: 0,Text,Positive,Negative,Neutral,Source,Company
0,"Sonos Inc., facing a patent war with Google, i...",0.077699,0.064474,0.857827,Google News,Google
1,In this article META\n\nAMZN\n\nGOOGL Follow y...,0.039229,0.842668,0.118103,Google News,Google
2,Google’s AirTag copycat could be incredible — ...,0.043759,0.189893,0.766348,Google News,Google
3,Google workers in Switzerland sent a letter th...,0.008158,0.968439,0.023403,Google News,Google
4,Google plans to demo AI chatbot search as it p...,0.045091,0.242307,0.712602,Google News,Google


## Sentiment for Different Companies by Source

In the Tesla sample below, Reddit sentiment is less negative and far more neutral than Google News, although its positive score is also lower.

Sample output for Tesla:

In [18]:
# Mean sentiment scores per (Company, Source) pair.
# NOTE(review): tail(2) shows Tesla only because groupby sorts its keys and
# Tesla is alphabetically last of the four configured companies; if Tesla has
# rows from just one source, the other displayed row belongs to a different
# company.
sentiment_df.groupby(['Company', 'Source'])[["Positive","Negative", "Neutral"]].mean().tail(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Positive,Negative,Neutral
Company,Source,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tesla,Google News,0.208132,0.317431,0.474437
Tesla,Reddit WSB,0.056851,0.209848,0.7333
