In [33]:
# Import Libraries
import pandas as pd 
import numpy as np 
import yfinance as yf
from datetime import datetime, timedelta
import os
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv, find_dotenv
load_dotenv("newsapi.env")

True

### Sentiment Analysis

In [2]:
# Get S&P500 symbols ranked by market cap 
sp500_by_marketcap = pd.read_csv('SP500_By_MarketCap.csv')
sp500_by_marketcap.head()

Unnamed: 0,Symbol,Name,Industry,Market Cap
0,AAPL,Apple Inc.,Electronic Technology,"$ 2,986,128,347,290.24"
1,MSFT,Microsoft Corporation,Technology Services,"$ 2,513,296,516,647.36"
2,GOOG,Alphabet Inc.,Technology Services,"$ 1,927,101,773,229.48"
3,GOOGL,Alphabet Inc.,Technology Services,"$ 1,923,705,624,039.54"
4,AMZN,"Amazon.com, Inc.",Retail Trade,"$ 1,728,404,755,739.39"


In [3]:
# Get tickers in a list

tickers = sp500_by_marketcap['Symbol'].head(50).tolist()
print(tickers)

['AAPL', 'MSFT', 'GOOG', 'GOOGL', 'AMZN', 'TSLA', 'FB', 'NVDA', 'BRK.B', 'JPM', 'UNH', 'V', 'JNJ', 'HD', 'WMT', 'PG', 'BAC', 'MA', 'PFE', 'DIS', 'AVGO', 'XOM', 'ACN', 'CSCO', 'NFLX', 'NKE', 'LLY', 'KO', 'TMO', 'CRM', 'COST', 'ABT', 'ABBV', 'PEP', 'ORCL', 'CMCSA', 'CVX', 'PYPL', 'DHR', 'VZ', 'INTC', 'QCOM', 'WFC', 'MCD', 'MRK', 'UPS', 'T', 'AMD', 'NEE', 'MS']


In [8]:
# Download/Update the VADER Lexicon
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/rajaabhishek/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [9]:
load_dotenv()
find_dotenv()

'/Users/rajaabhishek/FinTech/NW_Fintech_Project3/.env'

In [10]:
# Reading the News API key enviroment variable
api_key = os.getenv("news_api")
type(api_key)

str

In [11]:
# Create a newsapi client
newsapi = NewsApiClient(api_key=api_key)

In [14]:
stocks_sentiment_df = pd.DataFrame()

for ticker in tickers:
    ticker_headlines = newsapi.get_everything(q=ticker,
                                              language="en",
                                              page_size=100,
                                              sort_by="relevancy"
                                             )
    ticker_sentiments = []
    
    for article in ticker_headlines["articles"]:
        try:
            text = article["content"]
            date = article["publishedAt"][:10]
            sentiment = analyzer.polarity_scores(text)
            compound = sentiment["compound"]
            pos = sentiment["pos"]
            neu = sentiment["neu"]
            neg = sentiment["neg"]

            ticker_sentiments.append({
                "text": text,
                "Stock" : ticker,
                "date": date,
                "compound": compound,
                "positive": pos,
                "negative": neg,
                "neutral": neu

            })

        except AttributeError:
            pass
# Create DataFrame
    ticker_sentiments_df = pd.DataFrame(ticker_sentiments)
    stocks_sentiment_df = pd.concat([stocks_sentiment_df,ticker_sentiments_df],axis=0)
    
# Reorder DataFrame columns
cols = ["date", "Stock", "text", "compound", "positive", "negative", "neutral"]
stocks_sentiment_df = stocks_sentiment_df[cols]
stocks_sentiment_df.sort_values(by=["date","Stock"],inplace=True)
stocks_sentiment_df["unique_key"] = stocks_sentiment_df["Stock"]+stocks_sentiment_df["date"].astype(str)
stocks_sentiment_df = stocks_sentiment_df.set_index("unique_key")
stocks_sentiment_df.head()

Unnamed: 0_level_0,date,Stock,text,compound,positive,negative,neutral
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AAPL2022-03-14,2022-03-14,AAPL,Apple Inc. manufacturer Foxconn Technology Gro...,0.0,0.0,0.0,1.0
AAPL2022-03-14,2022-03-14,AAPL,Discovery Inc. Chief Executive David Zaslav re...,0.5859,0.142,0.0,0.858
ABBV2022-03-14,2022-03-14,ABBV,Michael Vi/iStock Editorial via Getty Images\r...,0.0,0.0,0.0,1.0
ABT2022-03-14,2022-03-14,ABT,The death of a family member is never an easy ...,-0.7909,0.037,0.213,0.75
ABT2022-03-14,2022-03-14,ABT,"More than 8,000 single mothers completed appli...",0.7269,0.174,0.0,0.826


In [22]:
sentiment_compound_df = pd.concat([pd.DataFrame(stocks_sentiment_df["compound"].groupby([stocks_sentiment_df.index]).mean()).rename(columns={"compound":"mean_compound"}),
                          pd.DataFrame(stocks_sentiment_df["compound"].groupby([stocks_sentiment_df.index]).max()).rename(columns={"compound":"max_compound"}),
                          pd.DataFrame(stocks_sentiment_df["compound"].groupby([stocks_sentiment_df.index]).min()).rename(columns={"compound":"min_compound"})],axis=1)
sentiment_compound_df

Unnamed: 0_level_0,mean_compound,max_compound,min_compound
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL2022-03-14,0.292950,0.5859,0.0000
AAPL2022-03-15,0.371500,0.7430,0.0000
AAPL2022-03-16,-0.025800,-0.0258,-0.0258
AAPL2022-03-17,0.483550,0.5267,0.4404
AAPL2022-03-18,-0.139350,0.2960,-0.7003
...,...,...,...
XOM2022-04-11,-0.165567,0.8392,-0.8442
XOM2022-04-12,0.228200,0.7964,-0.7003
XOM2022-04-13,0.157267,0.6249,-0.1531
XOM2022-04-14,0.183775,0.7351,-0.2732


In [23]:
sentiment_positive_df = pd.concat([pd.DataFrame(stocks_sentiment_df["positive"].groupby([stocks_sentiment_df.index]).mean()).rename(columns={"positive":"mean_positive"}),
                          pd.DataFrame(stocks_sentiment_df["positive"].groupby([stocks_sentiment_df.index]).max()).rename(columns={"positive":"max_positive"}),
                          pd.DataFrame(stocks_sentiment_df["positive"].groupby([stocks_sentiment_df.index]).min()).rename(columns={"positive":"min_positive"})],axis=1)
sentiment_positive_df

Unnamed: 0_level_0,mean_positive,max_positive,min_positive
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL2022-03-14,0.071000,0.142,0.000
AAPL2022-03-15,0.098500,0.197,0.000
AAPL2022-03-16,0.050000,0.050,0.050
AAPL2022-03-17,0.102500,0.112,0.093
AAPL2022-03-18,0.015250,0.061,0.000
...,...,...,...
XOM2022-04-11,0.076167,0.256,0.000
XOM2022-04-12,0.107000,0.202,0.000
XOM2022-04-13,0.094000,0.174,0.000
XOM2022-04-14,0.074750,0.171,0.000


In [24]:
sentiment_negative_df = pd.concat([pd.DataFrame(stocks_sentiment_df["negative"].groupby([stocks_sentiment_df.index]).mean()).rename(columns={"negative":"mean_negative"}),
                          pd.DataFrame(stocks_sentiment_df["negative"].groupby([stocks_sentiment_df.index]).max()).rename(columns={"negative":"max_negative"}),
                          pd.DataFrame(stocks_sentiment_df["negative"].groupby([stocks_sentiment_df.index]).min()).rename(columns={"negative":"min_negative"})],axis=1)
sentiment_negative_df

Unnamed: 0_level_0,mean_negative,max_negative,min_negative
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL2022-03-14,0.000000,0.000,0.000
AAPL2022-03-15,0.018000,0.036,0.000
AAPL2022-03-16,0.053000,0.053,0.053
AAPL2022-03-17,0.000000,0.000,0.000
AAPL2022-03-18,0.048000,0.149,0.000
...,...,...,...
XOM2022-04-11,0.106833,0.216,0.000
XOM2022-04-12,0.056000,0.162,0.000
XOM2022-04-13,0.032667,0.098,0.000
XOM2022-04-14,0.033750,0.072,0.000


In [25]:
sentiment_neutral_df = pd.concat([pd.DataFrame(stocks_sentiment_df["neutral"].groupby([stocks_sentiment_df.index]).mean()).rename(columns={"neutral":"mean_neutral"}),
                          pd.DataFrame(stocks_sentiment_df["neutral"].groupby([stocks_sentiment_df.index]).max()).rename(columns={"neutral":"max_neutral"}),
                          pd.DataFrame(stocks_sentiment_df["neutral"].groupby([stocks_sentiment_df.index]).min()).rename(columns={"neutral":"min_neutral"})],axis=1)
sentiment_neutral_df

Unnamed: 0_level_0,mean_neutral,max_neutral,min_neutral
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL2022-03-14,0.929000,1.000,0.858
AAPL2022-03-15,0.883500,1.000,0.767
AAPL2022-03-16,0.896000,0.896,0.896
AAPL2022-03-17,0.897500,0.907,0.888
AAPL2022-03-18,0.936750,1.000,0.851
...,...,...,...
XOM2022-04-11,0.817167,0.960,0.711
XOM2022-04-12,0.837000,0.915,0.797
XOM2022-04-13,0.873333,1.000,0.794
XOM2022-04-14,0.891500,1.000,0.800


In [28]:
sentiment_df = pd.concat([sentiment_compound_df,sentiment_positive_df,sentiment_negative_df,sentiment_neutral_df],axis=1)
sentiment_df

Unnamed: 0_level_0,mean_compound,max_compound,min_compound,mean_positive,max_positive,min_positive,mean_negative,max_negative,min_negative,mean_neutral,max_neutral,min_neutral
unique_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AAPL2022-03-14,0.292950,0.5859,0.0000,0.071000,0.142,0.000,0.000000,0.000,0.000,0.929000,1.000,0.858
AAPL2022-03-15,0.371500,0.7430,0.0000,0.098500,0.197,0.000,0.018000,0.036,0.000,0.883500,1.000,0.767
AAPL2022-03-16,-0.025800,-0.0258,-0.0258,0.050000,0.050,0.050,0.053000,0.053,0.053,0.896000,0.896,0.896
AAPL2022-03-17,0.483550,0.5267,0.4404,0.102500,0.112,0.093,0.000000,0.000,0.000,0.897500,0.907,0.888
AAPL2022-03-18,-0.139350,0.2960,-0.7003,0.015250,0.061,0.000,0.048000,0.149,0.000,0.936750,1.000,0.851
...,...,...,...,...,...,...,...,...,...,...,...,...
XOM2022-04-11,-0.165567,0.8392,-0.8442,0.076167,0.256,0.000,0.106833,0.216,0.000,0.817167,0.960,0.711
XOM2022-04-12,0.228200,0.7964,-0.7003,0.107000,0.202,0.000,0.056000,0.162,0.000,0.837000,0.915,0.797
XOM2022-04-13,0.157267,0.6249,-0.1531,0.094000,0.174,0.000,0.032667,0.098,0.000,0.873333,1.000,0.794
XOM2022-04-14,0.183775,0.7351,-0.2732,0.074750,0.171,0.000,0.033750,0.072,0.000,0.891500,1.000,0.800


In [29]:
sentiment_df.to_csv('sentiment.csv')