In [1]:
## Import the essential library

import os
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta

In [2]:
# Load the enviornment
load_dotenv()

True

In [67]:
#Get the Twitter bearer token from the .env file

twitter_bearer_token = os.getenv("TWITTER_BEARER_TOKEN")

#Check if retrieved
type(twitter_bearer_token)

str

In [38]:
## Read the ticker symbols from CSV and put it in the dataframe
csvpath = Path("Data/StockPriceData.csv")
df_stock_price_data = pd.read_csv(csvpath, infer_datetime_format=True, parse_dates=True)

#Delete the unnecessary index column from CSV
df_stock_price_data = df_stock_price_data.drop(columns=df_stock_price_data.columns[0])
df_stock_price_data.dtypes

Date       object
Close     float64
Ticker     object
dtype: object

In [5]:
#List of unique tickers
tickers_list = list(set(df_stock_price_data['Ticker']))
tickers_list

['SHCRW',
 'OEPWU',
 'CREC',
 'KWR',
 'HORI',
 'ACLX',
 'IMH',
 'WNC',
 'TRIP',
 'GBX',
 'AACI',
 'COSM',
 'SQ',
 'DOCU',
 'GRC',
 'FCT',
 'WEX',
 'DRQ',
 'MAR',
 'PWR',
 'MPRAU',
 'POAI',
 'HGEN',
 'SBNY',
 'BKCC',
 'IVCB',
 'INSW',
 'SBTX',
 'CGABL',
 'CBRGW',
 'BSM']

In [6]:
# Set the Bearer Token for Authorisation for Twitter API call.
auth_token = "Bearer " + twitter_bearer_token
headers = {"Authorization": auth_token}

## Function to read 100 recent tweets related to ticker and from the date time specified.
def read_100_Tweets(ticker, tweet_date_time):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : ticker - Stock Ticker
    ##          tweet_date_time - UTC Date/Time Format YYYY-MM-DDTHH:mm:ssZ (ISO 8601/RFC 3339)
    ## 
    ##  Output: List of 100 tweets
    twitter_api_url  = f"https://api.twitter.com/2/tweets/search/recent?max_results=100&query={ticker}&end_time={tweet_date_time}"
    response = requests.get(twitter_api_url, headers=headers)
    ## Check for 200 status code which means it was successful
    tweets_list = [];
    if(response.status_code == 200):
        json_response = response.json()
        #Check if there are any tweets at all
        if('data' in json_response.keys()): 
            all_tweets = response.json()["data"]
            for tweet in all_tweets:
                tweets_list.append(tweet["text"])
            
    else:
        print(f"Response code: {response.status_code}.  Error in getting the tweet");
    return tweets_list    
    

In [7]:
## Perform Vader Sentiment Analysis

## Define Sentiment Object for Sentiment Analysis

sentiment_obj = SentimentIntensityAnalyzer()
def perform_sentiment_analysis(tweets_list):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : tweets_list - List of 100 tweets
    ##          
    ## 
    ##  Output: sentiment score average
    
    ##Check if there are tweets to analyse
    if (len(tweets_list) > 0):
        sentiment_scores_all = []
        for tweet in tweets_list:
            sentiment_dict = sentiment_obj.polarity_scores(tweet)
            sentiment_scores_all.append(sentiment_dict["compound"])

        #Average the sentiment of all tweets    
        average_sentiment = np.average(sentiment_scores_all)
        return average_sentiment
    else:
        return 0;
    

    

In [None]:
#perform_sentiment_analysis(tweets_list)

In [9]:
def execute_tweet_sentiment_analysis(start_date, end_date):
    
    tweet_sentiments = []

    while start_date <= end_date:
        delta = timedelta(hours=4)
        for ticker in tickers_list:
            if(start_date.hour == 0 or start_date.hour == 4 ):
                continue
            print(f"Executing Tweet Analysis for {ticker} on {start_date.isoformat()}")
            tweets_list = []
            tweets_list = read_100_Tweets(ticker, start_date.isoformat() + "Z")
            sentiment_score = perform_sentiment_analysis(tweets_list)
            tweet_sentiment = {}
            tweet_sentiment["Ticker"] = ticker
            tweet_sentiment["Date"] = start_date
            tweet_sentiment["Sentiment_Score"] = sentiment_score
            tweet_sentiments.append(tweet_sentiment)
            #print("Ticker: " + tweet_sentiment["ticker"] + ", Date : " + str(tweet_sentiment["date"]) + ", Sentiment Score: " + str(tweet_sentiment["sentiment_score"]))
        start_date += delta
    return tweet_sentiments 

In [12]:
start_date = datetime(2022,5,10, 12,0,0)
end_date = datetime(2022,5,12, 0,0,0)
tweet_sentiments = []
tweet_sentiments = execute_tweet_sentiment_analysis(start_date, end_date)


Executing Tweet Analysis for SHCRW on 2022-05-10T12:00:00
Executing Tweet Analysis for OEPWU on 2022-05-10T12:00:00
Executing Tweet Analysis for CREC on 2022-05-10T12:00:00
Executing Tweet Analysis for KWR on 2022-05-10T12:00:00
Executing Tweet Analysis for HORI on 2022-05-10T12:00:00
Executing Tweet Analysis for ACLX on 2022-05-10T12:00:00
Executing Tweet Analysis for IMH on 2022-05-10T12:00:00
Executing Tweet Analysis for WNC on 2022-05-10T12:00:00
Executing Tweet Analysis for TRIP on 2022-05-10T12:00:00
Executing Tweet Analysis for GBX on 2022-05-10T12:00:00
Executing Tweet Analysis for AACI on 2022-05-10T12:00:00
Executing Tweet Analysis for COSM on 2022-05-10T12:00:00
Executing Tweet Analysis for SQ on 2022-05-10T12:00:00
Executing Tweet Analysis for DOCU on 2022-05-10T12:00:00
Executing Tweet Analysis for GRC on 2022-05-10T12:00:00
Executing Tweet Analysis for FCT on 2022-05-10T12:00:00
Executing Tweet Analysis for WEX on 2022-05-10T12:00:00
Executing Tweet Analysis for DRQ on 20

[{'Ticker': 'SHCRW',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0},
 {'Ticker': 'OEPWU',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0},
 {'Ticker': 'CREC',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': -0.06088585858585858},
 {'Ticker': 'KWR',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': -0.233375},
 {'Ticker': 'HORI',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0.100716},
 {'Ticker': 'ACLX',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0},
 {'Ticker': 'IMH',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0.11356250000000002},
 {'Ticker': 'WNC',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0.11606315789473684},
 {'Ticker': 'TRIP',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),
  'Sentiment_Score': 0.23439899999999997},
 {'Ticker': 'GBX',
  'Date': datetime.datetime(2022, 5, 10, 12, 0),

In [15]:
## Do not touch
tweet_sentiments_all = []
tweet_sentiments_all.append(tweet_sentiments)

In [68]:
 #Second Call due to Twitter Rate Limits  
tweet_sentiments_2 = []
start_date = datetime(2022,5,12, 0,0,0)
end_date = datetime(2022,5,14, 0,0,0)

tweet_sentiments_2 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for SHCRW on 2022-05-12T08:00:00
Executing Tweet Analysis for OEPWU on 2022-05-12T08:00:00
Executing Tweet Analysis for CREC on 2022-05-12T08:00:00
Executing Tweet Analysis for KWR on 2022-05-12T08:00:00
Executing Tweet Analysis for HORI on 2022-05-12T08:00:00
Executing Tweet Analysis for ACLX on 2022-05-12T08:00:00
Executing Tweet Analysis for IMH on 2022-05-12T08:00:00
Executing Tweet Analysis for WNC on 2022-05-12T08:00:00
Executing Tweet Analysis for TRIP on 2022-05-12T08:00:00
Executing Tweet Analysis for GBX on 2022-05-12T08:00:00
Executing Tweet Analysis for AACI on 2022-05-12T08:00:00
Executing Tweet Analysis for COSM on 2022-05-12T08:00:00
Executing Tweet Analysis for SQ on 2022-05-12T08:00:00
Executing Tweet Analysis for DOCU on 2022-05-12T08:00:00
Executing Tweet Analysis for GRC on 2022-05-12T08:00:00
Executing Tweet Analysis for FCT on 2022-05-12T08:00:00
Executing Tweet Analysis for WEX on 2022-05-12T08:00:00
Executing Tweet Analysis for DRQ on 20

In [52]:
 #Third Call due to Twitter Rate Limits  
tweet_sentiments_3 = []
start_date = datetime(2022,5,14, 0,0,0)
end_date = datetime(2022,5,15, 0,0,0)

tweet_sentiments_3 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for SHCRW on 2022-05-14T08:00:00
Executing Tweet Analysis for OEPWU on 2022-05-14T08:00:00
Executing Tweet Analysis for CREC on 2022-05-14T08:00:00
Executing Tweet Analysis for KWR on 2022-05-14T08:00:00
Executing Tweet Analysis for HORI on 2022-05-14T08:00:00
Executing Tweet Analysis for ACLX on 2022-05-14T08:00:00
Executing Tweet Analysis for IMH on 2022-05-14T08:00:00
Executing Tweet Analysis for WNC on 2022-05-14T08:00:00
Executing Tweet Analysis for TRIP on 2022-05-14T08:00:00
Executing Tweet Analysis for GBX on 2022-05-14T08:00:00
Executing Tweet Analysis for AACI on 2022-05-14T08:00:00
Executing Tweet Analysis for COSM on 2022-05-14T08:00:00
Executing Tweet Analysis for SQ on 2022-05-14T08:00:00
Executing Tweet Analysis for DOCU on 2022-05-14T08:00:00
Executing Tweet Analysis for GRC on 2022-05-14T08:00:00
Executing Tweet Analysis for FCT on 2022-05-14T08:00:00
Executing Tweet Analysis for WEX on 2022-05-14T08:00:00
Executing Tweet Analysis for DRQ on 20

In [54]:
 #Fourth Call due to Twitter Rate Limits  
tweet_sentiments_4 = []
start_date = datetime(2022,5,15, 0,0,0)
end_date = datetime(2022,5,16, 0,0,0)

tweet_sentiments_4 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for SHCRW on 2022-05-15T08:00:00
Executing Tweet Analysis for OEPWU on 2022-05-15T08:00:00
Executing Tweet Analysis for CREC on 2022-05-15T08:00:00
Executing Tweet Analysis for KWR on 2022-05-15T08:00:00
Executing Tweet Analysis for HORI on 2022-05-15T08:00:00
Executing Tweet Analysis for ACLX on 2022-05-15T08:00:00
Executing Tweet Analysis for IMH on 2022-05-15T08:00:00
Executing Tweet Analysis for WNC on 2022-05-15T08:00:00
Executing Tweet Analysis for TRIP on 2022-05-15T08:00:00
Executing Tweet Analysis for GBX on 2022-05-15T08:00:00
Executing Tweet Analysis for AACI on 2022-05-15T08:00:00
Executing Tweet Analysis for COSM on 2022-05-15T08:00:00
Executing Tweet Analysis for SQ on 2022-05-15T08:00:00
Executing Tweet Analysis for DOCU on 2022-05-15T08:00:00
Executing Tweet Analysis for GRC on 2022-05-15T08:00:00
Executing Tweet Analysis for FCT on 2022-05-15T08:00:00
Executing Tweet Analysis for WEX on 2022-05-15T08:00:00
Executing Tweet Analysis for DRQ on 20

In [70]:
tweet_sentiments_all = tweet_sentiments + tweet_sentiments_2 + tweet_sentiments_3 + tweet_sentiments_4
len(tweet_sentiments_all)

713

In [95]:

stock_tweet_sentiment_df = pd.DataFrame(tweet_sentiments_all)
#Date Clean up

#stock_tweet_sentiment_df["Date"] = stock_tweet_sentiment_df["Date"].to_timestamp()
#stock_tweet_sentiment_df["Date"] = pd.Timestamp(pd.to_datetime(stock_tweet_sentiment_df["Date"]), tz="America/New_York").isoformat()
#Check Date Type
stock_tweet_sentiment_df = stock_tweet_sentiment_df.set_index("Date")
stock_tweet_sentiment_df.index = stock_tweet_sentiment_df.index.tz_localize("UTC") 
#Sort the tweets df, reset the index after sorting and then drop the column "index"
stock_tweet_sentiment_df = stock_tweet_sentiment_df.reset_index()

In [96]:
stock_tweet_sentiment_df

Unnamed: 0,Date,Ticker,Sentiment_Score
0,2022-05-10 12:00:00+00:00,SHCRW,0.000000
1,2022-05-10 12:00:00+00:00,OEPWU,0.000000
2,2022-05-10 12:00:00+00:00,CREC,-0.060886
3,2022-05-10 12:00:00+00:00,KWR,-0.233375
4,2022-05-10 12:00:00+00:00,HORI,0.100716
...,...,...,...
708,2022-05-15 20:00:00+00:00,INSW,0.178263
709,2022-05-15 20:00:00+00:00,SBTX,0.042025
710,2022-05-15 20:00:00+00:00,CGABL,-0.136600
711,2022-05-15 20:00:00+00:00,CBRGW,0.000000


In [97]:
stock_tweet_sentiment_df = stock_tweet_sentiment_df.sort_values(by=["Ticker", "Date"])
#stock_tweet_sentiment_df = stock_tweet_sentiment_df.reset_index()
stock_tweet_sentiment_df

Unnamed: 0,Date,Ticker,Sentiment_Score
10,2022-05-10 12:00:00+00:00,AACI,0.000000
41,2022-05-10 16:00:00+00:00,AACI,-0.101150
72,2022-05-10 20:00:00+00:00,AACI,-0.067433
103,2022-05-11 08:00:00+00:00,AACI,0.036443
134,2022-05-11 12:00:00+00:00,AACI,0.036443
...,...,...,...
565,2022-05-14 20:00:00+00:00,WNC,0.203865
596,2022-05-15 08:00:00+00:00,WNC,0.215041
627,2022-05-15 12:00:00+00:00,WNC,0.256676
658,2022-05-15 16:00:00+00:00,WNC,0.034728


In [98]:
stock_tweet_sentiment_df.to_csv("Data/stock_tweet_sentiment.csv")