In [1]:
## Import the essential library

import os
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta

In [2]:
# Load the enviornment
load_dotenv()

True

In [16]:
#Get the Twitter bearer token from the .env file

twitter_bearer_token = os.getenv("TWITTER_BEARER_TOKEN")

#Check if retrieved
type(twitter_bearer_token)

str

In [4]:
## Read the ticker symbols from CSV and put it in the dataframe
csvpath = Path("Data/StockPriceData.csv")
df_stock_price_data = pd.read_csv(csvpath, infer_datetime_format=True, parse_dates=True)

#Delete the unnecessary index column from CSV
df_stock_price_data = df_stock_price_data.drop(columns=df_stock_price_data.columns[0])
df_stock_price_data.dtypes

Date       object
Close     float64
Ticker     object
dtype: object

In [5]:
#List of unique tickers
tickers_list = list(set(df_stock_price_data['Ticker']))
tickers_list

['ARBG',
 'GNE',
 'NVSA',
 'TKNO',
 'BRX',
 'HUSN',
 'XOMAO',
 'MGRC',
 'IAS',
 'SCD',
 'CFSB',
 'GGGVR',
 'ENO',
 'MMX',
 'TETCU',
 'PYN',
 'FRLA',
 'PRTC',
 'TBLA',
 'COWN',
 'GBX',
 'NPCT',
 'DECAU',
 'DCRDW',
 'JACK',
 'PVL',
 'KOP',
 'IGACW',
 'GRTS',
 'ENERR',
 'AGRI']

In [6]:
# Set the Bearer Token for Authorisation for Twitter API call.
auth_token = "Bearer " + twitter_bearer_token
headers = {"Authorization": auth_token}

## Function to read 100 recent tweets related to ticker and from the date time specified.
def read_100_Tweets(ticker, tweet_date_time):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : ticker - Stock Ticker
    ##          tweet_date_time - UTC Date/Time Format YYYY-MM-DDTHH:mm:ssZ (ISO 8601/RFC 3339)
    ## 
    ##  Output: List of 100 tweets
    twitter_api_url  = f"https://api.twitter.com/2/tweets/search/recent?max_results=100&query={ticker}&end_time={tweet_date_time}"
    response = requests.get(twitter_api_url, headers=headers)
    ## Check for 200 status code which means it was successful
    tweets_list = [];
    if(response.status_code == 200):
        json_response = response.json()
        #Check if there are any tweets at all
        if('data' in json_response.keys()): 
            all_tweets = response.json()["data"]
            for tweet in all_tweets:
                tweets_list.append(tweet["text"])
            
    else:
        print(f"Response code: {response.status_code}.  Error in getting the tweet");
    return tweets_list    
    

In [7]:
## Perform Vader Sentiment Analysis

## Define Sentiment Object for Sentiment Analysis

sentiment_obj = SentimentIntensityAnalyzer()
def perform_sentiment_analysis(tweets_list):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : tweets_list - List of 100 tweets
    ##          
    ## 
    ##  Output: sentiment score average
    
    ##Check if there are tweets to analyse
    if (len(tweets_list) > 0):
        sentiment_scores_all = []
        for tweet in tweets_list:
            sentiment_dict = sentiment_obj.polarity_scores(tweet)
            sentiment_scores_all.append(sentiment_dict["compound"])

        #Average the sentiment of all tweets    
        average_sentiment = np.average(sentiment_scores_all)
        return average_sentiment
    else:
        return 0;
    

    

In [8]:
def execute_tweet_sentiment_analysis(start_date, end_date):
    
    tweet_sentiments = []

    while start_date <= end_date:
        delta = timedelta(hours=4)
        for ticker in tickers_list:
            if(start_date.hour == 0 or start_date.hour == 4 ):
                continue
            print(f"Executing Tweet Analysis for {ticker} on {start_date.isoformat()}")
            tweets_list = []
            tweets_list = read_100_Tweets(ticker, start_date.isoformat() + "Z")
            sentiment_score = perform_sentiment_analysis(tweets_list)
            tweet_sentiment = {}
            tweet_sentiment["Ticker"] = ticker
            tweet_sentiment["Date"] = start_date
            tweet_sentiment["Sentiment_Score"] = sentiment_score
            tweet_sentiments.append(tweet_sentiment)
            #print("Ticker: " + tweet_sentiment["ticker"] + ", Date : " + str(tweet_sentiment["date"]) + ", Sentiment Score: " + str(tweet_sentiment["sentiment_score"]))
        start_date += delta
    return tweet_sentiments 

In [9]:
start_date = datetime(2022,5,11, 12,0,0)
end_date = datetime(2022,5,13, 0,0,0)
tweet_sentiments = []
tweet_sentiments = execute_tweet_sentiment_analysis(start_date, end_date)


Executing Tweet Analysis for ARBG on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for GNE on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for NVSA on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for TKNO on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for BRX on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for HUSN on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for XOMAO on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for MGRC on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for IAS on 2022-05-11T12:00:00
Response code: 400.  Error in getting the tweet
Executing Tweet Analysis for SCD on 2022-05-11T12:00:00
R

In [11]:
 #Second Call due to Twitter Rate Limits  
tweet_sentiments_2 = []
start_date = datetime(2022,5,13, 0,0,0)
end_date = datetime(2022,5,15, 0,0,0)

tweet_sentiments_2 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for ARBG on 2022-05-13T08:00:00
Executing Tweet Analysis for GNE on 2022-05-13T08:00:00
Executing Tweet Analysis for NVSA on 2022-05-13T08:00:00
Executing Tweet Analysis for TKNO on 2022-05-13T08:00:00
Executing Tweet Analysis for BRX on 2022-05-13T08:00:00
Executing Tweet Analysis for HUSN on 2022-05-13T08:00:00
Executing Tweet Analysis for XOMAO on 2022-05-13T08:00:00
Executing Tweet Analysis for MGRC on 2022-05-13T08:00:00
Executing Tweet Analysis for IAS on 2022-05-13T08:00:00
Executing Tweet Analysis for SCD on 2022-05-13T08:00:00
Executing Tweet Analysis for CFSB on 2022-05-13T08:00:00
Executing Tweet Analysis for GGGVR on 2022-05-13T08:00:00
Executing Tweet Analysis for ENO on 2022-05-13T08:00:00
Executing Tweet Analysis for MMX on 2022-05-13T08:00:00
Executing Tweet Analysis for TETCU on 2022-05-13T08:00:00
Executing Tweet Analysis for PYN on 2022-05-13T08:00:00
Executing Tweet Analysis for FRLA on 2022-05-13T08:00:00
Executing Tweet Analysis for PRTC o

In [13]:
 #Third Call due to Twitter Rate Limits  
tweet_sentiments_3 = []
start_date = datetime(2022,5,15, 0,0,0)
end_date = datetime(2022,5,16, 0,0,0)

tweet_sentiments_3 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for ARBG on 2022-05-15T08:00:00
Executing Tweet Analysis for GNE on 2022-05-15T08:00:00
Executing Tweet Analysis for NVSA on 2022-05-15T08:00:00
Executing Tweet Analysis for TKNO on 2022-05-15T08:00:00
Executing Tweet Analysis for BRX on 2022-05-15T08:00:00
Executing Tweet Analysis for HUSN on 2022-05-15T08:00:00
Executing Tweet Analysis for XOMAO on 2022-05-15T08:00:00
Executing Tweet Analysis for MGRC on 2022-05-15T08:00:00
Executing Tweet Analysis for IAS on 2022-05-15T08:00:00
Executing Tweet Analysis for SCD on 2022-05-15T08:00:00
Executing Tweet Analysis for CFSB on 2022-05-15T08:00:00
Executing Tweet Analysis for GGGVR on 2022-05-15T08:00:00
Executing Tweet Analysis for ENO on 2022-05-15T08:00:00
Executing Tweet Analysis for MMX on 2022-05-15T08:00:00
Executing Tweet Analysis for TETCU on 2022-05-15T08:00:00
Executing Tweet Analysis for PYN on 2022-05-15T08:00:00
Executing Tweet Analysis for FRLA on 2022-05-15T08:00:00
Executing Tweet Analysis for PRTC o

In [15]:
 #Fourth Call due to Twitter Rate Limits  
tweet_sentiments_4 = []
start_date = datetime(2022,5,16, 0,0,0)
end_date = datetime(2022,5,17, 0,0,0)

tweet_sentiments_4 = execute_tweet_sentiment_analysis(start_date, end_date)

Executing Tweet Analysis for ARBG on 2022-05-16T08:00:00
Executing Tweet Analysis for GNE on 2022-05-16T08:00:00
Executing Tweet Analysis for NVSA on 2022-05-16T08:00:00
Executing Tweet Analysis for TKNO on 2022-05-16T08:00:00
Executing Tweet Analysis for BRX on 2022-05-16T08:00:00
Executing Tweet Analysis for HUSN on 2022-05-16T08:00:00
Executing Tweet Analysis for XOMAO on 2022-05-16T08:00:00
Executing Tweet Analysis for MGRC on 2022-05-16T08:00:00
Executing Tweet Analysis for IAS on 2022-05-16T08:00:00
Executing Tweet Analysis for SCD on 2022-05-16T08:00:00
Executing Tweet Analysis for CFSB on 2022-05-16T08:00:00
Executing Tweet Analysis for GGGVR on 2022-05-16T08:00:00
Executing Tweet Analysis for ENO on 2022-05-16T08:00:00
Executing Tweet Analysis for MMX on 2022-05-16T08:00:00
Executing Tweet Analysis for TETCU on 2022-05-16T08:00:00
Executing Tweet Analysis for PYN on 2022-05-16T08:00:00
Executing Tweet Analysis for FRLA on 2022-05-16T08:00:00
Executing Tweet Analysis for PRTC o

In [17]:
tweet_sentiments_all = tweet_sentiments + tweet_sentiments_2 + tweet_sentiments_3 + tweet_sentiments_4
len(tweet_sentiments_all)

713

In [18]:
#Initialise DataFrame from tweet sentiments
stock_tweet_sentiment_df = pd.DataFrame(tweet_sentiments_all)

#Change the date format to include TZ information to align ALPACA output from previous notebook
stock_tweet_sentiment_df = stock_tweet_sentiment_df.set_index("Date")
stock_tweet_sentiment_df.index = stock_tweet_sentiment_df.index.tz_localize("UTC") 

stock_tweet_sentiment_df = stock_tweet_sentiment_df.reset_index()

In [19]:
stock_tweet_sentiment_df

Unnamed: 0,Date,Ticker,Sentiment_Score
0,2022-05-11 12:00:00+00:00,ARBG,0.000000
1,2022-05-11 12:00:00+00:00,GNE,0.000000
2,2022-05-11 12:00:00+00:00,NVSA,0.000000
3,2022-05-11 12:00:00+00:00,TKNO,0.000000
4,2022-05-11 12:00:00+00:00,BRX,0.000000
...,...,...,...
708,2022-05-16 20:00:00+00:00,KOP,-0.004660
709,2022-05-16 20:00:00+00:00,IGACW,0.139300
710,2022-05-16 20:00:00+00:00,GRTS,0.080411
711,2022-05-16 20:00:00+00:00,ENERR,0.113333


In [20]:
stock_tweet_sentiment_df = stock_tweet_sentiment_df.sort_values(by=["Ticker", "Date"])
#stock_tweet_sentiment_df = stock_tweet_sentiment_df.reset_index()
stock_tweet_sentiment_df

Unnamed: 0,Date,Ticker,Sentiment_Score
30,2022-05-11 12:00:00+00:00,AGRI,0.000000
61,2022-05-11 16:00:00+00:00,AGRI,0.100719
92,2022-05-11 20:00:00+00:00,AGRI,0.103330
123,2022-05-12 08:00:00+00:00,AGRI,0.063434
154,2022-05-12 12:00:00+00:00,AGRI,0.008716
...,...,...,...
564,2022-05-15 20:00:00+00:00,XOMAO,-0.401900
595,2022-05-16 08:00:00+00:00,XOMAO,-0.401900
626,2022-05-16 12:00:00+00:00,XOMAO,-0.401900
657,2022-05-16 16:00:00+00:00,XOMAO,-0.401900


In [21]:
stock_tweet_sentiment_df.to_csv("Data/stock_tweet_sentiment.csv")