In [2]:
## Import the essential library

import os
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta

In [3]:
# Load the enviornment
load_dotenv()

True

In [4]:
#Get the Twitter bearer token from the .env file

twitter_bearer_token = os.getenv("TWITTER_BEARER_TOKEN")

#Check if retrieved
type(twitter_bearer_token)

str

In [6]:
## Read the ticker symbols from CSV and put it in the dataframe
csvpath = Path("Data/StockPriceData.csv")
df_stock_price_data = pd.read_csv(csvpath, infer_datetime_format=True, parse_dates=True)

#Delete the unnecessary index column from CSV
df_stock_price_data = df_stock_price_data.drop(columns=df_stock_price_data.columns[0])
df_stock_price_data

Unnamed: 0,Date,Close,Ticker
0,2022-05-05,6.47,ADSE
1,2022-05-06,6.40,ADSE
2,2022-05-09,6.43,ADSE
3,2022-05-10,5.85,ADSE
4,2022-05-11,5.91,ADSE
...,...,...,...
147,2022-05-05,9.60,ZGN
148,2022-05-06,9.52,ZGN
149,2022-05-09,9.22,ZGN
150,2022-05-10,9.24,ZGN


In [11]:
#List of unique tickers
tickers_list = list(set(df_stock_price_data['Ticker']))
tickers_list

['BNIXW',
 'SVM',
 'TNXP',
 'ADSE',
 'KBNTW',
 'NLSN',
 'LVRAU',
 'VAL',
 'ZGN',
 'ALF',
 'FTAIO',
 'HYB',
 'CGABL',
 'TUP',
 'INDT',
 'PLMIW',
 'CLVT',
 'VWEWW',
 'FICV',
 'CRECU',
 'NAZ',
 'CLLS',
 'MBNKP',
 'JUGG',
 'EDSA',
 'SBT',
 'INVH',
 'ROCLW',
 'FNVTW',
 'GLBL',
 'UPH']

In [12]:
# Set the Bearer Token for Authorisation for Twitter API call.
auth_token = "Bearer " + twitter_bearer_token
headers = {"Authorization": auth_token}

## Function to read 100 recent tweets related to ticker and from the date time specified.
def read_100_Tweets(ticker, tweet_date_time):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : ticker - Stock Ticker
    ##          tweet_date_time - UTC Date/Time Format YYYY-MM-DDTHH:mm:ssZ (ISO 8601/RFC 3339)
    ## 
    ##  Output: List of 100 tweets
    twitter_api_url  = f"https://api.twitter.com/2/tweets/search/recent?max_results=100&query={ticker}&start_time={tweet_date_time}"
    response = requests.get(twitter_api_url, headers=headers)
    ## Check for 200 status code which means it was successful
    tweets_list = [];
    if(response.status_code == 200):
        json_response = response.json()
        #Check if there are any tweets at all
        if('data' in json_response.keys()): 
            all_tweets = response.json()["data"]
            for tweet in all_tweets:
                tweets_list.append(tweet["text"])
            
    else:
        print(f"Response code: {response.status_code}.  Error in getting the tweet");
    return tweets_list    
    

In [13]:
## Perform Vader Sentiment Analysis

## Define Sentiment Object for Sentiment Analysis

sentiment_obj = SentimentIntensityAnalyzer()
def perform_sentiment_analysis(tweets_list):
    ## Function to read 100 recent tweets from the specific dates
    ##  Input : tweets_list - List of 100 tweets
    ##          
    ## 
    ##  Output: sentiment score average
    
    ##Check if there are tweets to analyse
    if (len(tweets_list) > 0):
        sentiment_scores_all = []
        for tweet in tweets_list:
            sentiment_dict = sentiment_obj.polarity_scores(tweet)
            sentiment_scores_all.append(sentiment_dict["compound"])

        #Average the sentiment of all tweets    
        average_sentiment = np.average(sentiment_scores_all)
        return average_sentiment
    else:
        return 0;
    

    

In [62]:
#perform_sentiment_analysis(tweets_list)

In [21]:
start_date = datetime(2022,5,7, 13,0,0)
end_date = datetime(2022,5,13, 13,0,0)
tweet_sentiments = []

while start_date <= end_date:
    delta = timedelta(days=1)
    for ticker in tickers_list:
        print(f"Executing Tweet Analysis for {ticker} on {start_date.isoformat()}")
        tweets_list = read_100_Tweets(ticker, start_date.isoformat() + "Z")
        sentiment_score = perform_sentiment_analysis(tweets_list)
        tweet_sentiment = {}
        tweet_sentiment["Ticker"] = ticker
        tweet_sentiment["Date"] = start_date
        tweet_sentiment["Sentiment_Score"] = sentiment_score
        tweet_sentiments.append(tweet_sentiment)
        #print("Ticker: " + tweet_sentiment["ticker"] + ", Date : " + str(tweet_sentiment["date"]) + ", Sentiment Score: " + str(tweet_sentiment["sentiment_score"]))
    start_date += delta
 

Executing Tweet Analysis for BNIXW on 2022-05-07T13:00:00
Executing Tweet Analysis for SVM on 2022-05-07T13:00:00
Executing Tweet Analysis for TNXP on 2022-05-07T13:00:00
Executing Tweet Analysis for ADSE on 2022-05-07T13:00:00
Executing Tweet Analysis for KBNTW on 2022-05-07T13:00:00
Executing Tweet Analysis for NLSN on 2022-05-07T13:00:00
Executing Tweet Analysis for LVRAU on 2022-05-07T13:00:00
Executing Tweet Analysis for VAL on 2022-05-07T13:00:00
Executing Tweet Analysis for ZGN on 2022-05-07T13:00:00
Executing Tweet Analysis for ALF on 2022-05-07T13:00:00
Executing Tweet Analysis for FTAIO on 2022-05-07T13:00:00
Executing Tweet Analysis for HYB on 2022-05-07T13:00:00
Executing Tweet Analysis for CGABL on 2022-05-07T13:00:00
Executing Tweet Analysis for TUP on 2022-05-07T13:00:00
Executing Tweet Analysis for INDT on 2022-05-07T13:00:00
Executing Tweet Analysis for PLMIW on 2022-05-07T13:00:00
Executing Tweet Analysis for CLVT on 2022-05-07T13:00:00
Executing Tweet Analysis for VW

In [22]:
start_date.isoformat()

'2022-05-14T13:00:00'

In [23]:
   
tweet_sentiments

[{'Ticker': 'BNIXW',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.9719},
 {'Ticker': 'SVM',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.10295600000000002},
 {'Ticker': 'TNXP',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.12217399999999999},
 {'Ticker': 'ADSE',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.35438700000000006},
 {'Ticker': 'KBNTW',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0},
 {'Ticker': 'NLSN',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.084703},
 {'Ticker': 'LVRAU',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0},
 {'Ticker': 'VAL',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': 0.058570000000000004},
 {'Ticker': 'ZGN',
  'Date': datetime.datetime(2022, 5, 7, 13, 0),
  'Sentiment_Score': -0.14843900000000004},
 {'Ticker': 'ALF',
  'Date': datetime.datetime(2022, 5, 7, 

In [27]:
stock_tweet_sentiment_df = pd.DataFrame(tweet_sentiments)
#Date Clean up
stock_tweet_sentiment_df["Date"] = pd.to_datetime(stock_tweet_sentiment_df["Date"].dt.date)
#Check Date Type
stock_tweet_sentiment_df.dtypes

Ticker                     object
Date               datetime64[ns]
Sentiment_Score           float64
dtype: object

In [28]:
stock_tweet_sentiment_df

Unnamed: 0,Ticker,Date,Sentiment_Score
0,BNIXW,2022-05-07,0.971900
1,SVM,2022-05-07,0.102956
2,TNXP,2022-05-07,0.122174
3,ADSE,2022-05-07,0.354387
4,KBNTW,2022-05-07,0.000000
...,...,...,...
212,INVH,2022-05-13,-0.036675
213,ROCLW,2022-05-13,0.000000
214,FNVTW,2022-05-13,0.000000
215,GLBL,2022-05-13,0.016040


In [29]:
#Sort the tweets df, reset the index after sorting and then drop the column "index"
stock_tweet_sentiment_df = stock_tweet_sentiment_df.sort_values(by=["Ticker", "Date"]).reset_index().drop(columns="index")
stock_tweet_sentiment_df

Unnamed: 0,Ticker,Date,Sentiment_Score
0,ADSE,2022-05-07,0.354387
1,ADSE,2022-05-08,0.354387
2,ADSE,2022-05-09,0.354387
3,ADSE,2022-05-10,0.354387
4,ADSE,2022-05-11,0.354387
...,...,...,...
212,ZGN,2022-05-09,-0.148439
213,ZGN,2022-05-10,-0.148439
214,ZGN,2022-05-11,-0.148439
215,ZGN,2022-05-12,-0.148439


In [33]:
stock_tweet_sentiment_df.to_csv("Data/stock_tweet_sentiment.csv", mode="a", header="false")