This source code was gathered from "https://github.com/mehranshakarami/AI_Spectrum/tree/main/2022/Twitter_API" and "https://www.pluralsight.com/guides/building-a-twitter-sentiment-analysis-in-python"

In [1]:

import tweepy
import configparser
import pandas as pd
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
import numpy as np
import logging
#from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
import nltk
# nltk.download()

In [3]:
# Build the set of English stop words from NLTK's stopwords corpus. These common
# words are filtered out of every tweet by preprocess_tweet_text.
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be available
# locally (see the commented-out nltk.download() call) - confirm on a fresh environment.
stop_words = set(stopwords.words("english"))


In [4]:

# Read the Twitter credentials from a local config file so the keys are never
# hard-coded in the notebook.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the credentials were
# not loaded. Fail here with a clear message instead of a bare KeyError below.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found or could not be read; it must contain a "
        "[twitter] section with api_key, api_key_secret, access_token and "
        "access_token_secret"
    )

# Consumer (API) key pair used for authentication
api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']

# Access token pair used for authentication
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']


In [5]:

# Authenticate with tweepy using the API key pair and the access token pair
# loaded from the config file.
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

# wait_on_rate_limit=True makes tweepy wait for the rate-limit window to reset
# instead of raising part-way through the large paginated search further down.
api = tweepy.API(auth, wait_on_rate_limit=True)





In [6]:
# Hashtag the bot analyses and the maximum number of tweets to pull
keywords = '#oott'
limit = 10000

# Page through the search results with a tweepy Cursor. tweet_mode='extended'
# is requested because full_text is read from every status below.
tweets = tweepy.Cursor(
    api.search_tweets,
    q=keywords,
    count=100,
    tweet_mode='extended',
).items(limit)

# Collect one [screen name, full text] record per tweet, then build the
# DataFrame from the complete list in a single step.
columns = ['User', 'Tweet']
data = [[tweet.user.screen_name, tweet.full_text] for tweet in tweets]
df = pd.DataFrame(data, columns=columns)

# Sanity check that the search returned data and the frame was populated
print(df)

                 User                                              Tweet
0     Markets87700133  RT @staunovo: Europe 16 nations oil inventorie...
1            staunovo  Europe 16 nations oil inventories fell by 9.24...
2             geokalp  RT @BrynneKKelly: WTI/Brent spread curve shift...
3       DaniloOnorino  The oil price today is completely distorted. A...
4        matthewmreed  Since March, around the time #India started bi...
...               ...                                                ...
9995           BeegJj  RT @Amena__Bakr: The JMMC supports a 100k cut ...
9996         satjapan  RT @JavierBlas: And 52 days after US President...
9997  cruisin4bruisin  RT @JavierBlas: And 52 days after US President...
9998   FrankKaneDubai  RT @Amena__Bakr: Final statement #OOTT #opec h...
9999      Amena__Bakr  In the last meeting the US issued a statement ...

[10000 rows x 2 columns]


In [7]:
# Create function to process and clean the tweets.
def preprocess_tweet_text(tweet):
    """Clean a raw tweet and return its remaining tokens as one string.

    Steps, in order: lowercase the text, strip URLs, strip @mentions and '#'
    characters (the hashtag word itself is kept), strip ASCII punctuation,
    tokenize with word_tokenize, and drop every token found in the
    module-level ``stop_words`` set.

    Args:
        tweet: the tweet text as a string.

    Returns:
        The surviving tokens joined with single spaces.
    """
    text = tweet.lower()
    # URLs first, so their punctuation does not leave fragments behind
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # @user references are removed entirely; only the '#' of a hashtag is removed
    text = re.sub(r'\@\w+|\#', '', text)
    punctuation_stripper = str.maketrans('', '', string.punctuation)
    text = text.translate(punctuation_stripper)
    # Tokenize, then keep only the tokens that are not stop words
    kept_tokens = (token for token in word_tokenize(text) if token not in stop_words)
    return " ".join(kept_tokens)

In [8]:
# Clean every tweet in the frame; the function is passed to map directly,
# so no lambda wrapper is needed.
df['Tweet'] = df['Tweet'].map(preprocess_tweet_text)

In [9]:
# Preview the first 25 rows to check the result of the text-cleaning step
df.head(25)

Unnamed: 0,User,Tweet
0,Markets87700133,rt europe 16 nations oil inventories fell 924m...
1,staunovo,europe 16 nations oil inventories fell 924mb m...
2,geokalp,rt wtibrent spread curve shift last 10 days oott
3,DaniloOnorino,oil price today completely distorted outlook t...
4,matthewmreed,since march around time india started binging ...
5,DaniloOnorino,oil price today completely distorted outlook t...
6,EnergyEXCH,📣 market impact ecb agreed start discussions s...
7,DTNMarkets,oil rallies usd slides putin threatens cut oil...
8,hLyHLcpubwToovV,ノルウェーのガスが欧州のライフラインになるのは賢明な選択だ ノルウェーのエネルギー企業のエク...
9,Chiwizz1,rt wtibrent spread curve shift last 10 days oott


In [10]:
#Removing the emojis within the tweets
def remove_emoji(string):
    """Return ``string`` with every character matched by the pattern below removed.

    The character class is deliberately broad: besides emoji it contains very
    wide ranges (U+24C2-U+1F251 and U+10000-U+10FFFF), so CJK, kana and other
    non-Latin text in that span is stripped as well.

    Args:
        string: the text to clean. The name shadows the imported ``string``
            module, but only inside this function.

    Returns:
        The text with all matching characters deleted.
    """
    blocked_chars = (
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
        "\U00002500-\U00002BEF"  # box drawing, geometric shapes, misc symbols, arrows
        "\U00002702-\U000027B0"  # dingbats
        "\U00002702-\U000027B0"  # (same range repeated in the original pattern)
        "\U000024C2-\U0001F251"  # very wide span; also covers CJK, kana, Hangul
        "\U0001f926-\U0001f937"  # gesture / people emoji
        "\U00010000-\U0010ffff"  # every supplementary-plane code point
        "\u2640-\u2642"          # gender signs
        "\u2600-\u2B55"          # misc symbols through heavy circle
        "\u200d"                 # zero-width joiner
        "\u23cf"                 # eject symbol
        "\u23e9"                 # fast-forward symbol
        "\u231a"                 # watch
        "\ufe0f"                 # variation selector-16
        "\u3030"                 # wavy dash
    )
    emoji_pattern = re.compile("[" + blocked_chars + "]+", flags=re.UNICODE)
    return emoji_pattern.sub('', string)

In [11]:
# Strip emoji from every cleaned tweet; remove_emoji is passed to map directly,
# so no lambda wrapper is needed.
df['Tweet'] = df['Tweet'].map(remove_emoji)

In [12]:
# Preview the first 25 rows to check whether any emojis remain
df.head(25)

Unnamed: 0,User,Tweet
0,Markets87700133,rt europe 16 nations oil inventories fell 924m...
1,staunovo,europe 16 nations oil inventories fell 924mb m...
2,geokalp,rt wtibrent spread curve shift last 10 days oott
3,DaniloOnorino,oil price today completely distorted outlook t...
4,matthewmreed,since march around time india started binging ...
5,DaniloOnorino,oil price today completely distorted outlook t...
6,EnergyEXCH,market impact ecb agreed start discussions sh...
7,DTNMarkets,oil rallies usd slides putin threatens cut oil...
8,hLyHLcpubwToovV,oott
9,Chiwizz1,rt wtibrent spread curve shift last 10 days oott


In [13]:
# Inspect the column dtypes before the sentiment columns are added
df.dtypes

User     object
Tweet    object
dtype: object

In [14]:
# Add empty-string placeholder columns for the positive, negative and neutral
# sentiment flags (in that column order).
df = df.assign(pos='', neg='', neutra='')

In [15]:
# Confirm the pos / neg / neutra placeholder columns were added
df.head()

Unnamed: 0,User,Tweet,pos,neg,neutra
0,Markets87700133,rt europe 16 nations oil inventories fell 924m...,,,
1,staunovo,europe 16 nations oil inventories fell 924mb m...,,,
2,geokalp,rt wtibrent spread curve shift last 10 days oott,,,
3,DaniloOnorino,oil price today completely distorted outlook t...,,,
4,matthewmreed,since march around time india started binging ...,,,


In [16]:
def get_tweet_sentiment(df):
    """Return one plus the counter matching the TextBlob polarity of ``df.Tweet``.

    Args:
        df: an object exposing ``Tweet``, ``pos``, ``neutra`` and ``neg``
            attributes (presumably a single DataFrame row - verify against
            the caller; no call site is visible in this notebook).

    Returns:
        ``1 + df.pos`` when the polarity is above zero, ``1 + df.neutra`` when
        it is exactly zero, otherwise ``1 + df.neg``.
    """
    # Polarity score of the tweet text, computed by TextBlob
    polarity = TextBlob(df.Tweet).sentiment.polarity

    if polarity > 0:
        return 1 + df.pos
    if polarity == 0:
        return 1 + df.neutra
    return 1 + df.neg

In [17]:
# Score every cleaned tweet with TextBlob and store the polarity value in a
# new 'polarity' column.
df['polarity'] = df['Tweet'].apply(lambda text: TextBlob(text).sentiment.polarity)

In [18]:
# Check the new polarity column on the first few rows
df.head()

Unnamed: 0,User,Tweet,pos,neg,neutra,polarity
0,Markets87700133,rt europe 16 nations oil inventories fell 924m...,,,,-0.7
1,staunovo,europe 16 nations oil inventories fell 924mb m...,,,,-0.35
2,geokalp,rt wtibrent spread curve shift last 10 days oott,,,,0.0
3,DaniloOnorino,oil price today completely distorted outlook t...,,,,0.1
4,matthewmreed,since march around time india started binging ...,,,,0.061905


In [20]:
# Turn the sign of the polarity score into three mutually exclusive 0/1 flags:
# below zero -> neg, exactly zero -> neutra, above zero -> pos.
df['neg'] = (df['polarity'] < 0).astype(int)

df['neutra'] = (df['polarity'] == 0).astype(int)

df['pos'] = (df['polarity'] > 0).astype(int)


In [21]:
# Check that the pos / neg / neutra flags match the sign of each polarity score
df.head(25)

Unnamed: 0,User,Tweet,pos,neg,neutra,polarity
0,Markets87700133,rt europe 16 nations oil inventories fell 924m...,0,1,0,-0.7
1,staunovo,europe 16 nations oil inventories fell 924mb m...,0,1,0,-0.35
2,geokalp,rt wtibrent spread curve shift last 10 days oott,0,0,1,0.0
3,DaniloOnorino,oil price today completely distorted outlook t...,1,0,0,0.1
4,matthewmreed,since march around time india started binging ...,1,0,0,0.061905
5,DaniloOnorino,oil price today completely distorted outlook t...,1,0,0,0.1
6,EnergyEXCH,market impact ecb agreed start discussions sh...,1,0,0,0.1
7,DTNMarkets,oil rallies usd slides putin threatens cut oil...,0,1,0,-0.7
8,hLyHLcpubwToovV,oott,0,0,1,0.0
9,Chiwizz1,rt wtibrent spread curve shift last 10 days oott,0,0,1,0.0


In [22]:
# Share of tweets flagged positive: number of 1s over the total row count
pos_rate = df['pos'].sum() / df.shape[0]

pos_rate

0.2397

In [23]:
# Share of tweets flagged neutral: number of 1s over the total row count
neutral_rate = df['neutra'].sum() / df.shape[0]

neutral_rate

0.5296

In [24]:
# Share of tweets flagged negative: number of 1s over the total row count
neg_rate = df['neg'].sum() / df.shape[0]

neg_rate

0.2307

In [25]:
# Checking to see how the format works

# Preview the three rates formatted as percentages with two decimals.
# The values are interpolated directly in the f-strings, and the "Bearish"
# label now carries the same trailing colon as the other two.
print(f"Bullish Tweets: {pos_rate:.2%}")

print(f"Neutral Tweets: {neutral_rate:.2%}")

print(f"Bearish Tweets: {neg_rate:.2%}")

Bullish Tweets: 23.97%
Neutral Tweets: 52.96%
Bearish Tweets 23.07%


In [None]:
# for follower in tweepy.Cursor(api.followers).items():
#     follower.follow()
#     print (follower.screen_name)

In [26]:
# Build the text of the tweet to post: one line per sentiment rate, each
# formatted as a percentage with two decimals.
status = f'''
Daily #oil Sentiment Check
Bullish rate: {pos_rate:.2%}
Neutral rate: {neutral_rate:.2%}
Bearish rate: {neg_rate:.2%}
'''

In [27]:
# Post the formatted summary through the authenticated tweepy API client.
# As the last expression in the cell, the returned Status object is echoed in
# the cell output, including account details - review before sharing.
api.update_status(status)

Status(_api=<tweepy.api.API object at 0x156cad5b0>, _json={'created_at': 'Fri Sep 09 13:29:29 +0000 2022', 'id': 1568230112310575106, 'id_str': '1568230112310575106', 'text': 'Daily #oil Sentiment Check\nBullish rate: 23.97%\nNeutral rate: 52.96%\nBearish rate: 23.07%', 'truncated': False, 'entities': {'hashtags': [{'text': 'oil', 'indices': [6, 10]}], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href="https://www.twitter.com" rel="nofollow">OILsentimentChecker</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1524125916879798274, 'id_str': '1524125916879798274', 'name': 'OilSentimentTracker', 'screen_name': 'Jaradang', 'location': '', 'description': 'A bot which processes 20,000 tweets with #oott and delivers an overall sentiment. Lets you see outside your follows to get a bigger picture on sentiment', 'url': None, 'entities': {'descript