In [32]:
# https://towardsdatascience.com/sentiment-analysis-for-stock-price-prediction-in-python-bed40c65d178
# http://importpython.blogspot.com/2014/07/how-to-convert-date-formats-from.html
# https://stackoverflow.com/questions/43557254/how-to-clean-a-tweet-using-regex-without-removing-punctuations-and-hasthag

In [33]:
import requests
import json
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import flair
import re

In [34]:
with open('tokens.txt') as f:
    token = f.read()
    token = token.strip('\n')

In [52]:
date_format = '%Y-%m-%dT%H:%M:%SZ'
start_date='2021-04-13'
end_date='2021-04-19'
start_obj = datetime.strptime(start_date,'%Y-%m-%d')
start = start_obj.strftime(date_format)
end_obj = datetime.strptime(end_date,'%Y-%m-%d')
end = end_obj.strftime(date_format)

In [137]:
params = {
    'query': '(tesla OR tsla) (lang:en)',
    'tweet.fields': 'created_at,lang',
    'max_results': '100',
    'start_time': start,
    'end_time': end
}
headers = {'authorization': f'Bearer {token}'}

In [54]:
response = requests.get(
    'https://api.twitter.com/2/tweets/search/recent',
    params=params,
    headers=headers
)

In [72]:
model = flair.models.TextClassifier.load('en-sentiment')

2021-04-18 22:02:37,290 loading file /Users/rowlavel/.flair/models/sentiment-en-mix-distillbert_4.pt


In [73]:
def clean_tweet(tweet):
    return re.sub(r'@\S+|https?://\S+', '', tweet)

In [74]:
def format_tweet(tweet):
    data = {
        'id': tweet['id'],
        'date': tweet['created_at'],
        'lang': tweet['lang'],
        'text': clean_tweet(tweet['text'])
    }

    # sentiment analysis using flair
    sentence = flair.data.Sentence(text=data['text'])
    pred = model.predict(sentence)

    data['probability'] = sentence.labels[0].score
    data['sentiment'] = sentence.labels[0].value

    return data

In [75]:
# get more data by scraping 100 tweets every 60 minutes
def backup(now):
    now = datetime.strptime(now,date_format)
    back = now - timedelta(minutes=60)
    return back.strftime(date_format)

In [138]:
tweets = pd.DataFrame()
now = datetime.now()
last_week = now-timedelta(days=7)
now = now.strftime(date_format)
i = 0

print(last_week)
print(now)
while True:
    if datetime.strptime(now,date_format) < last_week:
        break
        
    print(i, end='\r', flush=True)
    i += 1
    back = backup(now)
    
    params['start_time'] = back
    params['end_time'] = now
    
    response = requests.get(
        'https://api.twitter.com/2/tweets/search/recent',
        params=params,
        headers=headers
    )
    
    now = back
    
    for tweet in response.json()['data']:
        tweets = tweets.append(format_tweet(tweet), ignore_index=True)

2021-04-11 23:01:23.111543
2021-04-18T23:01:23Z
163

KeyError: 'data'

In [139]:
tweets

Unnamed: 0,date,id,lang,probability,sentiment,text
0,2021-04-18T23:01:21.000Z,1383918602537951232,en,0.998802,NEGATIVE,I feel sorry for you If you sell before $TSLA...
1,2021-04-18T23:01:21.000Z,1383918601938235398,en,0.956833,POSITIVE,Motley Crue and Tesla. Dr. Feelgood tour
2,2021-04-18T23:01:20.000Z,1383918597437677578,en,0.995796,POSITIVE,✨ Participate in the April #Webull wheel event...
3,2021-04-18T23:01:17.000Z,1383918585509138445,en,0.999586,NEGATIVE,RT Who thinks Mr. Musk will announce Tesla ac...
4,2021-04-18T23:01:16.000Z,1383918580631097347,en,0.995796,POSITIVE,✨ Participate in the April #Webull wheel event...
...,...,...,...,...,...,...
16183,2021-04-12T04:55:25.000Z,1381470993034702850,en,0.990167,POSITIVE,$TSLA $NIO $MVIS and EV sector\nEV space laggi...
16184,2021-04-12T04:55:19.000Z,1381470966144921600,en,0.753576,POSITIVE,RT “Tesla’s long-term competitive advantage w...
16185,2021-04-12T04:55:18.000Z,1381470960629387268,en,0.980765,NEGATIVE,RT Guys its confirmed Corpse has a Tesla took...
16186,2021-04-12T04:55:16.000Z,1381470953687764995,en,0.995136,NEGATIVE,"Well, in the deluded world Elon fanatic..."


In [140]:
point = tweets.iloc[0]

In [141]:
point

date                                    2021-04-18T23:01:21.000Z
id                                           1383918602537951232
lang                                                          en
probability                                             0.998802
sentiment                                               NEGATIVE
text            I feel sorry for you If you sell before $TSLA...
Name: 0, dtype: object

In [142]:
point['date']

'2021-04-18T23:01:21.000Z'

In [143]:
date = datetime.strptime(point['date'], '%Y-%m-%dT%H:%M:%S.%fZ')

In [144]:
date.strftime('%Y-%m-%d')

'2021-04-18'

In [145]:
def convert_date(date):
    date = datetime.strptime(date, '%Y-%m-%dT%H:%M:%S.%fZ')
    return date.strftime('%Y-%m-%d')

In [146]:
tweets['date'] = tweets['date'].apply(convert_date)

In [147]:
tweets

Unnamed: 0,date,id,lang,probability,sentiment,text
0,2021-04-18,1383918602537951232,en,0.998802,NEGATIVE,I feel sorry for you If you sell before $TSLA...
1,2021-04-18,1383918601938235398,en,0.956833,POSITIVE,Motley Crue and Tesla. Dr. Feelgood tour
2,2021-04-18,1383918597437677578,en,0.995796,POSITIVE,✨ Participate in the April #Webull wheel event...
3,2021-04-18,1383918585509138445,en,0.999586,NEGATIVE,RT Who thinks Mr. Musk will announce Tesla ac...
4,2021-04-18,1383918580631097347,en,0.995796,POSITIVE,✨ Participate in the April #Webull wheel event...
...,...,...,...,...,...,...
16183,2021-04-12,1381470993034702850,en,0.990167,POSITIVE,$TSLA $NIO $MVIS and EV sector\nEV space laggi...
16184,2021-04-12,1381470966144921600,en,0.753576,POSITIVE,RT “Tesla’s long-term competitive advantage w...
16185,2021-04-12,1381470960629387268,en,0.980765,NEGATIVE,RT Guys its confirmed Corpse has a Tesla took...
16186,2021-04-12,1381470953687764995,en,0.995136,NEGATIVE,"Well, in the deluded world Elon fanatic..."


In [148]:
def calc_avg_sentiment(df,date):
    subdf = tweets[tweets['date'] == date]
    sentiment = subdf['sentiment'].values
    avg_sentiment = sum([-1 if s == 'NEGATIVE' else 1 for s in sentiment])/len(sentiment)
    return avg_sentiment

In [149]:
calc_avg_sentiment(tweets,'2021-04-12')

-0.10493827160493827

In [150]:
calc_avg_sentiment(tweets,'2021-04-13')

-0.06255283178360102

In [151]:
calc_avg_sentiment(tweets,'2021-04-14')

0.12414965986394558

In [152]:
calc_avg_sentiment(tweets,'2021-04-15')

0.0989648033126294

In [153]:
calc_avg_sentiment(tweets,'2021-04-16')

-0.1253693541578725

In [154]:
calc_avg_sentiment(tweets,'2021-04-17')

0.0054416073670992045

In [155]:
calc_avg_sentiment(tweets,'2021-04-18')

0.08372290692732681