# Testing out some features on the Twitter API using Tweepy

In [43]:
import json
import tweepy
import datetime as dt
import pandas as pd

In [5]:
with open('keys.json') as keys:
    config = json.load(keys)
auth = tweepy.OAuthHandler(config['twitter']['API_key'], config['twitter']['API_secret_key'])
# auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

## Example account (Elon Musk)

In [6]:
user = api.get_user('elonmusk')

In [7]:
print(user.screen_name)
print(user.followers_count)
for friend in user.friends():
    print(friend.screen_name)

elonmusk
48308839
CoinDesk
dogecoin_devs
dogecoin
cleantechnica
TheOnion
TheBabylonBee
karpathy
Astro_Soichi
PopMech
PyTorch
Nigel_Lockyer
jagarikin
AstroVicGlover
Tesmanian_com
flcnhvy
machineIearning
BBC_Future
kanyewest
teslacn
DeepMind


## Search by keyword on all tweets

In [38]:
# tweets = tweepy.Cursor(user.search, q=str(search_string), tweet_mode='extended', lang='pt').items(ct.num_of_tweets)
# tweet_search = api.search()
query = '$GNUS'
max_tweets = 100000
searched_tweets = [status for status in tweepy.Cursor(api.search, q=query, since="2021-2-22").items(max_tweets)]

In [39]:
print(searched_tweets[0])

Status(_api=<tweepy.api.API object at 0x0000019C107F42C8>, _json={'created_at': 'Tue Mar 02 01:13:38 +0000 2021', 'id': 1366557273758253063, 'id_str': '1366557273758253063', 'text': 'RT @PaulStillman8: $BBRW  Brewbilt assessment article. This is a must read! Things happening fast!\nhttps://t.co/ZeI6QZOv9W…\n$GAXY $TOPS $OP…', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [{'text': 'BBRW', 'indices': [19, 24]}, {'text': 'GAXY', 'indices': [124, 129]}, {'text': 'TOPS', 'indices': [130, 135]}], 'user_mentions': [{'screen_name': 'PaulStillman8', 'name': 'Paul Stillman', 'id': 1286817107703926785, 'id_str': '1286817107703926785', 'indices': [3, 17]}], 'urls': [{'url': 'https://t.co/ZeI6QZOv9W', 'expanded_url': 'https://reddit.com/r/pennystocks/comments/lueji3/brewbilt_bbrw_buy_thesis/?utm_source=amp&utm_medium=&utm_content=post_body', 'display_url': 'reddit.com/r/pennystocks/…', 'indices': [99, 122]}]}, 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'}, 'source'

Looks like there is a lot of information to dig into here. We might be able to just search on 'symbols' since this seems to already have the stock symbols taged. For now we'll just get a rough count of the number of tweets in the past week.

In [40]:
print(len(searched_tweets))

931


So we had 917 tweets in the past week with the $GNUS tag. Now we need to get this in a format we can use with pandas.

In [34]:
stock = 'GNUS'
start_date = dt.date(year=2021, month=2, day=22)
end_date = dt.date(year=2021, month=2, day=28)

searched_tweets = [status for status in tweepy.Cursor(api.search, q="${}".format(stock), since=start_date, until=end_date).items(max_tweets)]
print(len(searched_tweets))

553


In [121]:
def get_stock_tweets(stock, start_date, end_date):
    searched_tweets = [status for status in tweepy.Cursor(api.search, q="${}".format(stock), since=start_date, until=end_date).items(max_tweets)]
    return searched_tweets

In [125]:
# Test of function above
start_date = dt.datetime(year=2021, month=2, day=25)
end_date = dt.datetime(year=2021, month=2, day=28)
get_stock_tweets('GNUS', start_date, end_date)

[Status(_api=<tweepy.api.API object at 0x0000019C107F42C8>, _json={'created_at': 'Fri Feb 26 18:30:45 +0000 2021', 'id': 1365368721158664193, 'id_str': '1365368721158664193', 'text': "I know it's a wild market, so manage your risk, but $GNUS is curling up. It was at $3.00 less than a month ago.", 'truncated': False, 'entities': {'hashtags': [], 'symbols': [{'text': 'GNUS', 'indices': [52, 57]}], 'user_mentions': [], 'urls': []}, 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 260041054, 'id_str': '260041054', 'name': 'Rogelio Piedra', 'screen_name': 'RogelioPiedra', 'location': 'Ohio, USA', 'description': "Jesus is King, and life. Psalm 23. CSU MBA. i lift bro. Stock Trading, Auto Tech, Husband & Fa

In [118]:
starting_stocks = ['GNUS', 'XSPA', 'IBIO', 'GME', 'OPES']

In [126]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)
data = pd.DataFrame()
for stock in starting_stocks:
    tweets = get_stock_tweets(stock, start_date, end_date)
    
    stock_data = pd.DataFrame([{'datetime':tweet.created_at, 'tweet':tweet._json['text'], 'stock': stock} for tweet in tweets])
    data = data.append(stock_data)
data = data.set_index('datetime')

In [127]:
data.head()

Unnamed: 0_level_0,tweet,stock
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-02-26 18:30:45,"I know it's a wild market, so manage your risk...",GNUS
2021-02-25 08:41:55,We Alerted\n\n@everyone BTO ABNB 02/26 195P @6...,GNUS
2021-02-25 04:32:37,We Alerted\n\n@everyone BTO ABNB 02/26 195P @6...,GNUS
2021-02-25 01:46:52,We Alerted\n\n@everyone BTO ABNB 02/26 195P @6...,GNUS
2021-02-24 19:58:12,We Alerted\n\n@everyone BTO ABNB 02/26 195P @6...,GNUS


In [128]:
tweets_bucketed = data.groupby([pd.Grouper(freq='5min'), 'stock']).count().reset_index()
tweets_bucketed.head()

Unnamed: 0,datetime,stock,tweet
0,2021-02-22 12:50:00,GME,1
1,2021-02-22 13:25:00,GME,1
2,2021-02-22 13:35:00,GME,1
3,2021-02-22 13:40:00,GME,2
4,2021-02-22 14:00:00,GME,1


In [114]:
tweets_bucketed[tweets_bucketed['stock'] == 'GNUS']

Unnamed: 0,datetime,stock,tweet
20,2021-02-22 19:10:00,GNUS,1
93,2021-02-24 19:15:00,GNUS,1
101,2021-02-24 19:55:00,GNUS,1
147,2021-02-25 01:45:00,GNUS,1
161,2021-02-25 04:30:00,GNUS,1
174,2021-02-25 08:40:00,GNUS,1
307,2021-02-26 18:30:00,GNUS,1


In [129]:
tweets_bucketed.to_parquet('tweet_count.parquet')

## Generate tweets df using multiple API calls

In [42]:
starting_stocks = ['GNUS', 'XSPA', 'IBIO', 'GME', 'OPES']

In [76]:
def generate_intervals(minutes_interval, start_time, end_time):
    total_minutes = (end_time - start_time).total_seconds()/60
    return [end_time - dt.timedelta(minutes=x) for x in range(0, int(total_minutes), minutes_interval)]

In [81]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)
generate_intervals(5, start_date, end_date)

[datetime.datetime(2021, 2, 28, 0, 0),
 datetime.datetime(2021, 2, 27, 23, 55),
 datetime.datetime(2021, 2, 27, 23, 50),
 datetime.datetime(2021, 2, 27, 23, 45),
 datetime.datetime(2021, 2, 27, 23, 40),
 datetime.datetime(2021, 2, 27, 23, 35),
 datetime.datetime(2021, 2, 27, 23, 30),
 datetime.datetime(2021, 2, 27, 23, 25),
 datetime.datetime(2021, 2, 27, 23, 20),
 datetime.datetime(2021, 2, 27, 23, 15),
 datetime.datetime(2021, 2, 27, 23, 10),
 datetime.datetime(2021, 2, 27, 23, 5),
 datetime.datetime(2021, 2, 27, 23, 0),
 datetime.datetime(2021, 2, 27, 22, 55),
 datetime.datetime(2021, 2, 27, 22, 50),
 datetime.datetime(2021, 2, 27, 22, 45),
 datetime.datetime(2021, 2, 27, 22, 40),
 datetime.datetime(2021, 2, 27, 22, 35),
 datetime.datetime(2021, 2, 27, 22, 30),
 datetime.datetime(2021, 2, 27, 22, 25),
 datetime.datetime(2021, 2, 27, 22, 20),
 datetime.datetime(2021, 2, 27, 22, 15),
 datetime.datetime(2021, 2, 27, 22, 10),
 datetime.datetime(2021, 2, 27, 22, 5),
 datetime.datetime(20

In [83]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)

for stock in starting_stocks:
    print(stock)
    for time in generate_intervals(5, start_date, end_date):
        print(time.isoformat())
        stock_tweet_num = len(get_stock_tweets(stock, time, time + dt.timedelta(minutes=5)))
        print(stock_tweet_num)

GNUS
2021-02-28T00:00:00
0
2021-02-27T23:55:00
0
2021-02-27T23:50:00
0
2021-02-27T23:45:00
0
2021-02-27T23:40:00
0
2021-02-27T23:35:00
0
2021-02-27T23:30:00
0
2021-02-27T23:25:00
0
2021-02-27T23:20:00
0
2021-02-27T23:15:00
0
2021-02-27T23:10:00
0
2021-02-27T23:05:00
0
2021-02-27T23:00:00
0
2021-02-27T22:55:00
0
2021-02-27T22:50:00
0
2021-02-27T22:45:00
0
2021-02-27T22:40:00
0
2021-02-27T22:35:00
0
2021-02-27T22:30:00
0
2021-02-27T22:25:00
0
2021-02-27T22:20:00
0
2021-02-27T22:15:00
0
2021-02-27T22:10:00
0
2021-02-27T22:05:00
0
2021-02-27T22:00:00
0
2021-02-27T21:55:00
0
2021-02-27T21:50:00
0
2021-02-27T21:45:00
0
2021-02-27T21:40:00
0
2021-02-27T21:35:00
0
2021-02-27T21:30:00
0
2021-02-27T21:25:00
0
2021-02-27T21:20:00
0
2021-02-27T21:15:00
0
2021-02-27T21:10:00
0
2021-02-27T21:05:00
0
2021-02-27T21:00:00
0
2021-02-27T20:55:00
0
2021-02-27T20:50:00
0
2021-02-27T20:45:00
0
2021-02-27T20:40:00
0
2021-02-27T20:35:00
0
2021-02-27T20:30:00
0
2021-02-27T20:25:00
0
2021-02-27T20:20:00
0
2021-

TweepError: Twitter error response: status code = 429