# Testing out some features on the Twitter API using Tweepy

In [8]:
import json
import tweepy
import datetime as dt
import pandas as pd

In [9]:
with open('keys.json') as keys:
    config = json.load(keys)
auth = tweepy.OAuthHandler(config['twitter']['API_key'], config['twitter']['API_secret_key'])
# auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

## Example account (Elon Musk)

In [None]:
user = api.get_user('elonmusk')

In [None]:
print(user.screen_name)
print(user.followers_count)
for friend in user.friends():
    print(friend.screen_name)

## Search by keyword on all tweets

In [10]:
# tweets = tweepy.Cursor(user.search, q=str(search_string), tweet_mode='extended', lang='pt').items(ct.num_of_tweets)
# tweet_search = api.search()
query = '$GNUS'
max_tweets = 100000
searched_tweets = [status for status in tweepy.Cursor(api.search, q=query, since="2021-2-22").items(max_tweets)]

In [None]:
print(searched_tweets[0])

Looks like there is a lot of information to dig into here. We might be able to just search on 'symbols' since this seems to already have the stock symbols taged. For now we'll just get a rough count of the number of tweets in the past week.

In [11]:
print(len(searched_tweets))

3479


So we had 917 tweets in the past week with the $GNUS tag. Now we need to get this in a format we can use with pandas.

In [None]:
stock = 'GNUS'
start_date = dt.date(year=2021, month=2, day=22)
end_date = dt.date(year=2021, month=2, day=28)

searched_tweets = [status for status in tweepy.Cursor(api.search, q="${}".format(stock), since=start_date, until=end_date).items(max_tweets)]
print(len(searched_tweets))

In [16]:
def get_stock_tweets(stock, start_date, end_date=None, max_tweets=100000):
    searched_tweets = [status for status in tweepy.Cursor(api.search, q="${}".format(stock), since=start_date, until=end_date).items(max_tweets)]
    return searched_tweets

In [18]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=3, day=22)
tweets = get_stock_tweets('GNUS', start_date)
print(len(tweets))

12


In [None]:
# Test of function above
start_date = dt.datetime(year=2021, month=2, day=25)
end_date = dt.datetime(year=2021, month=2, day=28)
get_stock_tweets('GNUS', start_date, end_date)

In [None]:
starting_stocks = ['GNUS', 'XSPA', 'IBIO', 'GME', 'OPES']

In [None]:
start_date = dt.datetime(year=2021, month=3, day=15)
end_date = dt.datetime(year=2021, month=3, day=22)
data = pd.DataFrame()
for stock in starting_stocks:
    tweets = get_stock_tweets(stock, start_date, end_date, max_tweets=)
    
    stock_data = pd.DataFrame([{'datetime':tweet.created_at, 'tweet':tweet._json['text'], 'stock': stock} for tweet in tweets])
    data = data.append(stock_data)
data = data.set_index('datetime')

In [None]:
data.head()

In [None]:
tweets_bucketed = data.groupby([pd.Grouper(freq='5min'), 'stock']).count().reset_index()
tweets_bucketed.head()

In [None]:
tweets_bucketed[tweets_bucketed['stock'] == 'XSPA']

In [None]:
tweets_bucketed.to_parquet('tweet_count.parquet')

## Generate tweets df using multiple API calls

In [None]:
starting_stocks = ['GNUS', 'XSPA', 'IBIO', 'GME', 'OPES']

In [None]:
def generate_intervals(minutes_interval, start_time, end_time):
    total_minutes = (end_time - start_time).total_seconds()/60
    return [end_time - dt.timedelta(minutes=x) for x in range(0, int(total_minutes), minutes_interval)]

In [None]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)
generate_intervals(5, start_date, end_date)

In [None]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)

for stock in starting_stocks:
    print(stock)
    for time in generate_intervals(5, start_date, end_date):
        print(time.isoformat())
        stock_tweet_num = len(get_stock_tweets(stock, time, time + dt.timedelta(minutes=5)))
        print(stock_tweet_num)

## Testing functions for twitter_data.py

In [27]:
def get_stock_tweets_list(stock, start_date, end_date=None, max_tweets=None):
    searched_tweets = [status for status in tweepy.Cursor(api.search, q="${}".format(stock), since=start_date, until=end_date).items()]
    return searched_tweets

In [32]:
def generate_df(stocks, start_date, end_date=None):
    data = pd.DataFrame()
    for stock in stocks:
        tweets = get_stock_tweets_list(stock, start_date, end_date)
        json_data = [tweet._json for tweet in tweets]
        stock_data = pd.io.json.json_normalize(json_data)
        stock_data['stock'] = stock
        data = data.append(stock_data)
    data = data.set_index('created_at')
    return data

In [35]:
start_date = dt.datetime(year=2021, month=2, day=22)
end_date = dt.datetime(year=2021, month=2, day=28)
test_df = generate_df(['GNUS'], start_date)

In [36]:
print(test_df.shape)

(12, 286)
