# Twitter downloader
This notebook downloads tweets from Twitter. A search query and an end date have to be specified.

In [None]:
import json
import twitterClient
from dateutil import parser

In [None]:
def load_tweets(filename):
    """
    Loads the tweets from the file with the given name into an array of tweets.

    The file is expected to contain one JSON document per line. Lines that are
    empty or consist only of whitespace (e.g. a trailing blank line) are skipped
    instead of aborting the whole load.

    @param filename: The filename of the file to load the tweets from.

    @returns: An array of tweets (one parsed JSON value per non-empty line).

    @raises json.JSONDecodeError: If a non-empty line is not valid JSON.
    """
    tweets = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no tweet; json.loads would raise on them.
                continue
            tweets.append(json.loads(line))
    return tweets

In [None]:
# Create the client object that the download cell uses to query tweets.
# NOTE(review): twitterClient is a project-local module; presumably it sets up
# the API credentials - confirm against that module.
client = twitterClient.twitterClient()

In [None]:
# Define which tweets to download

# The filename of the file to store the tweets into
all_twitter_fields_filename = "../data/coronation_2023_05_05_3.json"

# The maximum amount of tweets to download
max_tweets = 100000  # 50000 was used here

# Search query and the end time passed to the search request
search_query = 'coronation (kingcharles OR charles)'
end_time = parser.parse("05.05.2023 23:59")

# All non-authenticated tweet fields
all_tweet_fields = [
    'id',
    'text',
    'attachments',
    'author_id',
    'context_annotations',
    'conversation_id',
    'created_at',
    'entities',
    'geo',
    'in_reply_to_user_id',
    'lang',
    'possibly_sensitive',
    'public_metrics',
    'referenced_tweets',
    'reply_settings',
    'source',
    'withheld',
]

In [None]:
# Download the tweets page by page (up to 100 tweets per request) until at
# least max_tweets have been collected, the search has no further pages, or a
# request fails. The last page is kept in full, so the final count may exceed
# max_tweets by up to one page.
tweets = []

# Pagination token of the page to request next. None requests the first page;
# set it to a previously printed token ("...") to continue an earlier download.
next_token = None

while len(tweets) < max_tweets:
    request_options = {
        "max_results": 100,
        "tweet_fields": all_tweet_fields,
        "end_time": end_time,
    }
    if next_token is not None:
        request_options["next_token"] = next_token

    try:
        twitterResponse = client.search_recent_tweets(search_query, **request_options)
    except KeyboardInterrupt:
        # Allow stopping a long download manually while keeping what was fetched.
        print("Download interrupted by user.")
        break
    except Exception as error:
        # Best effort: keep the tweets downloaded so far, but report why we stopped.
        print("Download stopped after an error: ", repr(error))
        break

    # Store every page, including the first one, before requesting the next.
    if twitterResponse.data is not None:
        tweets.extend(twitterResponse.data)
    print(len(tweets))

    # Without a next_token there are no further pages; requesting again would
    # restart the search from the first page and collect duplicates.
    next_token = twitterResponse.meta.get("next_token")
    if next_token is None:
        break

print("Number of tweets downloaded: ", len(tweets))
# Token of the next page that was not downloaded (None if the search was exhausted)
print(next_token)

In [None]:
# Show how many tweets are currently held in the `tweets` list
print(len(tweets))

In [None]:
# Store the downloaded tweets as JSON lines: one serialized tweet per line
with open(all_twitter_fields_filename, 'w') as output_file:
    for downloaded_tweet in tweets:
        serialized_tweet = json.dumps(downloaded_tweet.data)
        output_file.write(serialized_tweet + '\n')

print("Tweets successfully stored to: ", all_twitter_fields_filename)