# Twitter downloader
This notebook downloads tweets from Twitter. A search query and date has to be specified.

In [109]:
import json
import twitterClient
from dateutil import parser

In [110]:
def load_tweets(filename):
    """
    Loads the tweets from the file with the given name into an array of tweets.

    @param filename: The filename of the file to load the tweets from.

    @returns: An array of tweets.
    """
    tweets = []
    with open(filename, 'r') as f:
        for sLine in f:
            tweet = json.loads(sLine)
            tweets.append(tweet)
    return tweets

In [111]:
client = twitterClient.twitterClient()

In [112]:
# Define what tweets do download
search_query = 'coronation (kingcharles OR charles)'

end_time = parser.parse("05.05.2023 23:59")

# All non-authenticated tweet fields
all_tweet_fields = ['id', 'text', 'attachments', 'author_id', 'context_annotations', 'conversation_id', 'created_at', 'entities', 'geo', 'in_reply_to_user_id', 'lang', 'possibly_sensitive', 'public_metrics', 'referenced_tweets', 'reply_settings', 'source', 'withheld']

# The maximum amount of tweets to download
max_tweets = 100000  # 50000 was used here

# The filename of the file to store the tweets into
all_twitter_fields_filename = "../data/coronation_2023_05_05_3.json"

In [113]:
tweets = []

twitterResponse = client.search_recent_tweets(search_query, max_results=100, tweet_fields=all_tweet_fields, end_time=end_time) # User next_token="..." to continue downloading
while len(tweets) < max_tweets:
    try:
        twitterResponse = client.search_recent_tweets(search_query, max_results=100, tweet_fields=all_tweet_fields, next_token=twitterResponse.meta.get("next_token"), end_time=end_time)
    except:
        break
    finally:
        print(len(tweets))

    if twitterResponse.data is not None:
        for tweet in twitterResponse.data:
            tweets.append(tweet)

print("Number of tweets downloaded: ", len(tweets))
print(twitterResponse.meta.get("next_token"))

0
99
197
297
397
497
597
697
797
897
996
1096
1196
1295
1395
1495
1595
1695
1795
1894
1994
2094
2194
2294
2394
2494
2594
2693
2793
2893
2993
3093
3193
3293
3393
3493
3593
3693
3793
3893
3993
4093
4193
4293
4393
4493
4592
4692
4792
4892
4992
5092
5192
5292
5392
5492
5592
5692
5791
5891
5991
6091
6191
6291
6391
6491
6591
6691
6791
6891
6991
7091
7191
7291
7391
7491
7591
7691
7791
7891
7991
8091
8191
8291
8391
8491
8591
8691
8791
8891
8991
9091
9191
9290
9390
9490
9590
9690
9790
9890
9990
10090
10190
10290
10389
10489
10589
10689
10789
10889
10989
11089
11189
11289
11389
11489
11588
11688
11788
11888
11987
12087
12187
12287
12387
12487
12587
12687
12787
12887
12986
13086
13185
13285
13385
13485
13585
13685
13784
13884
13984
14084
14184
14284
14383
14483
14583
14683
14782
14882
14982
15082
15182
15282
15382
15481
15581
15681
15781
15881
15981
16081
16181
16281
16381
16481
16581
16681
16781
16881
16980
17080
17180
17280
17380
17480
17580
17680
17779
17879
17979
18079
18179
18279
18379
18478

In [114]:
print(len(tweets))

44796
None


In [115]:
with open(all_twitter_fields_filename, 'w') as json_file:
    for tweet in tweets:
        json.dump(tweet.data, json_file)
        json_file.write('\n')

print("Tweets successfully stored to: ", all_twitter_fields_filename)

Tweets successfully stored to:  ../data/coronation_2023_05_05_3.json
