In [38]:
import json
from pathlib import Path

import pandas as pd
import tweepy

In [39]:
# Load the API credentials from a local JSON file instead of hardcoding them.
with open("credentials.json") as credentials_file:
    credentials = json.load(credentials_file)

In [40]:
# Build the API client from the bearer token stored under 'BEARER_TOKEN'.
bearer_token = credentials['BEARER_TOKEN']
client = tweepy.Client(bearer_token=bearer_token)

In [41]:
# Empty accumulators; the download cell concatenates fetched rows onto these.
tweets_df, users_df = pd.DataFrame(), pd.DataFrame()

In [42]:
# Search terms; each one is sent as its own recent-search query.
queries = [
    '#Polish',
    'polish',
    '#Poland',
    'Poland',
    '#NATO',
    'NATO',
    'Przewodów',
]

In [63]:
# Keyword arguments unpacked into tweepy.Paginator(client.search_recent_tweets, ...)
# by the download cell.
params = {
    'max_results': 100,
    'limit': 10,
    'tweet_fields': ['id', 'text', 'public_metrics', 'author_id', 'created_at', 'lang'],
    'user_fields': ['username', 'created_at', 'public_metrics', 'protected', 'verified'],
    'expansions': ['author_id', 'entities.mentions.username'],
}

# `df` is not created by any visible cell, so referencing it directly raises
# NameError on a fresh kernel (and IndexError when the frame is empty).
# NOTE(review): presumably `df` holds previously saved tweets with the newest
# one in row 0 -- confirm where it is supposed to be loaded from.
previous_tweets = globals().get('df')
if previous_tweets is not None and len(previous_tweets) > 0:
    # Only ask for tweets newer than the first row of the earlier frame.
    params['since_id'] = previous_tweets.iloc[0]['id']

In [64]:
# Raw row buffers filled by the download cell (one inner list per tweet / user).
tweet_list, users_list = [], []

In [65]:
# Download recent tweets (retweets excluded) for every search term, then append
# the rows fetched in THIS run to tweets_df / users_df exactly once.
TWEET_COLUMNS = ['hashtag', 'id', 'author_id', 'text', 'like_count', 'reply_count',
                 'retweet_count', 'quote_count', 'created_at', 'lang', 'mentions']
USER_COLUMNS = ['id', 'username', 'created_at', 'followers_count', 'following_count',
                'tweet_count', 'protected', 'verified']

# Rows fetched by this run only. The frames are built from these rather than from
# the ever-growing tweet_list / users_list, otherwise every earlier query's rows
# would be concatenated again on each iteration.
new_tweets = []
new_users = []

for query in queries:
    try:
        for chunk in tweepy.Paginator(client.search_recent_tweets,
                                      query=f'{query} -is:retweet', **params):
            # A page without matches carries data=None.
            for tweet in chunk.data or []:
                metrics = tweet.public_metrics
                entities = tweet.entities or {}
                # None (not an empty list) when the tweet mentions nobody.
                mentions = ([mention['id'] for mention in entities['mentions']]
                            if 'mentions' in entities else None)
                new_tweets.append([query,
                                   tweet.id,
                                   tweet.author_id,
                                   tweet.text,
                                   metrics['like_count'],
                                   metrics['reply_count'],
                                   metrics['retweet_count'],
                                   metrics['quote_count'],
                                   tweet.created_at,
                                   tweet.lang,
                                   mentions])

            # The 'users' include is absent when the page expanded no users.
            for user in chunk.includes.get('users', []):
                user_metrics = user.public_metrics
                new_users.append([user.id,
                                  user.username,
                                  user.created_at,
                                  user_metrics['followers_count'],
                                  user_metrics['following_count'],
                                  user_metrics['tweet_count'],
                                  user.protected,
                                  user.verified])
    except tweepy.TooManyRequests:
        print('WARNING: Tweet download stopped due to TooManyRequest exception. Wait 15 minutes...')
        break

# Keep the raw row buffers in sync for any cell that reads them.
tweet_list.extend(new_tweets)
users_list.extend(new_users)

# Built after the loop so rows fetched before a rate-limit stop are kept too.
temp_tweets = pd.DataFrame(new_tweets, columns=TWEET_COLUMNS)
temp_users = pd.DataFrame(new_users, columns=USER_COLUMNS)

# ignore_index gives the combined frames a unique 0..n-1 index.
tweets_df = pd.concat([tweets_df, temp_tweets], ignore_index=True)
users_df = pd.concat([users_df, temp_users], ignore_index=True)

In [66]:
# Show the collected tweets; the bare expression renders the frame as the cell output.
tweets_df

Unnamed: 0,hashtag,id,author_id,text,like_count,reply_count,retweet_count,quote_count,created_at,lang,mentions
0,#Polish,1592619245854027776,1418420371808657408,VlSlT https://t.co/K0X0jhi9Hh\n\n#Poland #NATO...,0,0,0,0,2022-11-15 20:43:12+00:00,de,
1,#Polish,1592619191789441024,922633274236665856,VlSlT https://t.co/zZGM30T35R\n\n#Poland #NATO...,0,0,0,0,2022-11-15 20:42:59+00:00,de,
2,#Polish,1592618914902872065,362611279,#RussianMissiles struck a #Polish village kill...,1,0,1,0,2022-11-15 20:41:53+00:00,en,
3,#Polish,1592618834200236032,1638601424,#RussiaIsATerroristState \nToday #russian miss...,1,0,0,0,2022-11-15 20:41:34+00:00,en,
4,#Polish,1592618710908686336,100620062,"#Polish and NATO air defenses, as it turns out...",0,0,0,0,2022-11-15 20:41:04+00:00,en,
...,...,...,...,...,...,...,...,...,...,...,...
5710,Przewodów,1592619276988710913,4287301523,@amir_nourdine Le site a été complètement coup...,0,0,0,0,2022-11-15 20:43:19+00:00,fr,[4052297554]
5711,Przewodów,1592619267526053888,1520140070656880641,2 misiles impactan contra la localidad polaca ...,0,0,0,0,2022-11-15 20:43:17+00:00,es,
5712,Przewodów,1592619256130457600,935192852652584961,Polonya'nın Przewodow kasabasına düşen roketle...,0,0,0,0,2022-11-15 20:43:14+00:00,tr,
5713,Przewodów,1592619253244788737,1560915082611228674,An explosion of unexplained cause occurred in ...,1,0,0,0,2022-11-15 20:43:14+00:00,en,


In [47]:
# Show the collected users; the bare expression renders the frame as the cell output.
users_df

Unnamed: 0,id,username,created_at,followers_count,following_count,tweet_count,protected,verified
0,1418420371808657408,raipachinko,2021-07-23 03:59:16+00:00,149,174,389,False,False
1,922633274236665856,FarsMirafuentes,2017-10-24 01:18:09+00:00,230,201,13191,False,False
2,362611279,DannyVoglesong,2011-08-26 17:51:19+00:00,1738,4988,94248,False,False
3,1638601424,taika_i_titi,2013-08-01 18:41:46+00:00,404,276,5010,False,False
4,100620062,slavamakarov,2009-12-30 23:10:19+00:00,1390,1335,66032,False,False
...,...,...,...,...,...,...,...,...
49304,15506669,JeffBezos,2008-07-20 22:38:36+00:00,5593583,109,379,False,True
49305,2425151,Meta,2007-03-27 07:29:25+00:00,13907764,719,15924,False,True
49306,417214214,Quasialex,2011-11-20 17:35:47+00:00,1233,2452,21928,False,False
49307,1433822394800541701,QstnblSpdr,2021-09-03 16:01:25+00:00,90,175,2533,False,False


In [69]:
# Append this run's frames to the CSV files under ./data/poland.
output_dir = Path('./data/poland')
# to_csv does not create missing directories.
output_dir.mkdir(parents=True, exist_ok=True)

for frame, filename in ((tweets_df, 'tweets.csv'), (users_df, 'users.csv')):
    target = output_dir / filename
    # In append mode pandas writes the header again on every call; emit it only
    # when the file is new or empty so no header line ends up between data rows.
    write_header = not target.exists() or target.stat().st_size == 0
    frame.to_csv(target, mode='a', index=False, header=write_header)