In [1]:
import pandas as pd
import tweepy
import json

In [2]:
# Name of the data sub-folder (./data/<theme>/) that this notebook reads from and appends to.
theme = "WorldCup"

In [3]:
# Read the API credentials from the JSON file sitting next to the notebook.
with open("credentials.json") as credentials_file:
    credentials = json.loads(credentials_file.read())

In [4]:
# Build the tweepy client, authenticating with the bearer token from the credentials file.
client = tweepy.Client(
    bearer_token=credentials['BEARER_TOKEN'],
)

In [5]:
# Start from two independent, empty frames; the download loop concatenates results onto them.
tweets_df, users_df = pd.DataFrame(), pd.DataFrame()

In [6]:
# Hashtags to search for; each one is issued as a separate search query.
queries = [
    '#QatarWorldCup2022',
    '#Qatar2022',
    '#FIFAWorldCup',
]

In [7]:
# Load only the `id` column of the tweets already stored for this theme.
indices = pd.read_csv(
    f'./data/{theme}/tweets.csv',
    usecols=['id'],
)

In [8]:
# Highest tweet id already stored on disk. `indices` is a DataFrame, so the
# `id` column is selected explicitly to obtain a scalar rather than a Series.
last_seen_id = indices['id'].max()

# Keyword arguments forwarded to tweepy.Paginator / client.search_recent_tweets.
params = {
    'max_results': 10,  # tweets requested per page
    'limit': 1,         # number of pages the paginator fetches per query
    'tweet_fields': ['id', 'text', 'public_metrics', 'author_id', 'created_at', 'lang'],
    'user_fields': ['username', 'created_at', 'public_metrics', 'protected', 'verified'],
    'expansions': ['author_id', 'entities.mentions.username'],
}

# Only restrict the search to newer tweets when a previous id actually exists:
# the max of an empty column is NaN, which is not a valid `since_id`.
if pd.notna(last_seen_id):
    params['since_id'] = int(last_seen_id)

In [9]:
# Raw row accumulators filled by the download loop (one inner list per tweet / user).
tweet_list = []
users_list = []

In [10]:
# Column layouts of the rows built below for tweets and users respectively.
tweet_columns = ['hashtag', 'id', 'author_id', 'text', 'like_count', 'reply_count',
                 'retweet_count', 'quote_count', 'created_at', 'lang', 'mentions']
user_columns = ['id', 'username', 'created_at', 'followers_count', 'following_count',
                'tweet_count', 'protected', 'verified']

# Download the recent (non-retweet) tweets for every hashtag in `queries` and
# accumulate them, together with the expanded user objects, into
# `tweet_list` / `users_list` and `tweets_df` / `users_df`.
for query in queries:
    # Rows collected for THIS query only. Building the DataFrame from the
    # cumulative `tweet_list` on every iteration would re-append the rows of
    # all previous queries and duplicate them in `tweets_df`.
    query_tweets = []
    query_users = []
    rate_limited = False

    try:
        for chunk in tweepy.Paginator(client.search_recent_tweets, query=f'{query} -is:retweet', **params):
            # tweepy leaves `data` as None for a page without matching tweets.
            for tweets in chunk.data or []:
                metrics = tweets.public_metrics

                # `entities` may be missing, or present without a 'mentions' key;
                # in both cases the tweet is stored with no mentions (None).
                entities = tweets.entities or {}
                mentions = None
                if 'mentions' in entities:
                    mentions = [mention['id'] for mention in entities['mentions']]

                query_tweets.append([query,
                                     tweets.id,
                                     tweets.author_id,
                                     tweets.text,
                                     metrics['like_count'],
                                     metrics['reply_count'],
                                     metrics['retweet_count'],
                                     metrics['quote_count'],
                                     tweets.created_at,
                                     tweets.lang,
                                     mentions])

            # The 'users' expansion is absent when the page carried no user objects.
            for user in (chunk.includes or {}).get('users', []):
                query_users.append([user.id,
                                    user.username,
                                    user.created_at,
                                    user.public_metrics['followers_count'],
                                    user.public_metrics['following_count'],
                                    user.public_metrics['tweet_count'],
                                    user.protected,
                                    user.verified])
    except tweepy.TooManyRequests:
        print('WARNING: Tweet download stopped due to TooManyRequest exception. Wait 15 minutes...')
        rate_limited = True

    # Store whatever was fetched for this query, including the pages received
    # before a rate-limit error, so that partial downloads are not lost.
    tweet_list.extend(query_tweets)
    users_list.extend(query_users)

    temp_tweets = pd.DataFrame(query_tweets, columns=tweet_columns)
    temp_users = pd.DataFrame(query_users, columns=user_columns)

    # ignore_index gives the accumulated frames one continuous 0..n-1 index
    # instead of repeating each per-query index.
    tweets_df = pd.concat([tweets_df, temp_tweets], ignore_index=True)
    users_df = pd.concat([users_df, temp_users], ignore_index=True)

    if rate_limited:
        # Further queries would hit the same rate limit; stop here.
        break

In [11]:
# Show the accumulated tweets DataFrame as this cell's output.
tweets_df

Unnamed: 0,hashtag,id,author_id,text,like_count,reply_count,retweet_count,quote_count,created_at,lang,mentions
0,#QatarWorldCup2022,1594689322015674377,1486192799078916097,Juega muy Bien esta Inglaterra 🥰\n#FIFAWorldCu...,0,0,0,0,2022-11-21 13:48:56+00:00,es,
1,#QatarWorldCup2022,1594689321822433281,1579287543736340480,FIFA World Cup Qatar 2022 !! England vs Iran L...,0,0,0,0,2022-11-21 13:48:56+00:00,en,
2,#QatarWorldCup2022,1594689321797357568,1578295605243383809,@footballdaily @3LionsOnMaShirt \n\nSay her na...,0,0,0,0,2022-11-21 13:48:56+00:00,en,"[278038673, 1571546683556626438]"
3,#QatarWorldCup2022,1594689321650786304,1552710950205136896,"Que time horrível, esse do Iran kkkkk\n\n#Copa...",0,0,0,0,2022-11-21 13:48:56+00:00,pt,
4,#QatarWorldCup2022,1594689321638178816,1432619663788290049,"Le temps additionnel, c'est une mi-temps\n #an...",0,0,0,0,2022-11-21 13:48:56+00:00,fr,
5,#QatarWorldCup2022,1594689321105346560,520970595,Ojo con Inglaterra que hoy puede meter al meno...,0,0,0,0,2022-11-21 13:48:56+00:00,es,
6,#QatarWorldCup2022,1594689320685867012,1537404126145007617,England vs Iran (Live) \n...,0,0,0,0,2022-11-21 13:48:56+00:00,en,
7,#QatarWorldCup2022,1594689319964659712,1141762709274472454,Complete dominance by the three lions ... #ENG...,0,0,0,0,2022-11-21 13:48:56+00:00,en,
8,#QatarWorldCup2022,1594689319880781825,1588175502891941890,⚽ Watch Qatar FIFA World Cup 2022 England vs I...,0,0,0,0,2022-11-21 13:48:56+00:00,en,
9,#QatarWorldCup2022,1594689319834361858,1437092812470771718,سورئال‌ترین لحظات فوتبال ایران رو شاهد هستیم. ...,0,0,0,0,2022-11-21 13:48:56+00:00,fa,


In [12]:
# Show the accumulated users DataFrame as this cell's output.
users_df

Unnamed: 0,id,username,created_at,followers_count,following_count,tweet_count,protected,verified
0,1486192799078916097,OscarRu31381192,2022-01-26 04:22:48+00:00,0,27,20,False,False
1,1579287543736340480,LiverpoolSport1,2022-10-10 01:48:07+00:00,1,12,85,False,False
2,1578295605243383809,ShireenMKMA1,2022-10-07 08:07:44+00:00,124,814,1734,False,False
3,278038673,footballdaily,2011-04-06 13:48:45+00:00,563021,1234,85930,False,True
4,1571546683556626438,3LionsOnMaShirt,2022-09-18 17:08:28+00:00,1119,1558,511,False,False
...,...,...,...,...,...,...,...,...
30,968631815798968321,NickGoldingF1E,2018-02-27 23:39:55+00:00,241,537,2746,False,False
31,1021482631735791618,PjstudioD,2018-07-23 19:50:12+00:00,11,62,748,False,False
32,4408004861,JorgeRubio99,2015-12-07 19:39:40+00:00,607,1338,9823,False,False
33,871317248715087872,doramanine,2017-06-04 10:46:35+00:00,974,845,33336,False,False


In [13]:
# Append this run's rows to the existing CSV files: no index column and no
# header row, so the new lines simply extend what is already on disk.
append_options = dict(mode='a', index=False, header=False)

tweets_df.to_csv(f'./data/{theme}/tweets.csv', **append_options)
users_df.to_csv(f'./data/{theme}/users.csv', **append_options)