In [1]:
import pandas as pd
import tweepy
import json

In [2]:
# Name of the data sub-folder (./data/<theme>/) that this notebook reads from and appends to.
theme: str = 'WorldCup'

In [3]:
# Load the API credentials from a local JSON file so that no secret is
# hard-coded in the notebook itself.
with open("credentials.json") as credentials_file:
    credentials = json.load(credentials_file)

In [4]:
# Build the tweepy client, authenticating with the bearer token taken from
# the credentials loaded above.
client = tweepy.Client(
    bearer_token=credentials['BEARER_TOKEN'],
)

In [5]:
# Start from two independent, empty frames: one for tweets, one for users.
tweets_df, users_df = pd.DataFrame(), pd.DataFrame()

In [6]:
# Hashtags to search for; each one is used as a separate search query.
queries = [
    '#QatarWorldCup2022',
    '#Qatar2022',
    '#FIFAWorldCup',
]

In [7]:
# Read only the `id` column of the tweets already stored for this theme.
indices = pd.read_csv(
    f'./data/{theme}/tweets.csv',
    usecols=['id'],
)

In [9]:
# Largest tweet id already stored on disk. `indices` is a DataFrame, so the
# `id` column must be selected explicitly: `indices.max()` would return a
# one-element Series rather than a scalar id.
latest_stored_id = indices['id'].max()

# Keyword arguments shared by every search request issued in the download loop.
params = {
    'max_results': 10,  # tweets requested per page
    'limit': 1,         # presumably consumed by tweepy.Paginator as the page cap per query
    'tweet_fields': ['id', 'text', 'public_metrics', 'author_id', 'created_at', 'lang'],
    'user_fields': ['username', 'created_at', 'public_metrics', 'protected', 'verified'],
    'expansions': ['author_id', 'entities.mentions.username'],
}

# Only restrict the search to newer tweets when a previous id actually exists;
# an empty `id` column yields NaN, which is not a valid `since_id`.
if pd.notna(latest_stored_id):
    params['since_id'] = int(latest_stored_id)

In [10]:
# Row accumulators: one inner list per tweet / per user.
tweet_list, users_list = [], []

In [11]:
# Download recent, non-retweet tweets for every hashtag in `queries`, together
# with the user objects returned alongside them, and build `tweets_df` /
# `users_df` from the result.

# Column layouts of the two result tables; the order must match the rows built below.
tweet_columns = ['hashtag', 'id', 'author_id', 'text', 'like_count', 'reply_count',
                 'retweet_count', 'quote_count', 'created_at', 'lang', 'mentions']
user_columns = ['id', 'username', 'created_at', 'followers_count', 'following_count',
                'tweet_count', 'protected', 'verified']

# Start from empty accumulators so that re-running this cell does not pile a
# second copy of the same rows on top of the previous run.
tweet_list, users_list = [], []

for query in queries:
    try:
        for page in tweepy.Paginator(client.search_recent_tweets, query=f'{query} -is:retweet', **params):
            # A page without matches carries `data=None`; treat it as empty.
            for tweet in page.data or []:
                # `entities` may be missing entirely, or present without any mentions.
                mentions = None
                if tweet.entities is not None:
                    mentions = [mention['id'] for mention in tweet.entities.get('mentions', [])]

                tweet_list.append([query,
                                   tweet.id,
                                   tweet.author_id,
                                   tweet.text,
                                   tweet.public_metrics['like_count'],
                                   tweet.public_metrics['reply_count'],
                                   tweet.public_metrics['retweet_count'],
                                   tweet.public_metrics['quote_count'],
                                   tweet.created_at,
                                   tweet.lang,
                                   mentions])

            # `includes` has no 'users' entry when the page returned nothing.
            for user in page.includes.get('users', []):
                users_list.append([user.id,
                                   user.username,
                                   user.created_at,
                                   user.public_metrics['followers_count'],
                                   user.public_metrics['following_count'],
                                   user.public_metrics['tweet_count'],
                                   user.protected,
                                   user.verified])
    except tweepy.TooManyRequests:
        print('WARNING: Tweet download stopped due to TooManyRequest exception. Wait 15 minutes...')
        break

# Build the frames exactly once, after the loop. Building and concatenating
# them inside the loop re-added the rows of every earlier query on each
# iteration, and dropped rows already fetched when the rate limit was hit.
tweets_df = pd.DataFrame(tweet_list, columns=tweet_columns)
users_df = pd.DataFrame(users_list, columns=user_columns)

In [12]:
# Display the tweets table as the cell's output for a quick visual check.
tweets_df

Unnamed: 0,hashtag,id,author_id,text,like_count,reply_count,retweet_count,quote_count,created_at,lang,mentions
0,#QatarWorldCup2022,1594660681584103429,2680000970,Be our voice #sarinaesmailzadeh is not with us...,0,0,0,0,2022-11-21 11:55:08+00:00,en,
1,#QatarWorldCup2022,1594660657060020224,1289554949358202880,#BREAKING\nSo far 40 people have died in the 5...,0,0,0,0,2022-11-21 11:55:02+00:00,en,
2,#QatarWorldCup2022,1594660649769914368,334492893,"🚨 RESMI! Timnas Inggris, Jerman, Belanda, dll ...",0,0,0,0,2022-11-21 11:55:00+00:00,in,
3,#QatarWorldCup2022,1594660644565188608,2680000970,Be our voice #sarinaesmailzadeh is not with us...,0,0,0,0,2022-11-21 11:54:59+00:00,en,
4,#QatarWorldCup2022,1594660643483045888,711683478,@piersmorgan #FIFAWorldCup #QatarWorldCup2022,0,0,0,0,2022-11-21 11:54:59+00:00,qme,[216299334]
5,#QatarWorldCup2022,1594660621781540864,246769710,#Qatar2022 #QatarWorldCup2022 #Katar\n#WM2022 ...,0,0,0,0,2022-11-21 11:54:54+00:00,de,
6,#QatarWorldCup2022,1594660609207205889,2680000970,Be our voice #sarinaesmailzadeh is not with us...,0,0,0,0,2022-11-21 11:54:51+00:00,en,
7,#QatarWorldCup2022,1594660595638599682,2680000970,Be our voice #sarinaesmailzadeh is not with us...,0,0,0,0,2022-11-21 11:54:48+00:00,en,
8,#QatarWorldCup2022,1594660579104673792,1564911227926716416,صور الشاشة وتوقع نتيجة مباراة #إنجلترا 🏴󠁧󠁢󠁥󠁮󠁧󠁿...,0,0,0,0,2022-11-21 11:54:44+00:00,ar,
9,#QatarWorldCup2022,1594660555352313857,2680000970,Be our voice #sarinaesmailzadeh is not with us...,0,0,0,0,2022-11-21 11:54:38+00:00,en,


In [None]:
# Display the users table as the cell's output for a quick visual check.
users_df

In [None]:
# Append this run's rows to the CSV files of the current theme.
# The files are opened in append mode, so neither the header row nor the
# DataFrame index is written. The pandas keyword is `header` (singular);
# `headers=False` is rejected with a TypeError.
tweets_df.to_csv(f'./data/{theme}/tweets.csv', mode='a', index=False, header=False)
users_df.to_csv(f'./data/{theme}/users.csv', mode='a', index=False, header=False)