In [1]:
import json
from pathlib import Path

import pandas as pd
import tweepy

In [2]:
# Read the API credentials from a local JSON file so that no secret is
# written into the notebook itself.
with open("credentials.json") as credentials_file:
    credentials = json.load(credentials_file)

In [3]:
# Twitter API v2 client authenticated with the bearer token taken from the
# loaded credentials.
client = tweepy.Client(
    bearer_token=credentials['BEARER_TOKEN'],
)

In [4]:
# Empty result tables; the collection cell below fills them with the
# downloaded tweets and users.
tweets_df, users_df = pd.DataFrame(), pd.DataFrame()

In [5]:
# Search terms to collect; each entry is searched separately and recorded in
# the 'hashtag' column of the tweets table.
queries = [
    '#Qatar2022',
]

In [6]:
# Keyword arguments unpacked into tweepy.Paginator in the collection cell.
# 'limit' is consumed by the paginator itself (number of requests/pages);
# the remaining keys are forwarded to client.search_recent_tweets.
params = dict(
    max_results=100,
    limit=30,
    tweet_fields=['id', 'text', 'public_metrics', 'author_id', 'created_at', 'lang'],
    user_fields=['username', 'created_at', 'public_metrics', 'protected', 'verified'],
    expansions=['author_id', 'entities.mentions.username'],
)

In [7]:
# Row accumulators: one inner list per tweet / per user, turned into
# DataFrames by the collection cell.
tweet_list, users_list = [], []

In [8]:
# Download recent tweets (retweets excluded) for every entry in `queries`,
# together with the user objects returned alongside them, and build the
# `tweets_df` / `users_df` tables.
#
# The tables are built ONCE, after all queries have been read. Building a
# frame from the ever-growing row lists inside the query loop and
# concatenating it each time would repeat the rows of earlier queries.

tweet_columns = ['hashtag', 'id', 'author_id', 'text', 'like_count', 'reply_count',
                 'retweet_count', 'quote_count', 'created_at', 'lang', 'mentions']
user_columns = ['id', 'username', 'created_at', 'followers_count', 'following_count',
                'tweet_count', 'protected', 'verified']

# Start from empty accumulators so that re-running this cell does not append
# a second copy of every row.
tweet_list = []
users_list = []

for query in queries:
    pages = tweepy.Paginator(client.search_recent_tweets, query=f'{query} -is:retweet', **params)
    for chunk in pages:
        # `data` is None (not an empty list) when a page has no matching tweets.
        for tweet in chunk.data or []:
            metrics = tweet.public_metrics

            # `entities` may be missing altogether, or present without a
            # 'mentions' key; in both cases the tweet has no mentions.
            entities = tweet.entities or {}
            mentions = None
            if 'mentions' in entities:
                mentions = [mention['id'] for mention in entities['mentions']]

            tweet_list.append([query,
                               tweet.id,
                               tweet.author_id,
                               tweet.text,
                               metrics['like_count'],
                               metrics['reply_count'],
                               metrics['retweet_count'],
                               metrics['quote_count'],
                               tweet.created_at,
                               tweet.lang,
                               mentions])

        # `includes` has no 'users' entry when the page carried no expansions.
        for user in chunk.includes.get('users', []):
            user_metrics = user.public_metrics
            users_list.append([user.id,
                               user.username,
                               user.created_at,
                               user_metrics['followers_count'],
                               user_metrics['following_count'],
                               user_metrics['tweet_count'],
                               user.protected,
                               user.verified])

# One row per collected tweet / per included user, with a clean 0..n-1 index.
tweets_df = pd.DataFrame(tweet_list, columns=tweet_columns)
users_df = pd.DataFrame(users_list, columns=user_columns)

In [9]:
# Show the collected tweets table as this cell's output.
tweets_df

Unnamed: 0,hashtag,id,author_id,text,like_count,reply_count,retweet_count,quote_count,created_at,lang,mentions
0,#Qatar2022,1592601212511133696,3254197047,Uffà...ma quando finiscono i mondiali di #Qata...,0,0,0,0,2022-11-15 19:31:32+00:00,it,
1,#Qatar2022,1592601205595070464,1441282471400673280,Η εντυπωσιακή σύμπτωση με τα «αστέρια» της Παρ...,0,0,0,0,2022-11-15 19:31:31+00:00,el,
2,#Qatar2022,1592601191761838081,1591065849531604994,Massage and sex in Qatar❤️\nMassage body to bo...,0,0,0,0,2022-11-15 19:31:27+00:00,en,
3,#Qatar2022,1592601170014412800,2901835883,#Qatar2022 | NKUNKU ENCIENDE LAS ALARMAS 🚨\n\n...,0,0,0,0,2022-11-15 19:31:22+00:00,es,
4,#Qatar2022,1592601169922523136,1456620452261072896,"Ils étaient en 2018 en Russie, ils sont retenu...",0,0,0,0,2022-11-15 19:31:22+00:00,fr,
...,...,...,...,...,...,...,...,...,...,...,...
95,#Qatar2022,1592600069244522496,186607467,Gianni Infantino presidente de la FIFA pidió e...,0,0,0,0,2022-11-15 19:27:00+00:00,es,[786249795014844416]
96,#Qatar2022,1592600045530288128,4704502941,#Qatar2022 World Cup leaves migrant workers de...,0,0,0,0,2022-11-15 19:26:54+00:00,en,
97,#Qatar2022,1592600043483455489,123372110,🌈Me gustaría que si @shakira actúa al final en...,0,0,0,0,2022-11-15 19:26:54+00:00,es,"[44409004, 138372303, 140070953]"
98,#Qatar2022,1592600042443264000,1128351207704207360,@deporteatc #Qatar2022 Alemania,0,0,0,0,2022-11-15 19:26:53+00:00,es,[1306298260823388161]


In [14]:
# Show the collected users table as this cell's output.
users_df

Unnamed: 0,id,username,created_at,followers_count,following_count,tweet_count,protected,verified
0,3254197047,PalloneBucato,2015-05-14 16:48:40+00:00,558,610,23651,False,False
1,1441282471400673280,BNsportsGr,2021-09-24 06:05:17+00:00,1260,142,19825,False,False
2,1591065849531604994,mistresslily85,2022-11-11 13:50:48+00:00,0,5,7,False,False
3,2901835883,lacasacatv,2014-12-02 04:01:41+00:00,1991,445,7163,False,False
4,1456620452261072896,XavBarretFoot,2021-11-05 13:54:59+00:00,631,205,624,False,False
...,...,...,...,...,...,...,...,...
175,44409004,shakira,2009-06-03 17:38:07+00:00,53312206,233,7961,False,True
176,140070953,FIFAcom,2010-05-04 13:48:36+00:00,15673346,267,90459,False,True
177,1128351207704207360,LolaPer19200564,2019-05-14 17:27:46+00:00,199,4792,36462,False,False
178,1306298260823388161,deporteatc,2020-09-16 18:26:03+00:00,912,152,2065,False,False


In [15]:
# Append this run's rows to the CSV datasets under ./data.
#
# With mode='a' pandas still writes the column header by default, so every
# additional run would insert a header line in the middle of the file. The
# header is therefore written only when the target file is new or empty.
data_dir = Path('./data')
data_dir.mkdir(parents=True, exist_ok=True)  # the first run may not have the folder yet

for frame, csv_path in ((tweets_df, data_dir / 'tweets.csv'),
                        (users_df, data_dir / 'users.csv')):
    needs_header = not csv_path.exists() or csv_path.stat().st_size == 0
    frame.to_csv(csv_path, mode='a', header=needs_header, index=False)