In [1]:
import pandas as pd
import tweepy
import json

In [2]:
# Load the API secrets from a local JSON file instead of hardcoding them here.
with open("credentials.json") as credentials_file:
    credentials = json.load(credentials_file)

In [3]:
# Build the tweepy client, authenticating with the bearer token read from
# the credentials file.
client = tweepy.Client(
    bearer_token=credentials['BEARER_TOKEN'],
)

In [4]:
# Start from two independent, empty result tables (tweets and their users).
tweets_df, users_df = pd.DataFrame(), pd.DataFrame()

In [5]:
# Search terms to download; each one is sent as its own search query and is
# stored alongside the tweets it returned.
# Alternative query set used for another topic:
# queries = ['#Polish', 'polish', '#Poland', 'Poland', '#NATO', 'NATO', 'Przewodów', '#WWIII']
queries = [
    '#Trump',
    'Trump',
]

In [6]:
# Read only the `id` column of the tweets saved by earlier runs; it is used
# to resume the download after the newest stored tweet.
indices = pd.read_csv('./data/Trump/tweets.csv', usecols=['id'])

In [7]:
# Newest tweet id already stored on disk. `indices` is a one-column DataFrame,
# so the column has to be selected explicitly: `indices.max()` would return a
# Series rather than the scalar id the API expects.
latest_id = indices['id'].max()

# Keyword arguments for `tweepy.Paginator`: `limit` caps the number of pages
# fetched per query, everything else is forwarded to the search endpoint.
params = {
    'max_results': 100,
    'limit': 50,
    'tweet_fields': ['id', 'text', 'public_metrics', 'author_id', 'created_at', 'lang'],
    'user_fields': ['username', 'created_at', 'public_metrics', 'protected', 'verified'],
    'expansions': ['author_id', 'entities.mentions.username'],
}

# Only resume from a known tweet when there is one; with an empty history file
# `max()` yields NaN, which must not be sent as `since_id`.
if pd.notna(latest_id):
    params['since_id'] = int(latest_id)

In [8]:
# Row accumulators filled by the download loop (one inner list per record).
# They are deliberately two separate list objects.
tweet_list = []
users_list = []

In [9]:
# Column layouts of the two result tables; the order matches the values
# produced by the row builders below.
TWEET_COLUMNS = ['hashtag', 'id', 'author_id', 'text', 'like_count', 'reply_count',
                 'retweet_count', 'quote_count', 'created_at', 'lang', 'mentions']
USER_COLUMNS = ['id', 'username', 'created_at', 'followers_count', 'following_count',
                'tweet_count', 'protected', 'verified']


def _tweet_row(query, tweet):
    """Flatten one tweet object into a list ordered like TWEET_COLUMNS.

    `mentions` is the list of mentioned user ids, or None when the tweet
    carries no mention entities.
    """
    metrics = tweet.public_metrics
    # `entities` is None for tweets without entities; a payload without a
    # 'mentions' key is treated the same way instead of raising KeyError.
    mentions = (tweet.entities or {}).get('mentions')
    mention_ids = None if mentions is None else [mention['id'] for mention in mentions]
    return [query,
            tweet.id,
            tweet.author_id,
            tweet.text,
            metrics['like_count'],
            metrics['reply_count'],
            metrics['retweet_count'],
            metrics['quote_count'],
            tweet.created_at,
            tweet.lang,
            mention_ids]


def _user_row(user):
    """Flatten one user object into a list ordered like USER_COLUMNS."""
    metrics = user.public_metrics
    return [user.id,
            user.username,
            user.created_at,
            metrics['followers_count'],
            metrics['following_count'],
            metrics['tweet_count'],
            user.protected,
            user.verified]


for query in queries:
    try:
        pages = tweepy.Paginator(client.search_recent_tweets,
                                 query=f'{query} -is:retweet', **params)
        for chunk in pages:
            # A page without matches has `data` set to None and may lack the
            # 'users' expansion, so both are guarded before iterating.
            for tweet in chunk.data or []:
                tweet_list.append(_tweet_row(query, tweet))
            for user in (chunk.includes or {}).get('users', []):
                users_list.append(_user_row(user))
    except tweepy.TooManyRequests:
        print('WARNING: Tweet download stopped due to TooManyRequest exception. Wait 15 minutes...')
        break

# Build the tables once, after the loop. The lists accumulate rows across
# all queries, so converting them inside the loop and concatenating would
# duplicate every earlier query's rows. Doing it here also keeps the rows
# fetched before a rate-limit interruption.
tweets_df = pd.DataFrame(tweet_list, columns=TWEET_COLUMNS)
users_df = pd.DataFrame(users_list, columns=USER_COLUMNS)

In [10]:
# Display the collected tweets table as this cell's output.
tweets_df

Unnamed: 0,hashtag,id,author_id,text,like_count,reply_count,retweet_count,quote_count,created_at,lang,mentions
0,#Trump,1596455049307508736,1518646783508979716,#cbs #trump https://t.co/aiIrISHUjq,0,0,0,0,2022-11-26 10:45:19+00:00,qht,
1,#Trump,1596455043645030400,1596402338566135809,@BNB_Tracker 🇺🇸🇺🇸🇺🇸STEALTH LAUNCHING TRUMPLON ...,0,0,0,0,2022-11-26 10:45:17+00:00,en,[1546364668503298048]
2,#Trump,1596455024766627840,1593917973055561729,@TAG24 🚨STEALTH LAUNCHED TRUMPLON ON #BSC🚨\n\n...,0,0,0,0,2022-11-26 10:45:13+00:00,en,[781788488022777856]
3,#Trump,1596455017233666048,1596402338566135809,@TimesTabloid1 🇺🇸🇺🇸🇺🇸STEALTH LAUNCHING TRUMPLO...,0,0,0,0,2022-11-26 10:45:11+00:00,en,[1249009357737668610]
4,#Trump,1596455012170866689,1385036460735279113,"TRUMP LIE #26959\n\n""We have massive trade def...",0,0,0,0,2022-11-26 10:45:10+00:00,en,
...,...,...,...,...,...,...,...,...,...,...,...
9991,Trump,1596436096711524352,1464841632096919552,Watch ANGRY Rand Paul BEGS Trump back 'second ...,0,0,0,0,2022-11-26 09:30:00+00:00,en,[10228272]
9992,Trump,1596436095629234178,14592411,The U.S. is banning the sale of communications...,1,0,1,0,2022-11-26 09:30:00+00:00,en,
9993,Trump,1596436095528378371,15724740,The U.S. is banning the sale of communications...,0,0,0,0,2022-11-26 09:30:00+00:00,en,
9994,Trump,1596436094643589122,1430601209384259591,@EdweardIX @RealityAwake @JackPosobiec The vil...,1,0,0,0,2022-11-26 09:29:59+00:00,en,"[69072504, 1586281540555886592, 592730371]"


In [11]:
# Display the collected users table as this cell's output.
users_df

Unnamed: 0,id,username,created_at,followers_count,following_count,tweet_count,protected,verified
0,1518646783508979716,salomonsen2,2022-04-25 17:43:13+00:00,16,136,1658,False,False
1,1596402338566135809,quantumshiller,2022-11-26 07:16:18+00:00,0,5,79,False,False
2,1546364668503298048,BNB_Tracker,2022-07-11 05:24:30+00:00,309,5,86753,False,False
3,1593917973055561729,NanaHaban,2022-11-19 10:43:59+00:00,3,1,3455,False,False
4,781788488022777856,TAG24,2016-09-30 09:31:15+00:00,8641,193,71598,False,False
...,...,...,...,...,...,...,...,...
17047,15724740,iowasnewsnow,2008-08-04 17:11:43+00:00,23863,1187,159397,False,True
17048,1430601209384259591,user599999,2021-08-25 18:43:59+00:00,19,0,4706,False,False
17049,69072504,EdweardIX,2009-08-26 19:34:55+00:00,3321,3943,27105,False,False
17050,1586281540555886592,RealityAwake,2022-10-29 09:00:32+00:00,311,515,979,False,False


In [12]:
# Append the new rows to the existing CSV files: no index column and no
# header row are written.
append_options = {'mode': 'a', 'index': False, 'header': False}
tweets_df.to_csv('./data/Trump/tweets.csv', **append_options)
users_df.to_csv('./data/Trump/users.csv', **append_options)