In [1]:
import pandas as pd
import requests
import json
import datetime

### Set up:

In [2]:
# define all necessary functions:
def connect_to_twitter(token):
    """Build the request headers that carry the bearer token.

    Parameters
    ----------
    token : str
        Bearer token of the Twitter API project.

    Returns
    -------
    dict
        ``{"Authorization": "Bearer <token>"}``, ready to be passed as
        ``headers`` to ``make_request``.
    """
    return {"Authorization": f"Bearer {token}"}

def make_request(headers, params, url, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Parameters
    ----------
    headers : dict
        HTTP headers, e.g. the result of ``connect_to_twitter``.
    params : dict
        Query parameters of the request.
    url : str
        Endpoint to call.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up. ``requests`` waits
        indefinitely when no timeout is given, so a stalled connection would
        otherwise block the notebook.

    Returns
    -------
    dict
        The parsed JSON response. The HTTP status code is not checked, so an
        error payload of the API is returned just like a successful one.
    """
    return requests.get(url, params=params, headers=headers, timeout=timeout).json()

def make_df(response):
    """Turn the ``'data'`` list of an API response into a DataFrame.

    Parameters
    ----------
    response : dict
        Decoded JSON response as returned by ``make_request``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``response['data']``. An empty DataFrame is
        returned when the response has neither ``'data'`` nor ``'errors'``
        (e.g. no tweets in the requested window), so that loops over many
        outlets are not aborted by a single empty result.

    Raises
    ------
    KeyError
        If ``'data'`` is missing and the response carries an ``'errors'``
        entry; the reported errors are included in the message.
    """
    if 'data' not in response:
        if 'errors' in response:
            # keep failing loudly for real API errors instead of hiding them
            raise KeyError(f"'data' missing from response; API reported: {response['errors']}")
        return pd.DataFrame()
    return pd.DataFrame(response['data'])


In [3]:
# load the credentials file (stored one directory above the notebook):
creds = pd.read_csv('../creds/CredentialsStandAPI.csv')

# the bearer token is the 'bearer_token' value of the first row:
bearer_token = creds['bearer_token'].iloc[0]

In [4]:
# read the outlet table, drop every row that contains a missing value
# (e.g. outlets without a Twitter handle) and keep the handles as a list:
twitter_handles = (
    pd.read_excel('data/twitter_handles.xlsx')
    .dropna()['twitter_handle']
    .tolist()
)

In [5]:
# build the Authorization header once; every make_request call below passes it along
# (no network traffic happens here, the header is only a dict)
headers = connect_to_twitter(bearer_token)

### Run Test with only few Outlets: Loop over List of Twitter handles

In [142]:
# small sample of outlets used for the test runs below
handles = [
    '19thnews',
    'ABC',
    'TheAdvocateMag',
    'AFPFactCheck',
    'AJEnglish',
    'aldotcom',
    'AlterNet',
    'theamgreatness',
    'AmerIndependent',
]

In [109]:
# recent-search endpoint (the full-archive variant would be
# https://api.twitter.com/2/tweets/search/all).
# HTTPS is used so the bearer token in the headers is never sent unencrypted.
url="https://api.twitter.com/2/tweets/search/recent"

# the time window is identical for every outlet, so it is built once.
# The +0200 offset is German time: a tweet reported by the API with
# created_at 2021-10-23T22:00:00.000Z was posted at 2021-10-24T00:00:00 German time.
start_time = datetime.datetime.strptime('2021-10-24T00:00:00+0200', '%Y-%m-%dT%H:%M:%S%z').isoformat()
end_time = datetime.datetime.strptime('2021-10-24T23:59:59+0200', '%Y-%m-%dT%H:%M:%S%z').isoformat()

# raw JSON response per Twitter handle:
response={}

for handle in handles:
    params={'query': f'from:{handle}', # tweets posted by this outlet
            'start_time': start_time,
            'end_time': end_time,
            'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
            'max_results': 100} # default is 10, max possible is 100

    response[handle]=make_request(headers, params, url)

In [127]:
# one row per outlet: column 0 is the handle, column 1 the raw API response
tweets = pd.DataFrame([(handle, payload) for handle, payload in response.items()])

In [128]:
# start from an empty frame so the expected tweet columns come first,
# even when a field is absent from the first response
all_tweets = pd.DataFrame(columns=['entities', 'text', 'author_id', 'id', 'conversation_id', 'created_at', 'attachments', 'referenced_tweets'])

# build one frame per outlet (column 1 of `tweets` holds the raw responses) and
# concatenate them in a single call; calling pd.concat inside the loop would
# copy all rows collected so far on every iteration
all_tweets = pd.concat([all_tweets] + [make_df(payload) for payload in tweets.iloc[:, 1]])

In [141]:
all_tweets

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets
0,"{'urls': [{'start': 198, 'end': 221, 'url': 'h...","During the pandemic, 1 in 4 women in the study...",1219278784693768193,1452281979622330373,1452281979622330373,2021-10-24T14:33:00.000Z,,
0,"{'annotations': [{'start': 14, 'end': 24, 'pro...",Parade-loving New Orleans is about to get its ...,28785486,1452394057171447823,1452394057171447823,2021-10-24T21:58:22.000Z,,
1,"{'annotations': [{'start': 107, 'end': 112, 'p...","A group of about 2,000 mainly Central American...",28785486,1452388445217824769,1452388445217824769,2021-10-24T21:36:04.000Z,,
2,"{'annotations': [{'start': 0, 'end': 11, 'prob...",Donald Trump's new social media app could prov...,28785486,1452383946122092548,1452383946122092548,2021-10-24T21:18:11.000Z,,
3,"{'annotations': [{'start': 35, 'end': 41, 'pro...",PUMPKIN PARTY: Animals at a zoo in Chicago wer...,28785486,1452379412729638927,1452379412729638927,2021-10-24T21:00:10.000Z,{'media_keys': ['13_1451536997651202060']},
...,...,...,...,...,...,...,...,...
2,"{'urls': [{'start': 210, 'end': 233, 'url': 'h...",Nearly three years after the massacre at the T...,2467720274,1452276195949793287,1452276195949793287,2021-10-24T14:10:01.000Z,,
3,"{'urls': [{'start': 224, 'end': 247, 'url': 'h...",Loudoun County parents say the people who have...,2467720274,1452243485722034189,1452243485722034189,2021-10-24T12:00:03.000Z,,
4,"{'urls': [{'start': 177, 'end': 200, 'url': 'h...",The Biden administration says that increased I...,2467720274,1452093741494423555,1452093741494423555,2021-10-24T02:05:01.000Z,,
5,"{'urls': [{'start': 104, 'end': 127, 'url': 'h...","Moving forward, Bannon could face up to 12 mon...",2467720274,1452062534488444933,1452062534488444933,2021-10-24T00:01:00.000Z,,


### Retrieve IDs for each outlet (by Twitter handle)

In [18]:
# user lookup: ask the users/by endpoint for each handle separately
url="https://api.twitter.com/2/users/by"

# raw lookup response per Twitter handle:
users={}

for handle in handles:
    params={'usernames': handle} # the handle whose user object is requested
    users[handle]=make_request(headers, params, url)

In [19]:
# one row per outlet: column 0 is the handle, column 1 the raw lookup response
df_ids = pd.DataFrame([(handle, payload) for handle, payload in users.items()])

In [20]:
# each lookup response (column 1) lists the matched users under 'data';
# take the id of the first match for every outlet
user_ids = [df_ids.iloc[row, 1]['data'][0]['id'] for row in range(len(df_ids))]

In [21]:
# name the two columns, then replace the raw lookup responses by the extracted ids
df_ids = df_ids.rename(columns={0: "username", 1: "author_id"})
df_ids["author_id"] = user_ids

In [22]:
df_ids

Unnamed: 0,username,author_id
0,19thnews,1219278784693768193
1,ABC,28785486
2,TheAdvocateMag,21692297
3,AFPFactCheck,1002203254065950720
4,AJEnglish,4970411
5,aldotcom,14528874
6,AlterNet,18851248
7,theamgreatness,749016639287414784
8,AmerIndependent,2467720274


### Add Twitter handle (=username) to each tweet (join via author_id)

In [158]:
# attach the outlet name to every tweet via its author_id, ordered by outlet
result = all_tweets.merge(df_ids, on="author_id")
result = result.sort_values('username')

In [159]:
result

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets,username
0,"{'urls': [{'start': 198, 'end': 221, 'url': 'h...","During the pandemic, 1 in 4 women in the study...",1219278784693768193,1452281979622330373,1452281979622330373,2021-10-24T14:33:00.000Z,,,19thnews
68,"{'annotations': [{'start': 4, 'end': 7, 'proba...",The U.S. is welcoming tens of thousands of Afg...,28785486,1452142958124441604,1452142958124441604,2021-10-24T05:20:35.000Z,,,ABC
67,"{'urls': [{'start': 80, 'end': 103, 'url': 'ht...","As people head indoors for the winter, coronav...",28785486,1452148243362721794,1452148243362721794,2021-10-24T05:41:35.000Z,,,ABC
66,"{'annotations': [{'start': 66, 'end': 69, 'pro...","""There was a lot of pressure on me to not make...",28785486,1452152906619269125,1452152906619269125,2021-10-24T06:00:07.000Z,{'media_keys': ['13_1451608383036002314']},,ABC
64,"{'urls': [{'start': 253, 'end': 276, 'url': 'h...","""It can never be just what we saw in 2017. It'...",28785486,1452168019409620994,1452168019409620994,2021-10-24T07:00:10.000Z,{'media_keys': ['13_1451607945062584356']},,ABC
...,...,...,...,...,...,...,...,...,...
216,"{'annotations': [{'start': 0, 'end': 14, 'prob...","Douglas Dechert: ""The stark choice in the upco...",749016639287414784,1452276196293726208,1452276196293726208,2021-10-24T14:10:01.000Z,,,theamgreatness
217,"{'annotations': [{'start': 0, 'end': 16, 'prob...","Josiah Lippincott: ""Americans must stop granti...",749016639287414784,1452273714398220295,1452273714398220295,2021-10-24T14:00:10.000Z,,,theamgreatness
218,"{'annotations': [{'start': 0, 'end': 8, 'proba...","Adam Mill: ""A punk from the 1980s would not re...",749016639287414784,1452271163044999172,1452271163044999172,2021-10-24T13:50:01.000Z,,,theamgreatness
219,"{'annotations': [{'start': 0, 'end': 12, 'prob...","Roger Kimball: ""The Soviets had the gulag, we ...",749016639287414784,1452268647393681412,1452268647393681412,2021-10-24T13:40:02.000Z,,,theamgreatness


### Run Test with only few Outlets: Loop over List of Twitter handles AND start, end date

In [173]:
# define parameters (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/search/recent"

# read time_stamps:
time_stamps=pd.read_csv('data/time_stamps.csv')
start_dates=time_stamps['start'].to_list() # all start dates to list
end_dates=time_stamps['end'].to_list() # all end dates to list

In [303]:
# request every (outlet, time window) combination

# nested result: handle -> "start, end" -> raw API response
responses_with_dates={}

for handle in handles:
    # register the per-outlet dict up front and fill it window by window
    per_window = responses_with_dates[handle] = {}

    for start, end in zip(start_dates, end_dates):
        # timestamps are parsed together with their UTC offset and sent in ISO 8601 form
        window_start = datetime.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')
        window_end = datetime.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')

        params={'query': f'from:{handle}', # tweets posted by this outlet
                'start_time': window_start.isoformat(),
                'end_time': window_end.isoformat(),
                'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
                'max_results': 100} # default is 10, max possible is 100

        per_window[f'{start}, {end}']=make_request(headers, params, url)



In [304]:
# one row per outlet: column 0 is the handle, column 1 the dict of per-window responses
df_dates = pd.DataFrame([(handle, windows) for handle, windows in responses_with_dates.items()])

In [305]:
# concatenate all tweets to one dataframe:
all_tweets_dates=pd.DataFrame(columns=['entities', 'text', 'author_id', 'id', 'conversation_id', 'created_at', 'attachments', 'referenced_tweets'])

# column 1 of df_dates holds, per outlet, the dict "start, end" -> response.
# Each outlet's own dict is walked directly (the earlier version sized the inner
# loop from row 0 only) and everything is concatenated in one call instead of
# growing the frame inside the loop.
frames = [
    make_df(window_response)
    for outlet_responses in df_dates.iloc[:, 1]
    for window_response in outlet_responses.values()
]
all_tweets_dates = pd.concat([all_tweets_dates] + frames)

In [306]:
all_tweets_dates

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets
0,"{'annotations': [{'start': 139, 'end': 143, 'p...",The court will not specifically examine the co...,1219278784693768193,1451999872710754304,1451999872710754304,2021-10-23T19:52:01.000Z,,
1,"{'annotations': [{'start': 4, 'end': 11, 'prob...",One Michigan state rep said men in the legisla...,1219278784693768193,1451999871536353285,1451999871536353285,2021-10-23T19:52:00.000Z,,
2,"{'annotations': [{'start': 122, 'end': 127, 'p...",Women make up more than half of the American p...,1219278784693768193,1451950297589460994,1451950297589460994,2021-10-23T16:35:01.000Z,,
3,"{'annotations': [{'start': 137, 'end': 140, 'p...",Women aged 40 to 65 have been hit hard by the ...,1219278784693768193,1451935949030662158,1451935949030662158,2021-10-23T15:38:00.000Z,,
4,"{'annotations': [{'start': 0, 'end': 12, 'prob...",Kamala Harris made history in 2020 as the firs...,1219278784693768193,1451746701669277696,1451746701669277696,2021-10-23T03:06:00.000Z,,
...,...,...,...,...,...,...,...,...
15,"{'annotations': [{'start': 17, 'end': 19, 'pro...",Wife of indicted GOP congressman says charges ...,2467720274,1452671526814240774,1452671526814240774,2021-10-25T16:20:55.000Z,,
16,"{'annotations': [{'start': 0, 'end': 12, 'prob...",Supreme Court to hear case on spiritual advise...,2467720274,1452646697826455552,1452646697826455552,2021-10-25T14:42:16.000Z,,
17,"{'annotations': [{'start': 0, 'end': 9, 'proba...",Republican leaders support jobless benefits — ...,2467720274,1452629865253986311,1452629865253986311,2021-10-25T13:35:23.000Z,,
18,"{'annotations': [{'start': 32, 'end': 36, 'pro...","In her campaign kickoff speech, Fiore promised...",2467720274,1452424678258221058,1452424678258221058,2021-10-25T00:00:02.000Z,,


In [309]:
# attach the outlet name via author_id, then order by outlet and posting time
result_tweets_dates = all_tweets_dates.merge(df_ids, on="author_id")
result_tweets_dates = result_tweets_dates.sort_values(by=['username', 'created_at'])

In [310]:
result_tweets_dates

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets,username
6,"{'urls': [{'start': 199, 'end': 222, 'url': 'h...",Black women candidates could make gains — and ...,1219278784693768193,1451693099273662464,1451693099273662464,2021-10-22T23:33:00.000Z,,,19thnews
5,"{'annotations': [{'start': 0, 'end': 10, 'prob...",White House reveals nation’s first gender equi...,1219278784693768193,1451706438317924356,1451706438317924356,2021-10-23T00:26:00.000Z,,,19thnews
4,"{'annotations': [{'start': 0, 'end': 12, 'prob...",Kamala Harris made history in 2020 as the firs...,1219278784693768193,1451746701669277696,1451746701669277696,2021-10-23T03:06:00.000Z,,,19thnews
3,"{'annotations': [{'start': 137, 'end': 140, 'p...",Women aged 40 to 65 have been hit hard by the ...,1219278784693768193,1451935949030662158,1451935949030662158,2021-10-23T15:38:00.000Z,,,19thnews
2,"{'annotations': [{'start': 122, 'end': 127, 'p...",Women make up more than half of the American p...,1219278784693768193,1451950297589460994,1451950297589460994,2021-10-23T16:35:01.000Z,,,19thnews
...,...,...,...,...,...,...,...,...,...
703,"{'urls': [{'start': 256, 'end': 279, 'url': 'h...",The last 12 months saw a record total of over ...,749016639287414784,1452645197553283084,1452645197553283084,2021-10-25T14:36:18.000Z,,,theamgreatness
702,"{'urls': [{'start': 249, 'end': 272, 'url': 'h...",A series of new leaks from Big Tech giant Face...,749016639287414784,1452645525753319428,1452645525753319428,2021-10-25T14:37:36.000Z,,,theamgreatness
701,"{'urls': [{'start': 239, 'end': 262, 'url': 'h...",A group of two dozen members of Congress from ...,749016639287414784,1452645711963643913,1452645711963643913,2021-10-25T14:38:21.000Z,,,theamgreatness
700,"{'urls': [{'start': 85, 'end': 108, 'url': 'ht...",Morning Greatness: Government Spends Tax Dolla...,749016639287414784,1452645877051502592,1452645877051502592,2021-10-25T14:39:00.000Z,,,theamgreatness


### Run Test with only few Outlets: Loop over List of Twitter handles, start date and end date

actually, need combination of twitter handle, start date and end date >> idea: for handle, start, end in zip(handles, start_dates, end_dates)

In [18]:
# read twitter handles + start & end date:
# the columns 'handle', 'start' and 'end' of this frame are turned into lists in the next cell
data = pd.read_csv('data/outlets_time_stamps.csv')

In [19]:
# define parameters (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/search/recent"

# one list per column; entries at the same position belong together
twitter_handles=data['handle'].tolist()
start_dates=data['start'].tolist()
end_dates=data['end'].tolist()



In [20]:
# request each outlet with its own time window

# raw JSON response per Twitter handle
# (a handle listed more than once keeps only its last response):
responses_individual_dates={}

for handle, start, end in zip(twitter_handles, start_dates, end_dates):
    # timestamps are parsed together with their UTC offset and sent in ISO 8601 form
    window_start = datetime.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')
    window_end = datetime.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')

    params={'query': f'from:{handle}', # tweets posted by this outlet
            'start_time': window_start.isoformat(),
            'end_time': window_end.isoformat(),
            'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
            'max_results': 100} # default is 10, max possible is 100

    responses_individual_dates[f'{handle}']=make_request(headers, params, url)



In [21]:
# one row per outlet: column 0 is the handle, column 1 the raw API response
df_ind_dates = pd.DataFrame([(handle, payload) for handle, payload in responses_individual_dates.items()])

In [22]:
df_ind_dates

Unnamed: 0,0,1
0,19thnews,"{'data': [{'id': '1453077632900292612', 'conve..."
1,ABC,{'data': [{'entities': {'urls': [{'start': 255...
2,TheAdvocateMag,"{'data': [{'id': '1453836292593111043', 'autho..."


In [39]:
# start from an empty frame so the expected tweet columns come first
tweets_ind_dates = pd.DataFrame(columns=['entities', 'text', 'author_id', 'id', 'conversation_id', 'created_at', 'attachments', 'referenced_tweets'])

# build one frame per outlet (column 1 of df_ind_dates holds the raw responses)
# and concatenate them in a single call instead of re-copying the growing frame
# on every loop iteration
tweets_ind_dates = pd.concat([tweets_ind_dates] + [make_df(payload) for payload in df_ind_dates.iloc[:, 1]])

In [40]:
tweets_ind_dates

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets
0,"{'urls': [{'start': 193, 'end': 216, 'url': 'h...",Intersex advocates and parents of intersex kid...,1219278784693768193,1453077632900292612,1453077632900292612,2021-10-26T19:14:39.000Z,,
1,"{'mentions': [{'start': 3, 'end': 10, 'usernam...","RT @flpeir: Roughly 1 in 2,000 babies demonstr...",1219278784693768193,1453077216787451906,1453077216787451906,2021-10-26T19:13:00.000Z,,"[{'type': 'retweeted', 'id': '1453075273071595..."
2,"{'urls': [{'start': 237, 'end': 260, 'url': 'h...",Washington Secretary of State Kim Wyman has ac...,1219278784693768193,1453057397858217990,1453057397858217990,2021-10-26T17:54:14.000Z,,
3,"{'mentions': [{'start': 3, 'end': 15, 'usernam...",RT @bcrodriguez: Multiple senators are pushing...,1219278784693768193,1453049316348547076,1453049316348547076,2021-10-26T17:22:08.000Z,,"[{'type': 'retweeted', 'id': '1453046682300604..."
4,"{'mentions': [{'start': 3, 'end': 9, 'username...","RT @amzam: With abortion arguments looming, @A...",1219278784693768193,1453045339166953475,1453045339166953475,2021-10-26T17:06:19.000Z,,"[{'type': 'retweeted', 'id': '1453044964867313..."
...,...,...,...,...,...,...,...,...
5,"{'urls': [{'start': 114, 'end': 137, 'url': 'h...",.@CampusPride named 180 campuses across the na...,21692297,1453724738568671236,1453724738568671236,2021-10-28T14:06:01.000Z,,
6,"{'urls': [{'start': 172, 'end': 195, 'url': 'h...",Italy’s Senate has quashed a bill that would h...,21692297,1453548210455146498,1453548210455146498,2021-10-28T02:24:33.000Z,,
7,"{'urls': [{'start': 179, 'end': 202, 'url': 'h...","""Time on Two Crosses: The Collected Writings o...",21692297,1453512294831251457,1453512294831251457,2021-10-28T00:01:50.000Z,,
8,"{'urls': [{'start': 135, 'end': 158, 'url': 'h...",Dave Chappelle said he's willing to speak with...,21692297,1453497592910782467,1453497592910782467,2021-10-27T23:03:25.000Z,,


In [41]:
# attach the outlet name via author_id, then order by outlet and posting time
final = tweets_ind_dates.merge(df_ids, on="author_id")
final = final.sort_values(by=['username', 'created_at'])

### Run Test with only few Outlets: While loop over dates, for loop over handles

- one response per day to keep limit of 500 tweets per response


In [12]:
# define variables:

# 1. list of handles
handles=['19thnews', 'ABC', 'TheAdvocateMag', 'AFPFactCheck', 'AJEnglish', 'aldotcom', 'AlterNet', 'theamgreatness', 'AmerIndependent']


# 2. start and end of the first daily window, and the date at which the loop stops
#    (all three carry a +0100 UTC offset)
start_date_obj = datetime.datetime.strptime('2021-10-28T00:00:00+0100', '%Y-%m-%dT%H:%M:%S%z')
end_date_obj = datetime.datetime.strptime('2021-10-29T00:00:00+0100', '%Y-%m-%dT%H:%M:%S%z')
cut_off = datetime.datetime.strptime('2021-11-03T00:00:00+0100', '%Y-%m-%dT%H:%M:%S%z')


# 3. url (HTTPS, so the bearer token is never sent unencrypted)
url="https://api.twitter.com/2/tweets/search/recent"


In [13]:
# loop

# nested result: "start, end" of the daily window -> handle -> raw API response
test_dict={}

# `<` instead of `!=`: the loop also stops when the start date is not aligned
# with cut_off in whole days (with `!=` it would step past cut_off and never end).
# NOTE: start_date_obj / end_date_obj are advanced in place, so this cell collects
# nothing when re-run without first re-running the cell that defines them.
while start_date_obj < cut_off:
    window_key = f'{start_date_obj}, {end_date_obj}'
    test_dict[window_key]={}

    for handle in handles:
        params={'query': f'from:{handle}', # tweets posted by this outlet
                'start_time': start_date_obj.isoformat(),
                'end_time': end_date_obj.isoformat(),
                'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
                'max_results': 100} # default is 10, max possible is 100

        test_dict[window_key][handle]=make_request(headers, params, url)

    # move the window forward by one day
    start_date_obj += datetime.timedelta(days=1)
    end_date_obj += datetime.timedelta(days=1)



In [14]:
test_dict

{'2021-10-28 00:00:00+01:00, 2021-10-29 00:00:00+01:00': {'19thnews': {'data': [{'conversation_id': '1453827691141025800',
     'id': '1453827691141025800',
     'entities': {'annotations': [{'start': 18,
        'end': 23,
        'probability': 0.8423,
        'type': 'Organization',
        'normalized_text': 'Senate'},
       {'start': 112,
        'end': 124,
        'probability': 0.6511,
        'type': 'Organization',
        'normalized_text': 'Supreme Court'},
       {'start': 128,
        'end': 136,
        'probability': 0.5726,
        'type': 'Person',
        'normalized_text': 'Elizabeth'}],
      'mentions': [{'start': 3,
        'end': 16,
        'username': 'AmandaBecker',
        'id': '233985354'}]},
     'author_id': '1219278784693768193',
     'referenced_tweets': [{'type': 'retweeted', 'id': '1453813778018144265'}],
     'text': 'RT @AmandaBecker: Senate confirms second ever woman solicitor general, who represents federal government before Supreme Court — Eliz

### Tweets Count Recent

In [6]:
# tweet-count endpoint (HTTPS, so the bearer token is never sent unencrypted)
url="https://api.twitter.com/2/tweets/counts/recent"

handles=['19thnews', 'ABC', 'TheAdvocateMag', 'AFPFactCheck', 'AJEnglish', 'aldotcom', 'AlterNet', 'theamgreatness', 'AmerIndependent']

#### only for one outlet

In [30]:
# counting window, given with a +0000 (UTC) offset and sent in ISO 8601 form
window_start = datetime.datetime.strptime('2021-10-28T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z')
window_end = datetime.datetime.strptime('2021-11-03T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z')

params={'query': 'from:19thnews', # tweets posted by this outlet
        'start_time': window_start.isoformat(),
        'end_time': window_end.isoformat(),
        'granularity': 'day'} # one count per day

tweet_count_single=make_request(headers, params, url)


In [31]:
tweet_count_single

{'errors': [{'parameters': {'start_time': ['2021-10-28T00:00Z']},
   'message': "Invalid 'start_time':'2021-10-28T00:00Z'. 'start_time' must be on or after 2021-10-28T07:54Z"}],
 'title': 'Invalid Request',
 'detail': 'One or more parameters to your request was invalid.',
 'type': 'https://api.twitter.com/2/problems/invalid-request'}

In [54]:
make_df(tweet_count_single)

Unnamed: 0,end,start,tweet_count
0,2021-10-29T00:00:00.000Z,2021-10-28T00:00:00.000Z,23
1,2021-10-30T00:00:00.000Z,2021-10-29T00:00:00.000Z,19
2,2021-10-31T00:00:00.000Z,2021-10-30T00:00:00.000Z,7
3,2021-11-01T00:00:00.000Z,2021-10-31T00:00:00.000Z,3
4,2021-11-02T00:00:00.000Z,2021-11-01T00:00:00.000Z,24
5,2021-11-03T00:00:00.000Z,2021-11-02T00:00:00.000Z,30


#### for list of outlets

In [7]:
# request the daily tweet counts of every outlet

# raw counts response per Twitter handle:
tweet_count={}

for handle in handles:
    # counting window, given with a +0000 (UTC) offset and sent in ISO 8601 form
    window_start = datetime.datetime.strptime('2021-10-29T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z')
    window_end = datetime.datetime.strptime('2021-11-04T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z')

    params={'query': f'from:{handle}', # tweets posted by this outlet
            'start_time': window_start.isoformat(),
            'end_time': window_end.isoformat(),
            'granularity': 'day'} # one count per day

    tweet_count[handle]=make_request(headers, params, url)



In [8]:
# one row per outlet: column 0 is the handle, column 1 the raw counts response
df_tweet_count = pd.DataFrame([(handle, payload) for handle, payload in tweet_count.items()])

In [9]:
df_tweet_count

Unnamed: 0,0,1
0,19thnews,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
1,ABC,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
2,TheAdvocateMag,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
3,AFPFactCheck,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
4,AJEnglish,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
5,aldotcom,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
6,AlterNet,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
7,theamgreatness,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."
8,AmerIndependent,"{'data': [{'end': '2021-10-30T00:00:00.000Z', ..."


In [10]:
# replace each raw counts response in column 1 by its total tweet count.
# A single column assignment (instead of writing cell by cell through .iloc)
# gives the column an integer dtype rather than leaving it as object.
# NOTE: the cell is not re-runnable on its own; after the first run column 1
# holds numbers, not response dicts.
df_tweet_count[1] = [payload['meta']['total_tweet_count'] for payload in df_tweet_count[1]]

In [11]:
df_tweet_count

Unnamed: 0,0,1
0,19thnews,117
1,ABC,712
2,TheAdvocateMag,59
3,AFPFactCheck,21
4,AJEnglish,420
5,aldotcom,213
6,AlterNet,135
7,theamgreatness,61
8,AmerIndependent,85


In [12]:
df_tweet_count[1].sum()

1823

### Pagination: Using next_token

#### only for one outlet

In [50]:
# make simple request and follow the pagination:

# define url (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/search/recent"

# define params (the window is given with a +0000, i.e. UTC, offset)
params={'query': 'from:AJEnglish', # tweets posted by this outlet
        'start_time': datetime.datetime.strptime('2021-10-29T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z').isoformat(),
        'end_time': datetime.datetime.strptime('2021-11-04T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z').isoformat(),
        'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
        'max_results': 100} # default is 10, max possible is 100

response=make_request(headers, params, url)
pages=[make_df(response)] # one frame per result page

# request further pages for as long as the API hands out a next_token.
# Checking before the request means a result that fits on one page no longer
# raises KeyError on the missing 'next_token'.
while 'next_token' in response['meta']:
    params['next_token'] = response['meta']['next_token']
    response=make_request(headers, params, url)
    pages.append(make_df(response))

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
response_df=pd.concat(pages)

#### for list of outlets >> FINAL CODE???


In [13]:
# make loop request:

# define url (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/search/recent"

# define list of Twitter handles
handles=['19thnews', 'ABC', 'TheAdvocateMag', 'AFPFactCheck', 'AJEnglish', 'aldotcom', 'AlterNet', 'theamgreatness', 'AmerIndependent']

# the window (given with a +0000, i.e. UTC, offset) is the same for every outlet,
# so it is built once
start_time = datetime.datetime.strptime('2021-10-29T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z').isoformat()
end_time = datetime.datetime.strptime('2021-11-04T00:00:00+0000', '%Y-%m-%dT%H:%M:%S%z').isoformat()

for handle in handles:
    params={'query': f'from:{handle}', # tweets posted by this outlet
            'start_time': start_time,
            'end_time': end_time,
            'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
            'max_results': 100} # default is 10, max possible is 100

    response=make_request(headers, params, url)
    pages=[make_df(response)] # one frame per result page

    # request further pages for as long as the API hands out a next_token
    while 'next_token' in response['meta']:
        params['next_token'] = response['meta']['next_token']
        response=make_request(headers, params, url)
        pages.append(make_df(response))

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    response_df_outlets=pd.concat(pages)

    # one csv per outlet
    response_df_outlets.to_csv(f'data/tets_loop_2/{handle}.csv', index=False)


In [23]:
# read the csv of every outlet and combine them into one big df:

# empty df that fixes the order of the expected columns:
all_tweets_outlets = pd.DataFrame(columns=['entities', 'text', 'author_id', 'id', 'conversation_id', 'created_at', 'attachments', 'referenced_tweets'])

# a single pd.concat replaces the repeated DataFrame.append, which was removed
# in pandas 2.0 and copied the whole frame on every iteration
all_tweets_outlets = pd.concat(
    [all_tweets_outlets] + [pd.read_csv(f'data/tets_loop_2/{handle}.csv') for handle in handles]
)

In [24]:
df_ids

Unnamed: 0,username,author_id
0,19thnews,1219278784693768193
1,ABC,28785486
2,TheAdvocateMag,21692297
3,AFPFactCheck,1002203254065950720
4,AJEnglish,4970411
5,aldotcom,14528874
6,AlterNet,18851248
7,theamgreatness,749016639287414784
8,AmerIndependent,2467720274


In [25]:
# the ids were read back from csv as integers; df_ids stores author_id as string,
# so convert the column before joining
all_tweets_outlets = all_tweets_outlets.astype({'author_id': str})

In [26]:
# attach the outlet name to every tweet via its author_id (row order is left unsorted)
result_tweets_outlets = all_tweets_outlets.merge(df_ids, on='author_id')

In [27]:
result_tweets_outlets

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets,username
0,"{'hashtags': [{'start': 212, 'end': 222, 'tag'...",Can You Cure Motherhood Burnout? By giving exh...,1219278784693768193,1456041004155424776,1456041004155424776,2021-11-03T23:30:01.000Z,,,19thnews
1,"{'annotations': [{'start': 28, 'end': 38, 'pro...",RT @bcrodriguez: What could Republicans' wins ...,1219278784693768193,1456009174928744448,1456009174928744448,2021-11-03T21:23:33.000Z,,"[{'type': 'retweeted', 'id': '1455986894890848...",19thnews
2,"{'urls': [{'start': 188, 'end': 211, 'url': 'h...",“There is no other sector of health care in wh...,1219278784693768193,1455994444575019009,1455994444575019009,2021-11-03T20:25:01.000Z,,,19thnews
3,"{'urls': [{'start': 202, 'end': 225, 'url': 'h...","Wins by Glenn Youngkin, Winsome Sears and Repu...",1219278784693768193,1455987707201695758,1455987707201695758,2021-11-03T19:58:14.000Z,,,19thnews
4,"{'mentions': [{'start': 3, 'end': 12, 'usernam...",RT @jayomiko: Congratulations @19thnews Editor...,1219278784693768193,1455986244203081729,1455986244203081729,2021-11-03T19:52:26.000Z,,"[{'type': 'retweeted', 'id': '1455984760870752...",19thnews
...,...,...,...,...,...,...,...,...,...
1818,"{'urls': [{'start': 121, 'end': 144, 'url': 'h...",Far-right extremist groups are using COVID saf...,2467720274,1454113134361391109,1454113134361391109,2021-10-29T15:49:22.000Z,,,AmerIndependent
1819,"{'urls': [{'start': 73, 'end': 96, 'url': 'htt...",RT @CAPAction: And who will pay for it? The ri...,2467720274,1454109827941154817,1454109827941154817,2021-10-29T15:36:13.000Z,{'media_keys': ['3_1454093180249518083']},"[{'type': 'retweeted', 'id': '1454093185731473...",AmerIndependent
1820,"{'urls': [{'start': 153, 'end': 176, 'url': 'h...","As noted by one expert, policymakers should no...",2467720274,1454043086464733197,1454043086464733197,2021-10-29T11:11:01.000Z,,,AmerIndependent
1821,"{'urls': [{'start': 218, 'end': 241, 'url': 'h...",Fox News: Uses images that evoke violence and ...,2467720274,1453901905659416577,1453901905659416577,2021-10-29T01:50:01.000Z,,,AmerIndependent


### Loop with Pagination + time period

In [31]:
# read handles + start & end dates:
df=pd.read_csv('data/outlets_time_stamps.csv')

# one list per column; entries at the same position belong together
handles, start_dates, end_dates = (df[column].tolist() for column in ('handle', 'start', 'end'))

#### 1. Tweet Count (optional)

In [33]:
# define url (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/counts/recent"

In [35]:
# raw counts response per Twitter handle:
tweet_count={}

for handle, start, end in zip(handles, start_dates, end_dates):
    # every outlet has its own window; the timestamps are parsed together with
    # their UTC offset and sent in ISO 8601 form
    window_start = datetime.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')
    window_end = datetime.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')

    params={'query': f'from:{handle}', # tweets posted by this outlet
            'start_time': window_start.isoformat(),
            'end_time': window_end.isoformat(),
            'granularity': 'day'} # one count per day

    tweet_count[f'{handle}']=make_request(headers, params, url)



In [36]:
# one row per outlet: column 0 is the handle, column 1 the raw counts response
df_tweet_count = pd.DataFrame([(handle, payload) for handle, payload in tweet_count.items()])

In [37]:
# replace each raw counts response in column 1 by its total tweet count.
# A single column assignment (instead of writing cell by cell through .iloc)
# gives the column an integer dtype rather than leaving it as object.
# NOTE: the cell is not re-runnable on its own; after the first run column 1
# holds numbers, not response dicts.
df_tweet_count[1] = [payload['meta']['total_tweet_count'] for payload in df_tweet_count[1]]

In [38]:
df_tweet_count

Unnamed: 0,0,1
0,19thnews,7
1,ABC,604
2,TheAdvocateMag,18


In [39]:
df_tweet_count[1].sum()

629

#### 2. Collect Tweets

In [43]:
# define url (HTTPS, so the bearer token is never sent unencrypted):
url="https://api.twitter.com/2/tweets/search/recent"

In [44]:
# collect all tweets of every outlet within its own time window, page by page
for handle, start, end in zip(handles, start_dates, end_dates):
    params={'query': f'from:{handle}', # tweets posted by this outlet
            # timestamps are parsed together with their UTC offset and sent in ISO 8601 form
            'start_time': datetime.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z').isoformat(),
            'end_time': datetime.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z').isoformat(),
            'tweet.fields': 'author_id,entities,attachments,conversation_id,created_at,referenced_tweets',
            'max_results': 100} # default is 10, max possible is 100

    response=make_request(headers, params, url)
    pages=[make_df(response)] # one frame per result page

    # request further pages for as long as the API hands out a next_token
    while 'next_token' in response['meta']:
        params['next_token'] = response['meta']['next_token']
        response=make_request(headers, params, url)
        pages.append(make_df(response))

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    response_df_outlets=pd.concat(pages)

    # one csv per outlet
    response_df_outlets.to_csv(f'data/test_loop_3/{handle}.csv', index=False)

In [46]:
# empty df that fixes the order of the expected columns:
all_tweets_outlets = pd.DataFrame(columns=['entities', 'text', 'author_id', 'id', 'conversation_id', 'created_at', 'attachments', 'referenced_tweets'])

# read the csv of every outlet and combine them in a single pd.concat;
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every iteration
all_tweets_outlets = pd.concat(
    [all_tweets_outlets] + [pd.read_csv(f'data/test_loop_3/{handle}.csv') for handle in handles]
)

In [47]:
all_tweets_outlets

Unnamed: 0,entities,text,author_id,id,conversation_id,created_at,attachments,referenced_tweets
0,"{'urls': [{'start': 179, 'end': 202, 'url': 'h...",Texas Gov. Greg Abbott on Monday signed a bill...,1219278784693768193,1454550177918881799,1454550177918881799,2021-10-30T20:46:01.000Z,,
1,"{'urls': [{'start': 148, 'end': 171, 'url': 'h...",“This is more than a short-term problem. Being...,1219278784693768193,1454536336745107462,1454536336745107462,2021-10-30T19:51:01.000Z,,
2,"{'urls': [{'start': 124, 'end': 147, 'url': 'h...",Schools still have not done enough to stop the...,1219278784693768193,1454483737345941510,1454483737345941510,2021-10-30T16:22:00.000Z,,
3,"{'urls': [{'start': 198, 'end': 221, 'url': 'h...","“There is early evidence, in the form of long ...",1219278784693768193,1454467380684500997,1454467380684500997,2021-10-30T15:17:00.000Z,,
4,"{'urls': [{'start': 137, 'end': 160, 'url': 'h...",“Activists have been saying the same thing for...,1219278784693768193,1454300278904266758,1454300278904266758,2021-10-30T04:13:00.000Z,,
...,...,...,...,...,...,...,...,...
13,"{'annotations': [{'start': 92, 'end': 114, 'pr...",A transgender woman working as a security guar...,21692297,1454840598947565569,1454840598947565569,2021-10-31T16:00:03.000Z,,
14,"{'annotations': [{'start': 15, 'end': 34, 'pro...",A student at a Missouri high school is the fir...,21692297,1454795301500248072,1454795301500248072,2021-10-31T13:00:03.000Z,,
15,"{'urls': [{'start': 33, 'end': 56, 'url': 'htt...","Make the most of your ""me time."" https://t.co/...",21692297,1454659393685966852,1454659393685966852,2021-10-31T04:00:00.000Z,,
16,"{'mentions': [{'start': 1, 'end': 13, 'usernam...",.@itsJojoSiwa is making history this year as s...,21692297,1454614099279548418,1454614099279548418,2021-10-31T01:00:01.000Z,,


#### 3. Collect Twitter IDs for all outlets

see above (line 12)

### Final code requires:
1. Collect Twitter IDs for all outlets
2. Optional: collect number of tweets (count) to verify if loop was correct
3. Run loop over Twitter handles and store one csv for each outlet
