In [None]:
!pip install pandas
!pip install requests

In [None]:
import pandas as pd
import requests
import os
import json
import time
import objectpath
from datetime import datetime, timedelta
import random
import time

In [None]:
# Base directory of the project; output CSV files are written below it.
ROOT_FOLDER = "./"

In [None]:
# Credentials are read from the environment so the token never has to be
# written into (and accidentally committed with) the notebook.
# To set your environment variable, run the following line in your terminal
# before starting Jupyter:
# export 'BEARER_TOKEN'='<your_bearer_token>'
# If the variable is not set, the token is an empty string and requests will be
# rejected by the API.
bearer_token = os.environ.get("BEARER_TOKEN", "")

# Twitter API v2 full-archive search endpoint.
search_url = "https://api.twitter.com/2/tweets/search/all"


def create_headers(bearer_token):
    """
    Build the HTTP headers used to authenticate an API request.

    :param bearer_token: token placed after the ``Bearer`` scheme
    :return: dict holding a single ``Authorization`` header
    """
    return {"Authorization": f"Bearer {bearer_token}"}

def create_params(query, start_time, end_time, next_token=None):
    """
    Assemble the query-string parameters for one search request.

    :param query: search query string
    :param start_time: lower bound of the search window, passed through as-is
    :param end_time: upper bound of the search window, passed through as-is
    :param next_token: pagination token; only included when truthy
    :return: dict of request parameters
    """
    # Other parameters accepted by the endpoint: since_id, until_id,
    # media.fields, poll.fields, place.fields
    params = {
        'query': query,
        'expansions': 'geo.place_id,author_id',
        'start_time': start_time,
        'end_time': end_time,
        'user.fields': 'id,username,verified,public_metrics,created_at',
        'tweet.fields': 'created_at,public_metrics',
        'max_results': 100,
    }
    if not next_token:
        return params
    params['next_token'] = next_token
    return params

def parse_response(res, seen_ids=None):
    """
    Parse one page of a Twitter API search response.

    Every tweet in ``res['data']`` is flattened into one row together with the
    profile of its author, which is looked up by ``author_id`` in
    ``res['includes']['users']``. Rows duplicated within the page, or whose
    tweet id is already in ``seen_ids``, are dropped; the ids that remain are
    appended to ``seen_ids``.

    :param res: decoded JSON body of the response (a dict)
    :param seen_ids: list of tweet ids collected so far, extended in place.
        Defaults to the module-level ``all_tweet_ids`` list.
    :return: tuple ``(df, next_token)`` where ``df`` is a DataFrame with one
        row per new tweet (empty when the page has none) and ``next_token`` is
        the pagination token from ``res['meta']`` or ``None`` on the last page
    """
    tweets = res.get('data') or []
    users = (res.get('includes') or {}).get('users') or []
    meta = res.get('meta') or {}

    # Index the users once instead of searching the includes for every tweet.
    # setdefault keeps the first entry when an id appears more than once.
    users_by_id = {}
    for user in users:
        users_by_id.setdefault(user['id'], user)

    # Collect plain dicts and build the frame once: growing a DataFrame row by
    # row is quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
    records = []
    for tweet in tweets:
        tweet_metrics = tweet['public_metrics']
        # A tweet whose author is missing from the includes keeps its own
        # fields and gets empty user fields instead of aborting the whole page.
        user = users_by_id.get(tweet['author_id'], {})
        user_metrics = user.get('public_metrics', {})
        records.append({
            'author_id': tweet['author_id'],
            'tweet_id': tweet['id'],
            'text': tweet['text'],
            'like_count': tweet_metrics['like_count'],
            'quote_count': tweet_metrics['quote_count'],
            'retweet_count': tweet_metrics['retweet_count'],
            'tweet_created_at': tweet['created_at'],
            'user_created_at': user.get('created_at'),
            'username': user.get('username'),
            'following_count': user_metrics.get('following_count'),
            'listed_count': user_metrics.get('listed_count'),
            'tweet_count': user_metrics.get('tweet_count'),
            'user_verified': user.get('verified'),
        })

    df = pd.DataFrame(records)
    if not df.empty:
        if seen_ids is None:
            # Resolved lazily so an empty page never needs the global list.
            seen_ids = all_tweet_ids
        df = df.drop_duplicates('tweet_id')
        df = df[~df['tweet_id'].isin(seen_ids)]
        seen_ids.extend(df['tweet_id'].tolist())

    return df, meta.get('next_token')

def connect_to_endpoint(url, headers, params):
    """
    Send a GET request to the Twitter API and return the decoded JSON body.

    :param url: endpoint to query
    :param headers: HTTP headers, including the authorization header
    :param params: query-string parameters of the request
    :return: the response body parsed from JSON
    :raises Exception: with ``(status_code, response_text)`` as arguments when
        the API answers with any status other than 200
    """
    # Use the url argument rather than the module-level search_url so the
    # function queries the endpoint the caller actually asked for.
    response = requests.request("GET", url, headers=headers, params=params)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [None]:
# Two-letter country code -> country name.
# The code is inserted into the search query (place_country:<code>) and the
# name is used for log messages and output file names.
countries = {
    'AE': 'United Arab Emirates',
    'BH': 'Bahrain',
    'JO': 'Jordan',
    'LB': 'Lebanon',
    'MA': 'Morocco',
    'SA': 'Saudi Arabia',
    'TR': 'Turkey',
    'EG': 'Egypt',
    'IL': 'Israel',
}

In [None]:
# Search terms; they are joined with " OR " to form the query sent to the API.
query_words = [
    # plain keywords
    'israel',
    'gaza',
    'jerusalem',
    'palestine',
    'palestinians',
    'hamas',
    'rockets',
    'el kuds',
    'iron dome',
    # hashtag-style terms
    'israelUnderFire',
    'SaveSheikhJarrah',
    'gazzaUnderAttack',
    'israelUnderAttack',
    'FreePalestine',
    'Gaza_Under_Attack',
    'SaveAlAqsaMosque',
]

In [None]:
# Search window of every collection period: period name -> start/end timestamps.
query_params = {
    period: {'start_time': period_start, 'end_time': period_end}
    for period, period_start, period_end in (
        ('before', '2021-04-28T00:00:00Z', '2021-05-09T23:59:59Z'),
        ('during', '2021-05-10T00:00:00Z', '2021-05-21T23:59:59Z'),
        ('after', '2021-05-22T00:00:00Z', '2021-06-04T23:59:59Z'),
    )
}

In [None]:
# Collect the tweets of every country for every period and append them, page by
# page, to <ROOT_FOLDER>/data/<country>_<period>.csv.

# Ids of all tweets stored so far; parse_response reads and extends this list
# to skip tweets that were already written.
all_tweet_ids = []

# The headers are identical for every request, so build them once.
headers = create_headers(bearer_token)

# to_csv does not create missing directories.
output_folder = os.path.join(ROOT_FOLDER, 'data')
os.makedirs(output_folder, exist_ok=True)

for c_shortcut, c_fullname in countries.items():
    print('Country: {}'.format(c_fullname))

    query = '(' + ' OR '.join(query_words) + ') place_country:{}'
    query = query.format(c_shortcut)
    for period_type, period_values in query_params.items():
        print('Started \"{}\" period'.format(period_type))
        start_time_log = time.time()
        output_file = os.path.join(output_folder, c_fullname + '_' + period_type + '.csv')
        # Write the column header only when the file is new or empty, so that
        # appending to an existing file never puts a second header mid-file.
        header = not (os.path.exists(output_file) and os.path.getsize(output_file) > 0)

        # Always request the first page (no token), then keep following the
        # token returned by the previous page until the API stops sending one.
        next_token = None
        while True:
            params = create_params(query, period_values['start_time'],
                                   period_values['end_time'], next_token)
            time.sleep(5)  # pause between consecutive requests
            json_response = connect_to_endpoint(search_url, headers, params)
            df, next_token = parse_response(json_response)
            if not df.empty:
                # Skip pages without new rows so they cannot consume the header.
                df.to_csv(output_file, index=False, header=header, mode='a')
                header = False
            if next_token is None:
                break

        end_time_log = time.time()
        print('Finished \"{}\" period in {} minutes'.format(period_type, str((end_time_log-start_time_log)/60)))
    print('Finished: {}'.format(c_fullname))