# In [None]:
# For sending GET requests to the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

# Store the bearer token in the TOKEN environment variable, where auth() reads
# it back. The value is blank in this file.
# NOTE(review): this assignment replaces any TOKEN value already present in the
# process environment — confirm that is intended.
os.environ['TOKEN'] = '' #Bearer Token

def auth():
    """Return the bearer token held in the ``TOKEN`` environment variable.

    Returns:
        The variable's value, or ``None`` when ``TOKEN`` is not set.
    """
    token = os.environ.get('TOKEN')
    return token

def create_headers(bearer_token):
    """Build the HTTP headers that carry *bearer_token* as a Bearer credential.

    Args:
        bearer_token: Token placed after the ``Bearer `` prefix.

    Returns:
        A dict with a single ``Authorization`` entry.
    """
    authorization = f"Bearer {bearer_token}"
    return {"Authorization": authorization}

def create_url(keyword, start_date, end_date, max_results=500):
    """Return the search endpoint URL together with its query parameters.

    Args:
        keyword: Search query string, sent as ``query``.
        start_date: Value sent as ``start_time``.
        end_date: Value sent as ``end_time``.
        max_results: Value sent as ``max_results`` (defaults to 500).

    Returns:
        A ``(url, params)`` tuple. ``params['next_token']`` is only an empty
        placeholder here; connect_to_endpoint overwrites it on every call.
    """
    # Full-archive search endpoint
    endpoint = "https://api.twitter.com/2/tweets/search/all"

    # Query parameters, listed in the order they are sent
    fields = [
        ('query', keyword),
        ('start_time', start_date),
        ('end_time', end_date),
        ('max_results', max_results),
        ('expansions', 'author_id'),
        ('tweet.fields', 'id,text,created_at,entities,source'),
        ('user.fields', 'id,username,location'),
        ('next_token', {}),
    ]
    return endpoint, dict(fields)

def connect_to_endpoint(url, headers, params, next_token = None, timeout = 30):
    """Send a GET request to *url* and return the decoded JSON body.

    Args:
        url: Endpoint URL.
        headers: HTTP headers to send (see create_headers).
        params: Query-parameter dict (the object returned by create_url).
            It is modified in place: its ``'next_token'`` entry is set to
            *next_token* before the request is sent.
        next_token: Pagination token taken from a previous response, or
            ``None`` for the first page. The requests library leaves
            ``None``-valued parameters out of the query string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the status code is not 200; the arguments are the
            status code and the response text.
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request(
        "GET", url, headers = headers, params = params, timeout = timeout
    )
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def append_to_csv(json_response, fileName):
    """Append one CSV row per tweet in *json_response* to *fileName*.

    Each row is ``[author_id, created_at, username, text, source, location]``.
    A tweet is written only when its ``author_id`` equals the ``id`` of a user
    listed under ``json_response['includes']['users']``; other tweets are
    skipped. Line breaks are removed from the tweet text so every tweet stays
    on a single CSV line.

    Args:
        json_response: Decoded search response. ``data`` (the tweets) and
            ``includes.users`` are read; a missing section is treated as
            empty instead of raising ``KeyError``.
        fileName: Path of the CSV file. It is opened in append mode, so it is
            created when it does not exist yet.

    Returns:
        None. The number of rows written is printed.
    """
    tweets = json_response.get('data', [])
    users = json_response.get('includes', {}).get('users', [])

    # Index the users once so each tweet needs a single lookup instead of a
    # scan over the whole user list. The first entry wins if an id repeats.
    users_by_id = {}
    for user in users:
        users_by_id.setdefault(user['id'], user)

    counter = 0
    # The context manager closes the file even if a malformed tweet raises.
    with open(fileName, "a", newline="", encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)

        for tweet in tweets:
            author_id = tweet['author_id']
            user = users_by_id.get(author_id)
            if user is None:
                # No matching user in the expansion: nothing to write.
                continue

            created_at = dateutil.parser.parse(tweet['created_at'])
            text = tweet['text'].replace("\n","").replace("\r","")
            # Optional fields fall back to a single space, the placeholder
            # already used for a missing location.
            source = tweet.get('source', " ")
            location = user.get('location', " ")

            res = [author_id, created_at, user['username'], text, source, location]
            csvWriter.writerow(res)
            counter += 1

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter)

# Collection script: query each time window page by page, append the tweets to
# the CSV file, then save the most recent raw response as JSON.

#Inputs for tweets
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "(ukraine OR ukrainewar) lang:en -is:retweet -is:nullcast -has:links -has:mentions -has:media -has:images -is:reply"

# One search window per (start, end) pair.
# NOTE(review): every window ends at midnight one day before the next window
# starts, so 2022-02-28, 03-05, 03-15, 03-25, 04-05 and 04-15 fall between
# windows and are not queried — confirm that is intended.
start_list =    ['2022-02-24T00:00:00.000Z',
                 '2022-03-01T00:00:00.000Z',
                 '2022-03-06T00:00:00.000Z',
                 '2022-03-16T00:00:00.000Z',
                 '2022-03-26T00:00:00.000Z',
                 '2022-04-06T00:00:00.000Z',
                 '2022-04-16T00:00:00.000Z']

end_list =      ['2022-02-28T00:00:00.000Z',
                 '2022-03-05T00:00:00.000Z',
                 '2022-03-15T00:00:00.000Z',
                 '2022-03-25T00:00:00.000Z',
                 '2022-04-05T00:00:00.000Z',
                 '2022-04-15T00:00:00.000Z',
                 '2022-04-25T00:00:00.000Z']

max_results = 500   # Page size requested from the API on every call
max_count = 3000    # Max tweets per time period
csv_path = "tweets_ukraine_war_data.csv"
json_path = 'tweets_ukraine_war_data.json'

#Total number of tweets we collected from the loop
total_tweets = 0
# Most recent API response; stays None if no request is ever made
json_response = None

# Create the CSV file. The header row is written only when the file is new or
# empty, so re-running the script does not insert a second header in the data.
write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
with open(csv_path, "a", newline="", encoding='utf-8') as csvFile:
    if write_header:
        csvWriter = csv.writer(csvFile)
        csvWriter.writerow(['author id', 'created_at', 'username', 'tweet', 'source', 'location'])

for start_date, end_date in zip(start_list, end_list):
    count = 0           # Counting tweets per time period
    next_token = None   # None requests the first page of the window

    # Keep paging until the per-period cap is reached or the pages run out
    while count < max_count:
        print("-------------------")
        print("Token: ", next_token)
        url = create_url(keyword, start_date, end_date, max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        meta = json_response['meta']
        result_count = meta['result_count']

        # Save the token to use for the next call (None when absent)
        next_token = meta.get('next_token')
        if next_token is not None:
            print("Next Token: ", next_token)

        if result_count is not None and result_count > 0:
            print("Start Date: ", start_date)
            append_to_csv(json_response, csv_path)
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")
            time.sleep(5)

        # Wait between requests
        time.sleep(5)

        # No next token: this window is exhausted, move to the next one
        if next_token is None:
            break
    print("Total number of results: ", total_tweets)

# Save the most recent raw response. This runs after the loop because
# json_response does not exist until the first request has been made.
if json_response is not None:
    with open(json_path, 'w') as f:
        json.dump(json_response, f)



