# What was being said on Twitter on the April 2022 about ENEM Brazil

#### Check ENEM 2022 pt 2 for the Word Cloud Build 

In [1]:
import pandas as pd

In [2]:
with open('twitter_tk.txt','r') as tfile:
    consumer_key = tfile.readline().strip('\n')
    consumer_secret = tfile.readline().strip('\n')
    access_token = tfile.readline().strip('\n')
    access_token_secret = tfile.readline().strip('\n')
    bearer_token = tfile.readline().strip('\n')

In [3]:
# From here I could not follow the original proejct track So I used the one shared on:
# https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
from datetime import datetime,timedelta,timezone
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [4]:
dm7 = datetime.now(timezone.utc) - timedelta(-7)
dm1 = datetime.now(timezone.utc) - timedelta(-1)
start_time = dm7.astimezone().isoformat('T')[:-21]+'00:00.000Z'
end_time = dm1.astimezone().isoformat('T')[:-21]+'00:00.000Z'

In [5]:
start_time

'2022-04-25T00:00.000Z'

In [6]:
os.environ['BEARER_TOKEN'] = bearer_token

In [7]:
def auth():
    return os.getenv('BEARER_TOKEN')

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

def create_url(keyword, start_date, end_date, max_results = 10):
    
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'expansions': 'geo.place_id',
                    'tweet.fields': 'id,text,in_reply_to_user_id,geo,conversation_id,created_at,public_metrics,referenced_tweets',
                    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
                    'next_token': {}}
    return (search_url, query_params)

def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "#ENEM"
dm7 = datetime.now(timezone.utc) - timedelta(6)
dm1 = datetime.now(timezone.utc) - timedelta(1)
start_time = dm7.astimezone().isoformat('T')[:-21]+'00:00:00.000Z'
end_time = dm1.astimezone().isoformat('T')[:-21]+'00:00:00.000Z'
max_results = 100

In [8]:
def append_to_csv(json_response, fileName):

    #A counter variable
    counter = 0

    #Open OR create the target CSV file
    csvFile = open(fileName, "a", newline="", encoding='utf-8')
    csvWriter = csv.writer(csvFile)

    #Loop through each tweet
    for tweet in json_response['data']:
        
        # We will create a variable for each since some of the keys might not exist for some tweets
        # So we will account for that

        # 2. Time created
        created_at = dateutil.parser.parse(tweet['created_at'])

        # 3. Geolocation
        if ('geo' in tweet):   
            geo = tweet['geo']['place_id']
        else:
            geo = " "

        # 4. Tweet ID
        tweet_id = tweet['id']

        # 6. Tweet metrics
        retweet_count = tweet['public_metrics']['retweet_count']
        reply_count = tweet['public_metrics']['reply_count']
        like_count = tweet['public_metrics']['like_count']
        quote_count = tweet['public_metrics']['quote_count']

        # 8. Tweet text
        text = tweet['text']
        
        # Assemble all data in a list
        res = [created_at, geo, tweet_id, like_count, quote_count, reply_count, retweet_count, text]
        
        # Append the result to the CSV file
        csvWriter.writerow(res)
        counter += 1

    # When done, close the CSV file
    csvFile.close()

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter)

In [9]:
count = 0 # Counting tweets per time period
max_count = 100 # Max tweets per time period
flag = True
next_token = None
total_tweets= 0
#Check if flag is true
while flag:
# Check if max_count reached
    if count >= max_count:
        break
    print("-------------------")
    print("Token: ", next_token)
    url = create_url(keyword, start_time,end_time, max_results)
    json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
    result_count = json_response['meta']['result_count']

    if 'next_token' in json_response['meta']:
        # Save the token to use for next call
        next_token = json_response['meta']['next_token']
        print("Next Token: ", next_token)
        if result_count is not None and result_count > 0 and next_token is not None:
            append_to_csv(json_response, "data.csv")
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")
            time.sleep(5)                
        # If no next token exists
    else:
        if result_count is not None and result_count > 0:
            print("-------------------")
            append_to_csv(json_response, "data.csv")
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")
            time.sleep(5)
            
        #Since this is the final request, turn flag to false to move to the next time period.
        flag = False
        next_token = None

-------------------
Token:  None
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fpytmpnworpxvayrt6wvuqd9hbq3r1
# of Tweets added from this response:  100
Total # of Tweets added:  100
-------------------


In [10]:
cols = ["created_at", 'geo',"tweet_id", "like_count", "quote_count", "reply_count", "retweet_count", "text"]
enem_tw = pd.read_csv('data.csv',names = cols)

In [18]:
enem_tw.tail()

Unnamed: 0,created_at,geo,tweet_id,like_count,quote_count,reply_count,retweet_count,text
393,2022-04-15 11:42:19+00:00,,1514932125144956929,0,0,0,0,#enem a página está fora do ar?
394,2022-04-15 11:36:58+00:00,,1514930779922616326,0,0,0,0,Enem 2022: candidatos devem pedir isenção da t...
395,2022-04-15 11:26:47+00:00,,1514928218264027141,0,0,0,35,"RT @MELdicina: ENEM: O QUE É, COMO FUNCIONA, C..."
396,2022-04-15 11:04:13+00:00,,1514922538761179140,1,0,0,0,👍 on @YouTube: Compartilha com quem vai fazer ...
397,2022-04-15 11:00:03+00:00,,1514921488566022144,5,0,0,1,ATENÇÃO! O prazo de isenção de taxa para se in...


In [12]:
#Filter Out the Tweets that are just Retweets from someone else
enem_tw_wRT = enem_tw[~enem_tw['text'].str.contains("RT ")] 

In [16]:
enem_tw_wRT.shape

(282, 8)

In [6]:
#Save just the text as CSV
enem_tw_wRT['text'].to_csv('enem_tw_txt.csv')