In [1]:
%autosave 0

Autosave disabled


# Part 1
## Get tweet data

In [2]:
# Imports

import csv
import pathlib
import sys

from tweepy import API 
from tweepy import Cursor 
from tweepy import OAuthHandler 

sys.path.insert(1, r'../src/')

from twitter_credentials import Config

In [3]:
# Authentication

# Build the OAuth handler from the API key pair exposed by `Config`
# (imported from ../src/twitter_credentials).
auth = OAuthHandler(Config.TW_API_KEY, Config.TW_API_KEY_SECRET)
# Attach the access token pair.
# NOTE(review): Config.ACCESS_TOKEN_KEY is presumably the access token *secret* - confirm.
auth.set_access_token(Config.ACCESS_TOKEN, Config.ACCESS_TOKEN_KEY)

# wait_on_rate_limit=True: once the rate limit is reached, wait until Twitter
# allows more requests. `api` is the client used by the cells below.
api = API(auth, wait_on_rate_limit=True)

In [4]:
# Get tweets function

def get_tweets(keyword, num, since, path='', retweets=False, mode='print'):
    """Search tweets matching ``keyword`` and print them or append them to a CSV.

    Parameters
    ----------
    keyword : str
        Search query sent as the ``q`` parameter of ``api.search``.
    num : int
        Maximum number of tweets to pull from the cursor.
    since : str
        Value forwarded as the ``since`` search parameter (e.g. ``'2020-03-11'``).
    path : str or pathlib.Path, optional
        Folder containing ``data.csv``; only used when ``mode='save'``.
        The default ``''`` means the current working directory.
    retweets : bool, optional
        When False (default), ``-filter:retweets`` is added to the query so
        retweets are excluded from the results.
    mode : {'print', 'save'}, optional
        ``'print'`` writes ``created_at`` and ``text`` to stdout.
        ``'save'`` appends one row per tweet to ``<path>/data.csv`` with the
        columns id, created_at, text, favorite_count, retweet_count.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'print'`` nor ``'save'``.
    """
    # Fail fast on a typo instead of silently fetching nothing.
    if mode not in ('print', 'save'):
        raise ValueError("mode must be 'print' or 'save', got {!r}".format(mode))

    if not retweets:
        # The leading space matters: without it the operator is glued to the
        # last word of the query and is no longer parsed as a filter.
        keyword = keyword + ' -filter:retweets'

    tweets = Cursor(api.search, q=keyword, since=since).items(num)

    if mode == 'print':
        for tweet in tweets:
            print(tweet.created_at, tweet.text, '\n')
        return

    # Path(...) lets callers pass either a str or a Path (the '' default included).
    # newline='' is what the csv module requires to avoid blank rows on Windows;
    # an explicit UTF-8 encoding keeps emoji/accents independent of the locale.
    csv_path = pathlib.Path(path) / 'data.csv'
    with open(csv_path, 'a', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerows(
            [
                tweet.id,
                tweet.created_at,
                tweet.text,
                tweet.favorite_count,
                tweet.retweet_count,
            ]
            for tweet in tweets
        )

In [5]:
# Smoke test: print a handful of matching tweets

# COVID-19 was declared a pandemic on 11 March 2020, so search from that date
get_tweets(keyword='Covid Guatemala', num=10, since='2020-03-11', mode='print')

2020-09-07 21:09:16 La medida será el tiempo que se considere necesario y no se fija plazo hasta que las condiciones lo hagan viable. https://t.co/LbDQb5WbcH 

2020-09-07 21:00:01 Lucha contra el COVID 😷 se contagió, se recuperó 👏🏼y sigue en la pelea en los hospitales de Guatemala 🇬🇹

En… https://t.co/WmyMpZ2e0I 

2020-09-07 20:45:52 Se disminuye la cantidad de pruebas de COVID-19 que se realizan en Guatemala - https://t.co/EmWUje20rg https://t.co/SWBx9aOvG6 

2020-09-07 20:41:24 Organizaciones aseguran que Guatemala NO está preparado para segunda ola de COVID-19 - https://t.co/1dEwu9wBpS https://t.co/LVbb9uCaKO 

2020-09-07 20:41:12 #ReporteNacional 
🔴 Con un total de 17 mil exámenes realizados en la última semana, Guatemala registró su cifra más… https://t.co/rOFGKHK33x 

2020-09-07 20:37:05 #ReporteNacional 
🔴 Guatemala registró 3.771 casos nuevos de Covid-19 la última semana. Son cerca de dos mil casos… https://t.co/DC4ycXRq2Z 

2020-09-07 20:25:45 Once this covid shit is over I’m 

In [6]:
# Get tweets using multiple keywords

keywords = ['Coronavirus Guatemala','CoronavirusGT', 'Covid19GT', 'CovidGT', 'CovidGuatemala']
num = 1000
since = '2020-03-11'
mode = 'save'

# Save tweets in "data" folder
path = pathlib.Path.cwd() / '../data/'
# Create the folder if it is missing (e.g. fresh checkout); otherwise opening
# data.csv for appending fails with FileNotFoundError.
path.mkdir(parents=True, exist_ok=True)

# get_tweets appends to data.csv, so re-running this cell adds the rows again,
# and a tweet matching several keywords may be stored more than once.
for kword in keywords:
    get_tweets(kword, num, since, path=path, mode=mode)

# Part 2
## Use the information collected

In [7]:
import pandas as pd
import numpy as np

import pathlib

# Folder holding the collected tweets
path = pathlib.Path.cwd().joinpath('../data/')

# data.csv is read without a header row, so pandas numbers the columns 0-4
df = pd.read_csv(path.joinpath('data.csv'), header=None)
df.head()

Unnamed: 0,0,1,2,3,4
0,1303080841627791360,2020-09-07 21:20:57,"De los inhumados en el Cementerio La Verbena, ...",0,1
1,1303068019963301888,2020-09-07 20:30:00,"El Gobierno de Guatemala, a través de la Secre...",4,3
2,1303064793461350400,2020-09-07 20:17:11,@_SJPeace_ And migrant workers from Mexico and...,0,0
3,1303064170729033729,2020-09-07 20:14:42,-La típica de EL PAPA FRANCISCO DICE *inserte ...,3,0
4,1303060075674730499,2020-09-07 19:58:26,*Paja del día por parte de aquellos mentirosos...,0,0
