In [1]:
import re
import json
import random
import pandas as pd

import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener

In [2]:
# Read the API credentials from a headerless CSV: column 0 holds the
# credential name, column 1 holds its value.
credentials = pd.read_csv("archivos/keys_royardo.csv", header=None)
keys = dict(zip(credentials[0], credentials[1]))

# Build the OAuth handler from the consumer pair, then attach the access pair.
auth = tweepy.OAuthHandler(
    keys['api_key'],
    keys['api_secret_key'],
)
auth.set_access_token(
    keys['access_token'],
    keys['access_token_secret'],
)

# API client used by the search cells; wait_on_rate_limit=True is passed so
# the client waits on rate limits instead of failing.
api = tweepy.API(auth, wait_on_rate_limit=True)

In [3]:
# Load the city table; the displayed output shows it carries (among other
# columns) `city`, `lat` and `lng`, which the per-city search loop reads.
ciudades = pd.read_csv('archivos/co_small.csv')
# Bare last expression: render the frame as the cell output.
ciudades

Unnamed: 0,city,lat,lng,country,iso2,admin,capital,population,population_proper
0,Bogotá,4.649178,-74.062827,Colombia,CO,Bogotá,primary,7772000.0,6333661.0
1,Medellín,6.25184,-75.563591,Colombia,CO,Antioquia,admin,3297000.0,1999979.0
2,Cali,3.437222,-76.5225,Colombia,CO,Valle del Cauca,admin,2254000.0,2178836.0
3,Barranquilla,10.963889,-74.796389,Colombia,CO,Atlántico,admin,1798000.0,1244491.0
4,Bucaramanga,7.125393,-73.119804,Colombia,CO,Santander,admin,1009000.0,571820.0
5,Cartagena,10.399722,-75.514444,Colombia,CO,Bolívar,admin,887000.0,887000.0
6,Cúcuta,7.893907,-72.507821,Colombia,CO,Norte de Santander,admin,722146.0,721398.0
7,Pereira,4.813333,-75.696111,Colombia,CO,Risaralda,admin,568750.0,440118.0
8,Santa Marta,11.240791,-74.19904,Colombia,CO,Magdalena,admin,431781.0,402641.0
9,Ibagué,4.438889,-75.232222,Colombia,CO,Tolima,admin,421685.0,408627.0


# Per city

In [4]:
# Collect tweets matching the query around every city in `ciudades`.
# Each tweet becomes one 7-item row:
#   [full text, created_at, city searched, user location, place, coordinates, link]
# and all rows accumulate in `all_tweets`.

# The query must be a plain string. A list would be stringified by tweepy 3.x
# and sent to the API literally as "['halloween -RT']".
# NOTE(review): "-RT" presumably aims to drop retweets; "-filter:retweets" is
# the documented search operator, but switching would change the result set.
SEARCH_QUERY = 'halloween -RT'
SEARCH_RADIUS = '45mi'      # radius around each city's lat/lng
TWEETS_PER_CITY = 100       # upper bound of tweets fetched per city

all_tweets = []

# itertuples gives attribute access (row.city, row.lat, row.lng) without
# building a Series per row or unpacking the (index, row) pair of iterrows.
for c in ciudades.itertuples(index=False):
    gc = f"{c.lat},{c.lng},{SEARCH_RADIUS}"

    # `include_rts` was dropped: it is a timeline parameter, not a search one.
    # NOTE(review): `since` is kept exactly as in the original call; confirm
    # the installed tweepy version still forwards it to the search endpoint.
    tweets = tweepy.Cursor(api.search,
                           q=SEARCH_QUERY,
                           lang="es",
                           since='2020-10-24',
                           geocode=gc,
                           tweet_mode='extended').items(TWEETS_PER_CITY)

    # Materialise this city's rows first, so a failure halfway through the
    # cursor does not leave a partial city in `all_tweets`.
    # NOTE(review): `tweet.place` is stored as the tweepy object itself; the
    # rendered frame shows its repr ("Place(_api=...)"), which is what a CSV
    # export will contain.
    t = [
        [
            tweet.full_text,
            tweet.created_at,
            c.city,
            tweet.user.location,
            tweet.place,
            tweet.coordinates,
            f"https://twitter.com/i/web/status/{tweet.id}",
        ]
        for tweet in tweets
    ]

    all_tweets.extend(t)
    print(c.city)  # progress marker: one line per finished city

Bogotá
Medellín
Cali
Barranquilla
Bucaramanga
Cartagena
Cúcuta
Pereira
Santa Marta
Ibagué
Pasto
Manizales
Villavicencio
Neiva
Armenia
Valledupar
Montería
Sincelejo
Popayán
Tunja
Ríohacha
Florencia
Quibdó
Arauca
Yopal
Leticia
San Andrés
San José del Guaviare
Mocoa
Puerto Carreño
Mitú
Inírida


In [5]:
# Assemble the collected rows into a DataFrame. The column names go straight
# into the constructor so that an empty `all_tweets` still produces an empty
# 7-column frame instead of raising a length-mismatch error on assignment.
data = pd.DataFrame(
    all_tweets,
    columns=['tweet', 'date', 'city', 'user_location', 'place', 'coordinates', 'link'],
)
data

Unnamed: 0,tweet,date,city,user_location,place,coordinates,link
0,¿Desparchado porque mañana no podrás salir a p...,2020-10-31 00:16:27,Bogotá,"Bogotá, Colombia",,,https://twitter.com/i/web/status/1322331566929...
1,Participa en nuestro concurso de disfraces y l...,2020-10-31 00:15:34,Bogotá,Bogota,,,https://twitter.com/i/web/status/1322331341812...
2,MEGA-ESPECIAL Halloween 🎃 (Parte 53).\n\nYo ve...,2020-10-31 00:13:59,Bogotá,"Bogotá, Colombia",,,https://twitter.com/i/web/status/1322330942871...
3,Un mensaje especial para los padres de familia...,2020-10-31 00:11:46,Bogotá,Bogotá - Colombia,,,https://twitter.com/i/web/status/1322330385573...
4,Luna azul con halloween 🤪👻🤔 https://t.co/gjQhV...,2020-10-31 00:10:04,Bogotá,"bogota,Colombia",,,https://twitter.com/i/web/status/1322329960250...
...,...,...,...,...,...,...,...
2229,#lacurasoyyo cuando celebro Halloween con mi f...,2020-10-26 20:05:38,Mocoa,San Juan de Pasto,,,https://twitter.com/i/web/status/1320818893242...
2230,Este halloween #LaCuraSoyYo 👻🎃\n.\n.\n.\n@gobn...,2020-10-26 17:30:07,Mocoa,San Juan de Pasto,,,https://twitter.com/i/web/status/1320779757378...
2231,Decora su casa para Halloween con un crimen y ...,2020-10-30 09:31:58,Puerto Carreño,Isla Margarita Venezuela,,,https://twitter.com/i/web/status/1322108977778...
2232,Disfruta del mejor #horror en casa🎃\n\n #Infec...,2020-10-26 18:09:31,Puerto Carreño,"Caracas, Venezuela",Place(_api=<tweepy.api.API object at 0x7ff7792...,,https://twitter.com/i/web/status/1320789670872...


In [17]:
# Among tweets that carry coordinates, show the searched city of the row labelled 7.
data.loc[data.coordinates.notna()].loc[7, 'city']

'Bogotá'

In [15]:
# Same row as the city check: pull the 'coordinates' entry out of the tweet's
# coordinates object (the output shows a two-element list).
data.loc[data.coordinates.notna()].loc[7, 'coordinates']['coordinates']

[-74.0794, 4.5997]

In [6]:
# Number of collected tweets per searched city, most frequent first.
data['city'].value_counts()

Cúcuta                   100
Tunja                    100
Bogotá                   100
Ibagué                   100
Neiva                    100
Montería                 100
Cartagena                100
Medellín                 100
Valledupar               100
Pereira                  100
Ríohacha                 100
Santa Marta              100
Barranquilla             100
Cali                     100
Bucaramanga              100
Sincelejo                100
Manizales                100
Villavicencio            100
Armenia                  100
Popayán                   94
Pasto                     55
Yopal                     48
Arauca                    41
Florencia                 38
Mocoa                     28
San José del Guaviare     14
Quibdó                     6
Leticia                    4
San Andrés                 3
Puerto Carreño             3
Name: city, dtype: int64

In [8]:
# (rows, columns) of the collected-tweets frame.
data.shape

(2234, 7)

In [9]:
# Earliest creation timestamp among the collected tweets.
data['date'].min()

Timestamp('2020-10-24 02:15:18')

In [7]:
# Persist the collected tweets to CSV, leaving out the positional index.
data.to_csv(
    path_or_buf="halloween.csv",
    index=False,
)