In [None]:
import os
import tweepy as tw
import pandas as pd
import time
import schedule

In [None]:
# Twitter API credentials.
# They are read from environment variables so that real secrets never have to
# be written into (and committed with) the notebook. The placeholder strings
# are kept as fallbacks, so all four names are always defined.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "YOUR_CONSUMER_KEY")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "YOUR_CONSUMER_SECRET")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "YOUR_ACCESS_TOKEN")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "YOUR_ACCESS_TOKEN_SECRET")

# Connection.
# wait_on_rate_limit makes the client manage the amount of requests executed
# according to the rate limit defined by the Twitter API.
# NOTE(review): wait_on_rate_limit_notify looks like a tweepy 3.x-only keyword;
# confirm against the installed tweepy version before upgrading.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [None]:
# CSV file in which the crawled tweets are stored.
tweets_saved = "tweets_crawled.csv"

# Column names of the stored table, in the order the values are written.
COLS = [
    "id",
    "created_at",
    "text",
    "name",
    "screen_name",
    "user_location",
    "tweet_place_country",
    "tweet_place_city",
]

In [None]:
# Search query sent to Twitter: both keywords must be present, and the
# trailing "-filter:retweets" operator leaves retweets out of the results.
search_words = (
    "(viajar AND nordeste)"
    " -filter:retweets"
)

In [None]:
"""
Parameters: file -- path of the CSV file in which the tweets are saved.

This function searches for tweets matching a keyword or query.
In this case, we want only tweets in Portuguese and no retweets.
The tweet fields that matter for the analysis are saved in the file:
id, creation date, text, name, username, the location informed by the
user, and the location at the moment the user posted the message
(when it exists).


"""

def tweets_crawler(file):
    """Search Twitter for ``search_words`` and store the results in a CSV file.

    The module-level query ``search_words`` is run through ``api.search``
    restricted to Portuguese (``lang='pt'``) and at most 10 tweets are taken.
    For each tweet the following values are saved, in ``COLS`` order: id,
    creation date, text, author name, username, the location set on the
    author's profile and, when the tweet carries a ``place``, its country and
    place name (both are left empty otherwise).

    Rows already stored in ``file`` are kept and the new rows are added after
    them. The file is then rewritten as a whole, so it always holds exactly
    one header line and each stored row only once per crawl.

    Parameters
    ----------
    file : str
        Path of the CSV file used to persist the tweets. It is created when
        it does not exist yet.
    """
    # Load what earlier runs saved. A missing or zero-byte file simply means
    # there is no history yet (read_csv would raise on an empty file).
    if os.path.exists(file) and os.path.getsize(file) > 0:
        stored = pd.read_csv(file, header=0)
    else:
        stored = pd.DataFrame(columns=COLS)

    #do the search on Twitter, using the keywords defined, only portuguese.
    tweets = tw.Cursor(api.search, q=search_words, lang='pt').items(10)

    # Collect plain rows first and build the frame once; growing a DataFrame
    # row by row is quadratic and DataFrame.append no longer exists in
    # pandas 2.x.
    rows = []
    for tweet in tweets:
        place = tweet.place  # falsy when the author attached no place
        rows.append([
            tweet.id,
            tweet.created_at,
            tweet.text,
            tweet.user.name,
            tweet.user.screen_name,
            tweet.user.location,
            place.country if place else None,
            place.name if place else None,
        ])
    fetched = pd.DataFrame(rows, columns=COLS)

    # Skip empty frames so they cannot influence the dtypes of the result.
    frames = [frame for frame in (stored, fetched) if not frame.empty]
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=COLS)

    # Overwrite instead of appending: ``df`` already contains the stored rows,
    # so appending it would duplicate them and repeat the header on every run.
    # Passing the path lets pandas open/close the file and apply the encoding.
    df.to_csv(file, columns=COLS, index=False, encoding="utf-8")

In [None]:
def job():
    """Scheduled task: crawl tweets into the file named by ``tweets_saved``."""
    output_path = tweets_saved
    tweets_crawler(output_path)

In [None]:
def main():
    """Schedule the weekly crawl and block forever running due jobs.

    Registers ``job`` on the default ``schedule`` scheduler for every Friday
    at "20:00" and then polls for pending jobs once per second. The loop
    never returns on its own, which keeps the hosting process alive.
    """
    # The default scheduler is module-global and outlives this call, so
    # running main() again in the same process (e.g. re-executing the cell
    # after an interrupt) would stack another copy of the job and make it
    # fire several times. Drop our previous registration first; jobs that
    # other code registered without this tag are left untouched.
    crawl_tag = "tweets_crawler"
    schedule.clear(crawl_tag)

    #execute every friday at 8p.m.
    schedule.every().friday.at("20:00").do(job).tag(crawl_tag)

    #to keep the container running
    while True:
        schedule.run_pending()
        time.sleep(1)
    

In [None]:
# Entry point: start the scheduler loop only when this code is executed
# directly, not when it is imported as a module. main() does not return on
# its own, so nothing placed after this call will run.
if __name__ == "__main__":
    main()