In [1]:
import pandas as pd
import tweepy as tw
import os
import time

In [2]:
def get_tweets(keyword):
    '''Fetch up to 500 tweets matching ``keyword`` and append them to a csv file.

    A query is submitted to the twitter search API for tweets in portuguese
    (``lang="pt"``), excluding retweets, geographically restricted to a circle
    centred on Portugal. For every tweet returned, the id, date of creation and
    full text are appended (without header or index) to
    ``data/raw_tweets_{keyword}.csv``.

    To avoid duplicates, the highest tweet id seen is logged in
    ``data/{keyword}.txt``; on the next call for the same keyword that id is
    sent as ``since_id`` and the file is then updated. When a keyword is
    queried for the first time (no txt file present, or the file is empty) the
    query is submitted with no id limit.

    Credentials are read from the ``CONSUMER_KEY`` and ``CONSUMER_SECRET``
    environment variables.

    Parameters
    ----------
    keyword : str
        Search expression; it is also used verbatim in the output file names.

    Returns
    -------
    None
        Results are written to disk and a one-line summary is printed.
    '''

    since_id_path = f"data/{keyword}.txt"
    csv_path = f"data/raw_tweets_{keyword}.csv"

    # Make sure the output folder exists so the first run does not fail on write.
    os.makedirs("data", exist_ok=True)

    # Checking if there is already a file with the since_id for this keyword.
    # The value is stripped so a trailing newline is never sent to the API.
    try:
        with open(since_id_path, "r") as since_file:
            since_id = since_file.read().strip()
    except FileNotFoundError:
        since_id = ""

    # Authentication
    consumer_secret = os.getenv("CONSUMER_SECRET")
    consumer_key = os.getenv("CONSUMER_KEY")

    auth = tw.OAuthHandler(consumer_key, consumer_secret)
    api = tw.API(auth)

    # Query submission; the only difference between a first and a follow-up
    # run is the optional since_id, so the arguments are assembled once.
    coords = "39.596860,-8.036780,288.85289km" # These coordinates encompass all of mainland Portugal and a chunk of Spain

    search_kwargs = {
        "q": f"{keyword} -filter:retweets",
        "lang": "pt",
        "tweet_mode": "extended",
        "geocode": coords,
    }
    if since_id:
        search_kwargs["since_id"] = since_id

    tweets = list(tw.Cursor(api.search, **search_kwargs).items(500))

    # Building a dataframe with id, created_at and full_text
    tweet_df = pd.DataFrame(
        [
            [tweet._json["id"], tweet._json["created_at"], tweet._json["full_text"]]
            for tweet in tweets
        ],
        columns=["id", "created_at", "full_text"],
    )
    tweet_df["created_at"] = pd.to_datetime(tweet_df["created_at"])

    # Appending all search results to the respective csv in the data folder
    n_new_lines = len(tweet_df)

    if n_new_lines:
        tweet_df.to_csv(csv_path, mode="a", header=False, index=False)

    print(f"Successfully wrote {n_new_lines} lines to the data/raw_tweets_{keyword}.csv")

    # Updating since_id in txt file. The maximum id is stored rather than the
    # id of the first row, because the first result returned by the search is
    # not guaranteed to be the most recent tweet.
    if n_new_lines:
        with open(since_id_path, "w") as since_file:
            since_file.write(str(int(tweet_df["id"].max())))

In [3]:
# Search expressions submitted one by one to get_tweets; "OR" joins alternative
# word forms inside a single query.
keywords = [
    # terms that read as positive sentiment
    "feliz",
    "amor",
    "obrigado OR obrigada",
    "ótimo OR ótima",
    "parabéns",
    "fantástico OR fantástica",
    "maravilha OR maravilhoso OR maravilhosa",
    # terms that read as negative sentiment
    "fml",
    "péssimo OR péssima",
    "trágico OR trágica",
    "horrível",
    "mau OR má",
    "terrível",
    "detesto OR detestei",
]

In [4]:
# Run the collection once per search term; each call appends any new tweets to
# that term's csv under data/ and prints how many rows were written.
for keyword in keywords:
    get_tweets(keyword)

Successfully wrote 500 lines to the data/raw_tweets_feliz.csv
Successfully wrote 500 lines to the data/raw_tweets_amor.csv
Successfully wrote 500 lines to the data/raw_tweets_obrigado OR obrigada.csv
Successfully wrote 369 lines to the data/raw_tweets_ótimo OR ótima.csv
Successfully wrote 500 lines to the data/raw_tweets_parabéns.csv
Successfully wrote 273 lines to the data/raw_tweets_fantástico OR fantástica.csv
Successfully wrote 500 lines to the data/raw_tweets_maravilha OR maravilhoso OR maravilhosa.csv
Successfully wrote 22 lines to the data/raw_tweets_fml.csv
Successfully wrote 333 lines to the data/raw_tweets_péssimo OR péssima.csv
Successfully wrote 31 lines to the data/raw_tweets_trágico OR trágica.csv
Successfully wrote 500 lines to the data/raw_tweets_horrível.csv
Successfully wrote 500 lines to the data/raw_tweets_mau OR má.csv
Successfully wrote 84 lines to the data/raw_tweets_terrível.csv
Successfully wrote 152 lines to the data/raw_tweets_detesto OR detestei.csv
