In [1]:
import config
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from googletrans import Translator
from tweepyclient import TweepyClient

In [2]:
# Read the API bearer token from the local `config` module (kept out of the
# notebook itself) and build the client used by the search cells further down.
bearer_token = config.BEARER_TOKEN
tweepyClient = TweepyClient(bearer_token)
# Bare expression: the notebook shows the client's repr as this cell's output.
tweepyClient

<tweepyclient.TweepyClient at 0x1e9228c4510>

In [44]:
def format_for_csv(text):
    """Flatten a piece of text so it can sit in a single CSV field.

    Every comma and every newline character in ``text`` is replaced by one
    space; all other characters are left untouched.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Single pass over the string: both troublesome characters map to a space.
    csv_unsafe_to_space = str.maketrans({',': ' ', '\n': ' '})
    return text.translate(csv_unsafe_to_space)


def save_tweets(tweet_list, data=None, columns=['id', 'tweet', 'entidad', 'dominio']):
    """Build a DataFrame with one row per tweet: id, cleaned text, entity, domain.

    Args:
        tweet_list: Iterable of tweet objects exposing ``id``, ``text`` and
            ``context_annotations`` (an iterable of dicts with
            ``['entity']['name']`` and ``['domain']['name']``, or ``None``).
        data: Optional list of already-built rows to extend. When given, the
            new rows are appended to it in place. When omitted, a fresh list
            is used for every call.
        columns: Column labels for the resulting DataFrame.

    Returns:
        pandas.DataFrame built from the accumulated rows.
    """
    # A new list per call: a mutable default (`data=[]`) is created once and
    # shared between calls, so re-running the cell would keep piling up the
    # rows produced by earlier runs.
    rows = [] if data is None else data
    for tweet in tweet_list:
        entity = ''
        domain = ''
        # Every annotation overwrites the previous one, so the values of the
        # last annotation win; tweets without annotations keep ''.
        for context_annotation in tweet.context_annotations or []:
            entity = context_annotation['entity']['name']
            domain = context_annotation['domain']['name']
        cleaned_text = format_for_csv(tweet.text)
        rows.append([tweet.id, cleaned_text, entity, domain])
    return pd.DataFrame(rows, columns=columns)

def translate(word, dest='en', translator = Translator()):
    """Translate ``word`` into the ``dest`` language and return the text.

    Args:
        word: Text to translate.
        dest: Target language code passed to the translator (default 'en').
        translator: Object whose ``translate(text, dest)`` call returns a
            result with a ``.text`` attribute. The default instance is
            created once, when the function is defined, and reused.

    Returns:
        The ``.text`` attribute of the translation result.
    """
    return translator.translate(word, dest).text

def get_tweets_sentiment(tweet_list, data=None, columns=['id', 'tweet', 'sentimiento', 'entidad'], sia = SentimentIntensityAnalyzer()):
    """Label each tweet as 'positivo', 'negativo' or 'neutral'.

    Args:
        tweet_list: Iterable of tweet objects exposing ``id``, ``text`` and
            ``context_annotations`` (an iterable of dicts with
            ``['entity']['name']``, or ``None``).
        data: Optional list of already-built rows to extend. When given, the
            new rows are appended to it in place. When omitted, a fresh list
            is used for every call.
        columns: Column labels for the resulting DataFrame.
        sia: Analyzer whose ``polarity_scores(text)`` returns a mapping with
            ``"pos"`` and ``"neg"`` scores. The default instance is created
            once, when the function is defined, and reused.

    Returns:
        pandas.DataFrame with one row per tweet: id, cleaned original text,
        sentiment label and entity name.
    """
    # A new list per call: a mutable default (`data=[]`) is created once and
    # shared between calls, so re-running the cell would keep piling up the
    # rows produced by earlier runs.
    rows = [] if data is None else data
    for tweet in tweet_list:
        entity = ''
        # The sentiment analysis only works on English text, so the tweet is
        # translated first.
        translated_text = translate(tweet.text)
        # Every annotation overwrites the previous one: the last entity wins.
        for context_annotation in tweet.context_annotations or []:
            entity = context_annotation['entity']['name']
        sentiment_dict = sia.polarity_scores(translated_text)
        positive = sentiment_dict["pos"]
        negative = sentiment_dict["neg"]
        # Label by whichever of the positive / negative scores is larger;
        # a tie is reported as neutral.
        if positive > negative:
            bias = 'positivo'
        elif positive < negative:
            bias = 'negativo'
        else:
            bias = 'neutral'
        # The stored text is the original tweet, not the translation.
        cleaned_text = format_for_csv(tweet.text)
        rows.append([tweet.id, cleaned_text, bias, entity])
    return pd.DataFrame(rows, columns=columns)

Pregunta #3: Síntomas más consultados en Colombia

In [32]:
# Spanish-language tweets (lang:es) that mention "sintomas" or "sintoma",
# excluding retweets, limited to context annotation 123.1220701888179359745
# (presumably the COVID-19 entity — TODO confirm).
# NOTE(review): nothing in this query restricts results to Colombia — confirm
# whether a place/country filter was intended.
query = "(sintomas OR sintoma) context:123.1220701888179359745 lang:es -is:retweet"
# The second argument is presumably the maximum number of tweets to fetch —
# verify against TweepyClient.search_tweets.
symptom_tweets = tweepyClient.search_tweets(query, 10000)

In [33]:
# Flatten the fetched symptom tweets into a DataFrame with the default
# columns: id, tweet, entidad, dominio.
sp_tweets_df = save_tweets(symptom_tweets)

In [34]:
# Persist the symptom tweets. With no `index` argument, to_csv also writes the
# DataFrame index as the first column. Assumes ./data already exists — confirm.
sp_tweets_df.to_csv('./data/symptom_tweets.csv')

Pregunta #4: Sentimiento sobre medicinas alternativas (dióxido, azitromicina, ivermectina)

In [42]:
# Tweets mentioning "dioxido", "azitromicina" or "ivermectina", excluding
# retweets, limited to the same context annotation as the symptoms query.
# NOTE(review): unlike the symptoms query there is no lang: filter here —
# confirm that tweets in any language are wanted.
query = "(dioxido OR azitromicina OR ivermectina) context:123.1220701888179359745 -is:retweet"
# The second argument is presumably the maximum number of tweets to fetch —
# verify against TweepyClient.search_tweets.
medicine_tweets = tweepyClient.search_tweets(query, 10000)

In [None]:
# Label every medicine tweet as positivo / negativo / neutral.
# NOTE(review): this rebinds sp_tweets_df, which earlier held the symptom
# tweets; a separate name would keep both results available.
sp_tweets_df = get_tweets_sentiment(medicine_tweets)
# Bare expression: the notebook displays the DataFrame as this cell's output.
sp_tweets_df

In [46]:
# Persist the sentiment-labelled medicine tweets. With no `index` argument,
# to_csv also writes the DataFrame index as the first column.
sp_tweets_df.to_csv('./data/4medicina_alternativa.csv')