In [2]:
# importar as bibliotecas necessárias 
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re

In [3]:
# Load the raw tweet dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where this exact file exists.
df = pd.read_csv(
    filepath_or_buffer="C:\\Users\\melis\\OneDrive\\Documentos\\Mental-Health-Twitter.csv"
)

In [4]:
# Function that cleans the raw text of a tweet
def limpa_texto(post_text):
    """Strip URLs, mentions, '#' signs, line breaks and punctuation from a tweet.

    Args:
        post_text: Raw tweet text. Any non-string value (for example None, or
            the float NaN that pandas uses for a missing cell) is treated as
            an empty tweet instead of raising TypeError inside ``re.sub``.

    Returns:
        The cleaned text, or '' when ``post_text`` is not a string. Runs of
        spaces left behind by removed tokens are not collapsed.
    """
    if not isinstance(post_text, str):
        return ''
    post_text = re.sub(r'http\S+', '', post_text)  # Drop URLs
    post_text = re.sub(r'@\w+', '', post_text)     # Drop @mentions
    post_text = re.sub(r'#', '', post_text)        # Drop only the '#' sign; the hashtag word is kept
    post_text = re.sub(r'\n', ' ', post_text)      # Turn line breaks into spaces
    post_text = re.sub(r'[^\w\s]', '', post_text)  # Drop everything that is not a word char or whitespace
    return post_text

In [5]:
# Sentiment analysis with VADER: one analyzer instance is created here and
# reused by every call to the function below.
analyzer = SentimentIntensityAnalyzer()


def analise_sentimento_vader(post_text):
    """Return the 'compound' entry of VADER's polarity scores for ``post_text``."""
    return analyzer.polarity_scores(post_text)['compound']

In [6]:
# Clean every tweet, then score the cleaned text with VADER
df['texto_limpo'] = df['post_text'].map(limpa_texto)
df['sentimento_VADER'] = df['texto_limpo'].map(analise_sentimento_vader)

In [7]:
# Show the first rows of the frame, now including the cleaned text and sentiment columns
print(df.head())

   Unnamed: 0             post_id                    post_created  \
0           0  637894677824413696  Sun Aug 30 07:48:37 +0000 2015   
1           1  637890384576778240  Sun Aug 30 07:31:33 +0000 2015   
2           2  637749345908051968  Sat Aug 29 22:11:07 +0000 2015   
3           3  637696421077123073  Sat Aug 29 18:40:49 +0000 2015   
4           4  637696327485366272  Sat Aug 29 18:40:26 +0000 2015   

                                           post_text     user_id  followers  \
0  It's just over 2 years since I was diagnosed w...  1013187241         84   
1  It's Sunday, I need a break, so I'm planning t...  1013187241         84   
2  Awake but tired. I need to sleep but my brain ...  1013187241         84   
3  RT @SewHQ: #Retro bears make perfect gifts and...  1013187241         84   
4  It’s hard to say whether packing lists are mak...  1013187241         84   

   friends  favourites  statuses  retweets  label  \
0      211         251       837         0      1   
1   

In [8]:
def converte_date(date_str):
    """Parse a timestamp such as 'Sun Aug 30 07:48:37 +0000 2015' with pandas.

    Args:
        date_str: Timestamp text laid out as weekday, month, day, time,
            UTC offset and year.

    Returns:
        Whatever ``pd.to_datetime`` produces for that explicit format.
    """
    formato_data = '%a %b %d %H:%M:%S %z %Y'
    return pd.to_datetime(date_str, format=formato_data)

# Convert every entry of the post_created column with converte_date
df['post_created'] = df['post_created'].map(converte_date)


In [9]:
# Preview the first five rows after converting post_created
df.head(5)

Unnamed: 0.1,Unnamed: 0,post_id,post_created,post_text,user_id,followers,friends,favourites,statuses,retweets,label,texto_limpo,sentimento_VADER
0,0,637894677824413696,2015-08-30 07:48:37+00:00,It's just over 2 years since I was diagnosed w...,1013187241,84,211,251,837,0,1,Its just over 2 years since I was diagnosed wi...,-0.6597
1,1,637890384576778240,2015-08-30 07:31:33+00:00,"It's Sunday, I need a break, so I'm planning t...",1013187241,84,211,251,837,1,1,Its Sunday I need a break so Im planning to sp...,0.0
2,2,637749345908051968,2015-08-29 22:11:07+00:00,Awake but tired. I need to sleep but my brain ...,1013187241,84,211,251,837,0,1,Awake but tired I need to sleep but my brain h...,-0.5927
3,3,637696421077123073,2015-08-29 18:40:49+00:00,RT @SewHQ: #Retro bears make perfect gifts and...,1013187241,84,211,251,837,2,1,RT Retro bears make perfect gifts and are gre...,0.9042
4,4,637696327485366272,2015-08-29 18:40:26+00:00,It’s hard to say whether packing lists are mak...,1013187241,84,211,251,837,1,1,Its hard to say whether packing lists are maki...,0.1779


In [10]:
# Mental-health conditions, each mapped to the keywords that signal it
condicoes_mental = {
    'depressao': [
        'depressed',
        'depression',
    ],
    'ansiedade': [
        'anxiety',
    ],
    'transtorno_bipolar': [
        'bipolar',
        'bipolar disorder',
    ],
}

# Function that identifies which mental-health condition a text mentions
def identifica_condicao(texto, condicoes):
    """Return the first condition whose keyword occurs in ``texto``.

    Matching is a case-insensitive substring search, so 'Depression' and
    'ANXIETY' are recognised just like their lowercase forms.

    Args:
        texto: Text to inspect. Non-string values (e.g. None or NaN) are
            treated as containing no keyword.
        condicoes: Mapping of condition name to an iterable of keyword
            strings. Conditions are checked in the mapping's iteration order.

    Returns:
        The name of the first matching condition, or 'outra' when nothing
        matches.
    """
    if not isinstance(texto, str):
        return 'outra'
    # Fold case once up front so each keyword check is a plain substring test.
    texto_normalizado = texto.casefold()
    for condicao, palavras in condicoes.items():
        if any(palavra.casefold() in texto_normalizado for palavra in palavras):
            return condicao
    return 'outra'

# Label every cleaned tweet with the condition it mentions
df['condicao_mental'] = df['texto_limpo'].apply(identifica_condicao, condicoes=condicoes_mental)

In [11]:
# Preview the first rows, now including the condicao_mental column
df.head()

Unnamed: 0.1,Unnamed: 0,post_id,post_created,post_text,user_id,followers,friends,favourites,statuses,retweets,label,texto_limpo,sentimento_VADER,condicao_mental
0,0,637894677824413696,2015-08-30 07:48:37+00:00,It's just over 2 years since I was diagnosed w...,1013187241,84,211,251,837,0,1,Its just over 2 years since I was diagnosed wi...,-0.6597,depressao
1,1,637890384576778240,2015-08-30 07:31:33+00:00,"It's Sunday, I need a break, so I'm planning t...",1013187241,84,211,251,837,1,1,Its Sunday I need a break so Im planning to sp...,0.0,outra
2,2,637749345908051968,2015-08-29 22:11:07+00:00,Awake but tired. I need to sleep but my brain ...,1013187241,84,211,251,837,0,1,Awake but tired I need to sleep but my brain h...,-0.5927,outra
3,3,637696421077123073,2015-08-29 18:40:49+00:00,RT @SewHQ: #Retro bears make perfect gifts and...,1013187241,84,211,251,837,2,1,RT Retro bears make perfect gifts and are gre...,0.9042,outra
4,4,637696327485366272,2015-08-29 18:40:26+00:00,It’s hard to say whether packing lists are mak...,1013187241,84,211,251,837,1,1,Its hard to say whether packing lists are maki...,0.1779,ansiedade


In [12]:
# Save the processed frame to CSV, leaving out the DataFrame index
df.to_csv(path_or_buf='tweets_limpos.csv', index=False)