In [1]:
import pandas as pd
import re 
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
df = pd.read_csv('data/cleaned_tweets.csv')
df

Unnamed: 0.1,unnamed: 0,post_text,clean_text,Unnamed: 0,text,created_at
0,0.0,It's just over 2 years since I was diagnosed w...,it's just over 2 years since i was diagnosed w...,,,
1,1.0,"It's Sunday, I need a break, so I'm planning t...","it's sunday, i need a break, so i'm planning t...",,,
2,2.0,Awake but tired. I need to sleep but my brain ...,awake but tired. i need to sleep but my brain ...,,,
3,3.0,RT @SewHQ: #Retro bears make perfect gifts and...,rt : retro bears make perfect gifts and are gr...,,,
4,4.0,It’s hard to say whether packing lists are mak...,it’s hard to say whether packing lists are mak...,,,
...,...,...,...,...,...,...
20095,,,that makes sense...sad...they should be the on...,20095.0,@AbnRgr504 @LangmanVince That makes sense...sa...,2025-06-10 19:44:18+00:00
20096,,,alpha will fucked us,20096.0,@stacy_muur @opensea @monad_xyz @aztecnetwork ...,2025-06-10 19:44:18+00:00
20097,,,that’s some fucked up shit right there. how mu...,20097.0,@shellshockkk That’s some fucked up shit right...,2025-06-10 19:44:18+00:00
20098,,,damn ik i look so good that a worker was hitti...,20098.0,damn ik i look so good that a worker was hitti...,2025-06-10 19:44:17+00:00


In [3]:
df.shape
df = df.drop(columns=['unnamed: 0', 'created_at'], axis=0)


In [4]:
df

Unnamed: 0.1,post_text,clean_text,Unnamed: 0,text
0,It's just over 2 years since I was diagnosed w...,it's just over 2 years since i was diagnosed w...,,
1,"It's Sunday, I need a break, so I'm planning t...","it's sunday, i need a break, so i'm planning t...",,
2,Awake but tired. I need to sleep but my brain ...,awake but tired. i need to sleep but my brain ...,,
3,RT @SewHQ: #Retro bears make perfect gifts and...,rt : bears make perfect gifts and are great f...,,
4,It’s hard to say whether packing lists are mak...,it’s hard to say whether packing lists are mak...,,
...,...,...,...,...
20095,,that makes sense...sad...they should be the on...,20095.0,@AbnRgr504 @LangmanVince That makes sense...sa...
20096,,alpha will fucked us,20096.0,@stacy_muur @opensea @monad_xyz @aztecnetwork ...
20097,,that’s some fucked up shit right there. how mu...,20097.0,@shellshockkk That’s some fucked up shit right...
20098,,damn ik i look so good that a worker was hitti...,20098.0,damn ik i look so good that a worker was hitti...


## Sentiment Analysis using Vader


In [5]:
analyzer = SentimentIntensityAnalyzer()

In [6]:
def get_sentiment(text):
  score = analyzer.polarity_scores(str(text))
  if score['compound']>=0.05:
    return 'positive'
  elif score['compound'] <= -0.05:
        return 'negative'
  else:
        return 'neutral'

In [28]:
def classify_depression_if_not_news(post_text, clean_text):
    post_text = str(post_text).strip().lower()
    clean_text = str(clean_text).strip().lower()

    news_starters = ['news:', 'breaking', 'report:', 'headline:', 'update:', 'rt:']
    if any(post_text.startswith(ns) for ns in news_starters):
        return 'undepressed'
    
    keywords = ['depressed', 'hopeless', 'suicidal', 'worthless',
                'anxious', 'lonely', 'crying', 'empty', 'dead', 'kill myself']
    
    return 'depressed' if any(word in clean_text for word in keywords) else 'undepressed'

In [29]:
df['sentiment'] = df['clean_text'].apply(get_sentiment)

In [34]:
df['mental_health'] = df.apply(lambda row: classify_depression_if_not_news(row['post_text'], row['clean_text']), axis=1)

In [35]:
df

Unnamed: 0.1,post_text,clean_text,Unnamed: 0,text,sentiment,mental_health
0,It's just over 2 years since I was diagnosed w...,it's just over 2 years since i was diagnosed w...,,,neutral,undepressed
1,"It's Sunday, I need a break, so I'm planning t...","it's sunday, i need a break, so i'm planning t...",,,neutral,undepressed
2,Awake but tired. I need to sleep but my brain ...,awake but tired. i need to sleep but my brain ...,,,negative,undepressed
3,RT @SewHQ: #Retro bears make perfect gifts and...,rt : bears make perfect gifts and are great f...,,,positive,undepressed
4,It’s hard to say whether packing lists are mak...,it’s hard to say whether packing lists are mak...,,,positive,undepressed
...,...,...,...,...,...,...
20095,,that makes sense...sad...they should be the on...,20095.0,@AbnRgr504 @LangmanVince That makes sense...sa...,positive,undepressed
20096,,alpha will fucked us,20096.0,@stacy_muur @opensea @monad_xyz @aztecnetwork ...,negative,undepressed
20097,,that’s some fucked up shit right there. how mu...,20097.0,@shellshockkk That’s some fucked up shit right...,negative,undepressed
20098,,damn ik i look so good that a worker was hitti...,20098.0,damn ik i look so good that a worker was hitti...,negative,undepressed


In [36]:
df['mental_health'].value_counts()

mental_health
undepressed    19910
depressed        190
Name: count, dtype: int64

In [27]:
depressed_df = df[df['mental_health']=='depressed']
depressed_df

Unnamed: 0.1,post_text,clean_text,Unnamed: 0,text,sentiment,mental_health
64,RT @BBCBreaking: 7 people confirmed dead after...,rt : 7 people confirmed dead after plane crash...,,,negative,depressed
188,RT @richardbranson: Why big game animals are w...,rt : why big game animals are worth more alive...,,,negative,depressed
247,Untreated mental illness leaves 2 dead: #menta...,untreated mental illness leaves 2 dead:,,,negative,depressed
478,Deadly consequences of fat shaming: #emotional...,deadly consequences of fat shaming:,,,neutral,depressed
497,"Young, black and depressed? https://t.co/nU9Xl...","young, black and depressed?",,,negative,depressed
...,...,...,...,...,...,...
20006,,schopenhauer was a depressed doomer.,20006.0,@NaokiQYamamoto Schopenhauer was a depressed d...,negative,depressed
20020,,notes/story on the art: an encounter within th...,20020.0,notes/story on the art: An encounter within th...,negative,depressed
20065,,isreal needs usa to survive...\n\nattacking ir...,20065.0,@sentdefender Isreal needs USA to survive...\n...,negative,depressed
20076,,when im not being productive i feel depressed ...,20076.0,when im not being productive i feel depressed ...,negative,depressed
