In [1]:
# Import the required dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Widen the column display limit to 200 characters so long statements stay readable.
pd.set_option("display.max_colwidth", 200)

In [3]:
# Read the combined statements CSV, using its "Unnamed: 0" column as the row index.
df = pd.read_csv(filepath_or_buffer="Combined_Data.csv", index_col="Unnamed: 0")
# Preview five randomly chosen rows.
df.sample(n=5)

Unnamed: 0,statement,status
4921,Alhaji Wasiu Ayinde - Fuji Rapping - Side A,Normal
50196,,Bipolar
52371,"I can feel something in my lower throat or upper lungs. I’m not sick but when I am, the mucus is usually felt in the back of my throat rather than where I can feel something now. I’m at work const...",Anxiety
50919,"I haven't replied to someone for 30 weeks 🥲🥲🥲. Thankfully this person was not a close friend at all, they were an acquaintance, but God I feel so embarrassed that I never replied. What must they t...",Personality disorder
12095,"They say I need to improve my confidence. They say I need to improve my self-esteem. Honestly, to this day, I (30F) still have no clue how to do that. Sometimes I like to imagine what life could b...",Depression


In [4]:
# Summarise the frame: entry count, per-column non-null counts and dtypes.
# A non-null count below the entry count means that column has missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 53043 entries, 0 to 53042
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  52681 non-null  object
 1   status     53043 non-null  object
dtypes: object(2)
memory usage: 1.2+ MB


In [5]:
# Count rows that have a statement (True) versus rows where it is missing (False).
df['statement'].notna().value_counts()

statement
True     52681
False      362
Name: count, dtype: int64

In [6]:
# Discard every row that has a missing value in any column,
# then re-check the non-null counts to confirm nothing is missing.
df = df.dropna(axis='index', how='any')
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 52681 entries, 0 to 53042
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   statement  52681 non-null  object
 1   status     52681 non-null  object
dtypes: object(2)
memory usage: 1.2+ MB


In [7]:
# Count how many statements carry each status label (most frequent first).
df['status'].value_counts()

status
Normal                  16343
Depression              15404
Suicidal                10652
Anxiety                  3841
Bipolar                  2777
Stress                   2587
Personality disorder     1077
Name: count, dtype: int64

In [8]:
# Pull the statement text out as a plain Python list for per-statement scoring.
statements = df['statement'].to_list()
# Preview only the first few entries; echoing the full list floods the notebook output.
statements[:5]

['oh my gosh',
 'trouble sleeping, confused mind, restless heart. All out of tune',
 'All wrong, back off dear, forward doubt. Stay in a restless and restless place',
 "I've shifted my focus to something else but I'm still worried",
 "I'm restless and restless, it's been a month now, boy. What do you mean?",
 'every break, you must be nervous, like something is wrong, but what the heck',
 'I feel scared, anxious, what can I do? And may my family or us be protected :)',
 "Have you ever felt nervous but didn't know why?",
 "I haven't slept well for 2 days, it's like I'm restless. why huh :([].",
 "I'm really worried, I want to cry.",
 "always restless every night, even though I don't know why, what's wrong. strange.",
 "I'm confused, I'm not feeling good lately. Every time I want to sleep, I always feel restless",
 'sometimes what is needed when there is a problem is to laugh until you forget that there is a problem, when you remember it, you feel restless like that well, it turns out th

Source: the VADER sentiment-scoring code in the next cell is taken from https://www.geeksforgeeks.org/python-sentiment-analysis-using-vader/

In [9]:
# Score every statement with VADER and bucket its compound score into a label.
analyzer = SentimentIntensityAnalyzer()

# Compound score of each statement, in the same order as `statements`.
score = [analyzer.polarity_scores(text)['compound'] for text in statements]

# Label each compound score: >= 0.05 is Positive, <= -0.05 is Negative,
# anything in between is Neutral.
sentiment = [
    "Positive" if compound >= 0.05
    else "Negative" if compound <= -0.05
    else "Neutral"
    for compound in score
]

In [10]:
# Attach the sentiment label and compound score to each statement as new columns,
# then preview the first rows.
df = df.assign(sentiment=sentiment, score=score)
df.head()

Unnamed: 0,statement,status,sentiment,score
0,oh my gosh,Anxiety,Neutral,0.0
1,"trouble sleeping, confused mind, restless heart. All out of tune",Anxiety,Negative,-0.2263
2,"All wrong, back off dear, forward doubt. Stay in a restless and restless place",Anxiety,Negative,-0.7351
3,I've shifted my focus to something else but I'm still worried,Anxiety,Negative,-0.4215
4,"I'm restless and restless, it's been a month now, boy. What do you mean?",Anxiety,Negative,-0.4939


In [11]:
# Spot-check ten random rows to compare each statement with its sentiment label and score.
df.sample(10)

Unnamed: 0,statement,status,sentiment,score
28423,So I got freaked out and cancelled again. Now I’m worried they’ll find out about it later and send me to collections to damage my credit and ruin my life without me ever knowing. I hope this sound...,Stress,Negative,-0.7934
49264,What should I do?? I’m pretty sure I’m burnt out and I have been for over a month now. It’s gotten so bad to the point I CAN’T work even when I try to force myself to. I thought it’d get better wi...,Stress,Positive,0.9716
28784,He went on rant (not aggressive) about how he worked all 50 something year's for the stuff he has and how everyone want to just thrown it away. And at that point I feeling like a gave up (it being...,Stress,Negative,-0.1815
43167,want to go to easterfest,Normal,Positive,0.0772
18090,"Anybody else quit their old job, start a new one (or entrepreneurship) and find yourself less stressed but more depressed than before? The fuck! I am so frustrated with life right now. So hard to ...",Depression,Negative,-0.9829
32717,two guys got into an argument.,Normal,Negative,-0.3612
45202,limburger 00 infrastructure and improving their coverage service most of the wifi hotspot are in adelaide sa though,Normal,Positive,0.4215
29513,"If I ask her anything about her life or about my uncle who she lives with (66 stroke survivor), she answers vaguely “He’s fine” or “ask him yourself”. The unappreciative part come from the fact th...",Stress,Positive,0.228
52118,Feeling unloved and depressed Feeling like a brainless idiot who’s going nowhere in life,Anxiety,Negative,-0.7184
50164,"Trouble \n\nTrouble doesn't knock.\n\nI ask Trouble in. \n\nTrouble sits patiently in the corner\n\nand waits.\n\n​\n\nOh, Trouble. \n\n​\n\nI serve you tea. \n\nWe have a nice conversation. \n\n...",Bipolar,Negative,-0.9216


In [12]:
# Count how many statements received each sentiment label.
df['sentiment'].value_counts()

sentiment
Negative    28780
Positive    17394
Neutral      6507
Name: count, dtype: int64

## Split the data into train & test sets:

In [13]:
# Set the features variable.
X = df.drop(columns=['status'])
# Set the target variable.
y = df['status']

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

In [14]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,statement,sentiment,score
32186,"turn the radio down, please.",Positive,0.3182
4564,QUE JYP QUE??&($((#)@,Neutral,0.0
39188,i ve finally been put on med after year of pushing through my anxiety the idea of going on med wa scary but i m hoping it ll be better in the long run the only issue i m having so far is i m so ti...,Positive,0.9189
8642,It is for the information that the above drug is now out of stock in India. Please take up the matter with the P.M.O. so that the Intas company may be pressed upon to immediately start production ...,Negative,-0.5574
36663,"I GOT MY BOX!!!!!!!!!!!!! i'll pick it up tomorrow, priscilla! @gilmoregirlc",Neutral,0.0


In [15]:
# Preview the first training labels.
y_train.head()

32186        Normal
4564         Normal
39188    Depression
8642     Depression
36663        Normal
Name: status, dtype: object