In [25]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# convert df to a list of text document
# depression
depress_df = pd.read_csv("dataset/depression.csv")
depression_quotes_lst = depress_df["post"].tolist()

# normal
tweets_df = pd.read_csv("dataset/Tweets.csv")
# remove negative sentiment
positive_df = tweets_df[tweets_df["sentiment"].str.contains("negative") == False]
normal_quotes_lst = positive_df["text"].values.astype('U').tolist()

# combine the two lists
final_list = depression_quotes_lst + normal_quotes_lst

# encode text data
vectorizer = TfidfVectorizer()
vectorizer.fit(final_list)
vector = vectorizer.transform(final_list)

# depression shape (33554, 41111)
# normal shape (19700, 16101)
depression_target = [1] * len(depression_quotes_lst)
normal_target = [0] * len(normal_quotes_lst)

X = vector
y = np.asarray(depression_target + normal_target)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=21, stratify = y)

print(X.shape)
print(y_train)

clf = SVC().fit(X_train, y_train)

preds = clf.predict(X_test)

print(preds)

accuracy = np.mean(preds == y_test)
print(f'Accuracy on test data is {accuracy*100}%.')


(53254, 51827)
[0 1 0 ... 0 1 0]
[0 1 0 ... 1 0 0]
Accuracy on test data is 98.30381172936096%.


In [29]:
new_quote = ["""
I’m 25F and I have around 25k of debt and I’m afraid I might have to drop school again because of money issues and I can’t do driving classes or even buying a car because of money issues. I also may have to move or find a roomate and so all my savings will go towards that. I see 30s year old people out of the blue having houses, condos, a car and all that grown up stuff and I’m like how did you do it ? People tell me just invest but where do I even start ? There are so many ways and so many financial institutions to invest in ? I’m from Canada and heard about the " Bons de trésor" something like that ? Knowledge is power and I will obviously improve my financial literacy but I feel like it might be too late. For context, I don’t have a single soul around me and had to cut contact with my family, also even I was in contact with them even asking for $1 is too much for my parents/siblings. My credit score is currently 625. It’s 100% my fault and I should have take care of my emotional needs instead of emotional eating and fucking up my financial situation/credit. Help. Anything. How did you get out of financial insecurity?
"""]
new_vector = vectorizer.transform(new_quote)
new_preds = clf.predict(new_vector)
print(new_preds)


[1]
