In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.pipeline import Pipeline

In [13]:
# Notebook-wide display settings: never hide columns, and allow up to
# 1000 characters per cell so long tweet texts are not truncated.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 1000

In [2]:
# Path to the raw dataset, relative to the notebook's working directory.
dataset_path = "../../data/Mental-Health-Twitter.csv"
df = pd.read_csv(dataset_path)

In [4]:
# Restrict the frame to the tweet text and its label. The explicit
# .copy() gives an independent frame rather than a view of the loaded data.
useful_columns = ['post_text', 'label']
df = df.loc[:, useful_columns].copy()

In [15]:
# Preview the first 50 rows (tweet text and label) as a sanity check on the
# loaded data; being the cell's last expression, the frame is rendered below.
df.head(50)

Unnamed: 0,post_text,label
0,It's just over 2 years since I was diagnosed with #anxiety and #depression. Today I'm taking a moment to reflect on how far I've come since.,1
1,"It's Sunday, I need a break, so I'm planning to spend as little time as possible on the #A14...",1
2,Awake but tired. I need to sleep but my brain has other ideas...,1
3,RT @SewHQ: #Retro bears make perfect gifts and are great for beginners too! Get stitching with October's Sew on sale NOW! #yay http://t.co/…,1
4,It’s hard to say whether packing lists are making life easier or just reinforcing how much still needs doing... #movinghouse #anxiety,1
5,Making packing lists is my new hobby... #movinghouse,1
6,At what point does keeping stuff for nostalgic reasons cross the line into plain old hoarding...? #movinghouse,1
7,Currently in the finding-boxes-of-random-shit packing phase. I think I’m a closet hoarder...,1
8,"Can't be bothered to cook, take away on the way 😁👍🏼 #lazy",1
9,RT @itventsnews: ITV releases promo video for the final series of Downton Abbey http://t.co/mC8Ive72zR http://t.co/CFBKrRvJIs,1


In [16]:
# Split the tweets and labels into train and test sets: 25% is held out for
# evaluation, and the fixed random_state makes the shuffle reproducible.
features = df['post_text']
labels = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.25,
    random_state=42,
)

In [17]:
# Text-classification pipeline: raw tweet text -> TF-IDF features -> SVM.
# Keeping both steps in one Pipeline means the vectorizer's vocabulary is
# learned only from whatever data is passed to fit().
pipeline = Pipeline([
    # Drop scikit-learn's built-in English stop words before weighting terms.
    ('tfidf', TfidfVectorizer(stop_words='english')),
    # RBF kernel is what is actually used here; linear and quadratic
    # (kernel='poly', degree=2) kernels are alternatives worth comparing.
    ('svm', SVC(kernel='rbf'))
])

In [18]:
# Train the whole pipeline (TF-IDF step, then the SVM step) on the training split.
pipeline.fit(X_train, y_train)

In [19]:
# Predict labels for the held-out test texts using the fitted pipeline.
y_pred = pipeline.predict(X_test)

In [20]:
# Score the test-set predictions. Accuracy is class-agnostic; precision,
# recall and F1 are computed with label 1 treated as the positive class.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, pos_label=1)
    for scorer in (precision_score, recall_score, f1_score)
)

# Report each metric on its own line, unrounded.
for metric_name, metric_value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1 Score', f1),
):
    print(f'{metric_name}: {metric_value}')

Accuracy: 0.8754
Precision: 0.8770329234430781
Recall: 0.8759904912836767
F1 Score: 0.8765113974231913
