In [1]:
# 06_kfold_training_and_eval.ipynb

import pandas as pd, numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, f1_score, accuracy_score
import joblib
import os

In [2]:

# === MODIFY: dataset path ===
DATA = r"C:\Users\ASUS-PC\Desktop\mindcheck_ai_jupyter\data\aimind_cleaned.csv"

# === MODIFY: where to save model artifacts ===
OUT_VEC = r"C:\Users\ASUS-PC\Desktop\mindcheck_ai_jupyter\data\tfidf_vectorizer.pkl"
OUT_CLF = r"C:\Users\ASUS-PC\Desktop\mindcheck_ai_jupyter\data\clf.pkl"

df = pd.read_csv(DATA)
assert {"confession","label"}.issubset(df.columns), "Need 'confession' and 'label' columns"
texts = df["confession"].astype(str).tolist()
y = df["label"].astype(str).tolist()

vec = TfidfVectorizer(min_df=3, ngram_range=(1,2), max_features=200_000)
X = vec.fit_transform(texts)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs, f1s = [], []

for fold, (tr, te) in enumerate(skf.split(X, y), 1):
    clf = LinearSVC()  # try also: LogisticRegression(max_iter=2000, C=2.0)
    clf.fit(X[tr], np.array(y)[tr])
    pred = clf.predict(X[te])

    acc = accuracy_score(np.array(y)[te], pred)
    f1  = f1_score(np.array(y)[te], pred, average="weighted")
    accs.append(acc); f1s.append(f1)

    print(f"\nFold {fold} — Acc: {acc:.4f} | F1(w): {f1:.4f}")
    print(classification_report(np.array(y)[te], pred))

print(f"\n🎯 Mean Acc: {np.mean(accs):.4f} | Mean F1(w): {np.mean(f1s):.4f}")

# Train final on all data
final_clf = LinearSVC()
final_clf.fit(X, y)
joblib.dump(vec, OUT_VEC)
joblib.dump(final_clf, OUT_CLF)
print("✅ Saved:", OUT_VEC, "and", OUT_CLF)

  df = pd.read_csv(DATA)



Fold 1 — Acc: 0.7974 | F1(w): 0.7973


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                                            precision    recall  f1-score   support

                                                                                      I haven’t felt this mentally healthy in a long time.       0.00      0.00      0.00         1
                                                         anything? What helps you focus on yourself for the purpose of improving yourself?       0.00      0.00      0.00         1
 getting so excited by something that I barge in despite him saying he wanted space earlier (obviously I get too excited to remember that)       0.00      0.00      0.00         1
                                                                                                                            if you do this       0.00      0.00      0.00         1
                                                                                                   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                                                                                                                                                                                                              precision    recall  f1-score   support

                                                                                                                                                                                                                                    and considering I don't have a lot of practice with talking on the phone       0.00      0.00      0.00         1
 but I actually do have strong feelings (non-romantic and/or non-sexual :P) when I'm doing something I love to do. Like extreme elation over dumb stuff like when I read that a protagonist in a book I like finally overcomes the adversity they've been trying to overcome throughout the entire narrative       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                            precision    recall  f1-score   support

                        and a thoroughly unwell man. However this does not excuse his nefarious behaviour and manipulation       0.00      0.00      0.00         1
 but it is a little odd to think that the kid would feel the cereal boxes to see how the different types of cardboard felt       0.00      0.00      0.00         1
                                                                                     my awareness levels are out of whack.       0.00      0.00      0.00         1
   once I'm in the social situation I can sort of  handle it; I just don't like it because of masking and how that drains.       0.00      0.00      0.00         1
                                                                                                                       yes       0.00      0.00      0.00         1
               

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                      precision    recall  f1-score   support

                                                 but something else.       0.00      0.00      0.00         1
                                                          motivation       0.00      0.00      0.00         1
                                   or maybe it actually was recently       0.00      0.00      0.00         1
 or validate some of those who have been affected by his ‘treatment’       0.00      0.00      0.00         1
                                                                ADHD       0.82      0.84      0.83      7391
                                                                 OCD       0.85      0.85      0.85      8442
                                                           aspergers       0.73      0.71      0.72      4604
                                                          depression       0.69      0.74      0.71      4645
         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                                                                                                                            precision    recall  f1-score   support

                                                                                                                                                             full stop. There's too many thoughts and too much information       0.00      0.00      0.00         1
                                                                                                                                                                                    it's ""Am I really good at that though       0.00      0.00      0.00         1
                                                                                                                                                                             it's just starting to get on people's nerves. 