In [1]:
# %%
import sys
!{sys.executable} -m pip install --quiet sentence-transformers transformers datasets textblob vaderSentiment imbalanced-learn xgboost tqdm joblib

# %%
import os, glob, re, joblib
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

from sentence_transformers import SentenceTransformer
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix
from xgboost import XGBClassifier
import torch


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shris\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\shris\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\shris\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\shris\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
from huggingface_hub import login
login()  


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [3]:
# %%
BASE_DATA_DIR = r"D:/depressiondetector/diag-woz"
train_labels_path = r"D:/depressiondetector/labels/train_split.csv"
dev_labels_path   = r"D:/depressiondetector/labels/dev_split.csv"
test_labels_path  = r"D:/depressiondetector/labels/test_split.csv"

def normalize_label(df):
    rename_map = {
        'PHQ_Binary': 'PHQ8_Binary',
        'phq8_binary': 'PHQ8_Binary',
        'PHQ8_binary': 'PHQ8_Binary'
    }
    df.rename(columns={c: rename_map[c] for c in rename_map if c in df.columns}, inplace=True)
    return df

train_labels = normalize_label(pd.read_csv(train_labels_path))
dev_labels   = normalize_label(pd.read_csv(dev_labels_path))
test_labels  = normalize_label(pd.read_csv(test_labels_path))

for df in [train_labels, dev_labels, test_labels]:
    df['Participant_ID'] = df['Participant_ID'].astype(str)

print(f"Train: {len(train_labels)}, Dev: {len(dev_labels)}, Test: {len(test_labels)}")


Train: 163, Dev: 56, Test: 56


In [4]:
# %%
def load_transcript_csv(pid, base_dir=BASE_DATA_DIR):
    pid = str(pid)
    folder = os.path.join(base_dir, f"{pid}_P")
    csv_files = glob.glob(os.path.join(folder, "**", "*.csv"), recursive=True)
    if not csv_files:
        return ""
    
    transcript_files = [f for f in csv_files if 'transcript' in os.path.basename(f).lower()]
    best = transcript_files[0] if transcript_files else sorted(csv_files, key=lambda x: os.path.getsize(x), reverse=True)[0]
    df = pd.read_csv(best)
    
    text_cols = [c for c in df.columns if 'text' in c.lower()]
    if not text_cols:
        return ""
    
    text_col = text_cols[0]
    if "Speaker" in df.columns:
        df = df[df["Speaker"].astype(str).str.lower().isin(["participant","user","patient","client"])]
    return " ".join(df[text_col].astype(str).tolist())

# %%
all_ids = sorted(set(train_labels['Participant_ID'].tolist() + 
                     dev_labels['Participant_ID'].tolist() + 
                     test_labels['Participant_ID'].tolist()))

rows = []
for pid in tqdm(all_ids, desc="Loading transcripts"):
    rows.append({"Participant_ID": pid, "raw_text": load_transcript_csv(pid)})

df_text = pd.DataFrame(rows)
print(f"Loaded {len(df_text)} transcripts")


Loading transcripts:   0%|          | 0/275 [00:00<?, ?it/s]

Loaded 275 transcripts


In [5]:
# %%
def clean(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

df_text["clean_text"] = df_text["raw_text"].apply(clean)


In [7]:
a = df_text[df_text["Participant_ID"]=="308"]["clean_text"]
print(len(a))

1


In [8]:
df_text.head()

Unnamed: 0,Participant_ID,raw_text,clean_text
0,300,so I'm going to interview in Spanish okay g...,so i'm going to interview in spanish okay good...
1,301,yeah there's also on Craigslist so that's why ...,yeah there's also on craigslist so that's why ...
2,302,just move around a little bit when you're fin...,just move around a little bit when you're fini...
3,303,wow okay when you're finished when she's done...,wow okay when you're finished when she's done ...
4,304,so we'll just move around a little bit tonight...,so we'll just move around a little bit tonight...


In [9]:
# %%
print("Extracting linguistic features...")
vader = SentimentIntensityAnalyzer()

def extract_linguistic_features(text):
    words = text.lower().split()
    word_count = len(words)
    if word_count == 0:
        return [0] * 18  # Changed from 15 to 18

    first_person = ['i', 'me', 'my', 'mine', 'myself']
    neg_words = ['sad','depressed','lonely','hopeless','worthless','tired','empty','numb',
                 'anxious','worried','scared','pain','hurt','alone','crying','awful','terrible',
                 'miserable','helpless','useless','broken']
    absolutist = ['always','never','nothing','everything','completely','totally','absolutely','entire','all','every','none']
    
    # ‚ö†Ô∏è CRITICAL: Suicide/Self-Harm Keywords
    suicide_words = ['suicide','suicidal','kill myself','end my life','die','death','harm myself',
                     'cut myself','overdose','jump','hang myself','gun','pills','razor']

    first_person_ratio = sum(1 for w in words if w in first_person) / word_count
    neg_count = sum(1 for w in words if w in neg_words)
    neg_ratio = neg_count / word_count
    absolutist_count = sum(1 for w in words if w in absolutist)
    absolutist_ratio = absolutist_count / word_count
    
    # **NEW: Suicide detection**
    text_lower = text.lower()
    suicide_count = sum(1 for sw in suicide_words if sw in text_lower)
    suicide_flag = 1 if suicide_count > 0 else 0

    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity
    vader_scores = vader.polarity_scores(text)

    lexical_diversity = len(set(words)) / word_count if word_count > 0 else 0
    avg_word_len = np.mean([len(w) for w in words]) if words else 0

    return [
        word_count, first_person_ratio, neg_count, neg_ratio, absolutist_count, absolutist_ratio,
        polarity, subjectivity, vader_scores['neg'], vader_scores['neu'], vader_scores['pos'],
        vader_scores['compound'], lexical_diversity, avg_word_len, text.count('?'),
        suicide_count, suicide_flag, text_lower.count('!')  # NEW FEATURES
    ]

ling_features = df_text['clean_text'].apply(extract_linguistic_features)
ling_df = pd.DataFrame(ling_features.tolist(), columns=[
    'word_count','first_person_ratio','neg_word_count','neg_ratio','absolutist_count','absolutist_ratio',
    'polarity','subjectivity','vader_neg','vader_neu','vader_pos','vader_compound','lexical_diversity',
    'avg_word_len','question_count',
    'suicide_count','suicide_flag','exclamation_count'  # NEW
])


Extracting linguistic features...


In [11]:
ling_df.head()

Unnamed: 0,word_count,first_person_ratio,neg_word_count,neg_ratio,absolutist_count,absolutist_ratio,polarity,subjectivity,vader_neg,vader_neu,vader_pos,vader_compound,lexical_diversity,avg_word_len,question_count,suicide_count,suicide_flag,exclamation_count
0,322,0.096273,0,0.0,5,0.015528,0.240335,0.512849,0.056,0.62,0.324,0.9989,0.555901,4.068323,0,0,0,0
1,1399,0.096497,1,0.000715,17,0.012152,0.119045,0.604685,0.075,0.74,0.184,0.9996,0.268763,3.961401,0,0,0,0
2,609,0.054187,1,0.001642,5,0.00821,0.167731,0.521223,0.058,0.7,0.243,0.9994,0.481117,4.09688,0,0,0,0
3,1916,0.061065,5,0.00261,9,0.004697,0.218301,0.544658,0.045,0.742,0.213,0.9999,0.241649,3.914927,0,1,1,0
4,992,0.082661,1,0.001008,6,0.006048,0.174526,0.525062,0.069,0.744,0.187,0.999,0.363911,4.021169,0,0,0,0


In [12]:
print("Loading Mental Health BERT model...")
MODEL_NAME = "mental/mental-bert-base-uncased"  # After you get access


sbert = SentenceTransformer(MODEL_NAME)

embeddings = sbert.encode(
    df_text["clean_text"].tolist(),
    batch_size=16,
    show_progress_bar=True
)

emb_dim = embeddings.shape[1]
emb_cols = [f"emb_{i}" for i in range(emb_dim)]

df_emb = pd.DataFrame(embeddings, columns=emb_cols)
df_emb["Participant_ID"] = df_text["Participant_ID"]

# Merge embeddings + linguistic features
df_final = df_text.merge(df_emb, on="Participant_ID")
df_final = pd.concat([df_final.reset_index(drop=True), ling_df], axis=1)

print(f"Final feature dimension: {len(emb_cols) + len(ling_df.columns)}")

Loading Mental Health BERT model...


No sentence-transformers model found with name mental/mental-bert-base-uncased. Creating a new one with mean pooling.
Some weights of BertModel were not initialized from the model checkpoint at mental/mental-bert-base-uncased and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

Final feature dimension: 786


In [16]:
len(emb_cols)

768

In [15]:
df_final.head()

Unnamed: 0,Participant_ID,raw_text,clean_text,emb_0,emb_1,emb_2,emb_3,emb_4,emb_5,emb_6,...,vader_neg,vader_neu,vader_pos,vader_compound,lexical_diversity,avg_word_len,question_count,suicide_count,suicide_flag,exclamation_count
0,300,so I'm going to interview in Spanish okay g...,so i'm going to interview in spanish okay good...,-0.102613,0.023676,0.175381,-0.07131,0.256039,-0.174053,-0.059907,...,0.056,0.62,0.324,0.9989,0.555901,4.068323,0,0,0,0
1,301,yeah there's also on Craigslist so that's why ...,yeah there's also on craigslist so that's why ...,-0.067665,-0.019338,0.076278,-0.085264,0.16665,-0.169355,0.001108,...,0.075,0.74,0.184,0.9996,0.268763,3.961401,0,0,0,0
2,302,just move around a little bit when you're fin...,just move around a little bit when you're fini...,-0.142839,0.018652,0.144626,-0.067519,0.281602,-0.209833,-0.073179,...,0.058,0.7,0.243,0.9994,0.481117,4.09688,0,0,0,0
3,303,wow okay when you're finished when she's done...,wow okay when you're finished when she's done ...,-0.125157,-0.037072,0.094689,-0.131592,0.223761,-0.188919,0.046842,...,0.045,0.742,0.213,0.9999,0.241649,3.914927,0,1,1,0
4,304,so we'll just move around a little bit tonight...,so we'll just move around a little bit tonight...,-0.246813,-0.090445,0.103634,-0.090123,0.24601,-0.044509,0.008121,...,0.069,0.744,0.187,0.999,0.363911,4.021169,0,0,0,0


In [17]:
feature_cols = emb_cols + ling_df.columns.tolist()

train = train_labels.merge(df_final, on="Participant_ID", how="inner")
dev   = dev_labels.merge(df_final, on="Participant_ID", how="inner")
test  = test_labels.merge(df_final, on="Participant_ID", how="inner")

X_train, y_train = train[feature_cols].values, train["PHQ8_Binary"].values
X_dev,   y_dev   = dev[feature_cols].values,   dev["PHQ8_Binary"].values
X_test,  y_test  = test[feature_cols].values,  test["PHQ8_Binary"].values

print(f"Train: {X_train.shape}, Dev: {X_dev.shape}, Test: {X_test.shape}")

Train: (163, 786), Dev: (56, 786), Test: (56, 786)


163

In [20]:
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_dev_s   = scaler.transform(X_dev)
X_test_s  = scaler.transform(X_test)

# Better SMOTE with lower k_neighbors for small minority class
print(f"Original class distribution: {np.bincount(y_train.astype(int))}")
smote = SMOTE(random_state=42, k_neighbors=min(3, (y_train == 1).sum() - 1))
X_res, y_res = smote.fit_resample(X_train_s, y_train)

print(f"After SMOTE: {X_res.shape}")
print(f"Class distribution: {np.bincount(y_res.astype(int))}")


Original class distribution: [126  37]
After SMOTE: (252, 786)
Class distribution: [126 126]


In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

print("Training models with class balancing...")

# Logistic Regression with balanced weights
lr = LogisticRegression(max_iter=3000, random_state=42, class_weight='balanced', C=0.1)
lr.fit(X_res, y_res)

# Random Forest with balanced weights
rf = RandomForestClassifier(
    n_estimators=300, 
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=4,
    class_weight='balanced_subsample',
    random_state=42, 
    n_jobs=-1
)
rf.fit(X_res, y_res)

# Gradient Boosting
gb = GradientBoostingClassifier(
    n_estimators=200, 
    learning_rate=0.05, 
    max_depth=3,
    min_samples_split=10,
    random_state=42
)
gb.fit(X_res, y_res)

# XGBoost with scale_pos_weight
scale_pos_weight = (y_res == 0).sum() / (y_res == 1).sum()
print(f"Scale pos weight: {scale_pos_weight:.2f}")

xgb = XGBClassifier(
    n_estimators=300, 
    learning_rate=0.05, 
    max_depth=4,
    min_child_weight=3,
    scale_pos_weight=scale_pos_weight,
    eval_metric='logloss',
    random_state=42
)
xgb.fit(X_res, y_res)

print("Training complete!")


Training models with class balancing...
Scale pos weight: 1.00
Training complete!


In [23]:
# Evaluation Function
def evaluate(model, X, y, name):
    pred = model.predict(X)
    prob = model.predict_proba(X)[:,1]
    
    print(f"\n{'='*50}")
    print(f"{name}")
    print(f"{'='*50}")
    print(f"Accuracy: {accuracy_score(y, pred):.4f}")
    print(f"ROC-AUC: {roc_auc_score(y, prob):.4f}")
    print(f"\nConfusion Matrix:")
    print(confusion_matrix(y, pred))
    print(f"\nClassification Report:")
    print(classification_report(y, pred, target_names=['Not Depressed', 'Depressed']))


In [24]:
print("\n" + "="*60)
print("DEVELOPMENT SET RESULTS")
print("="*60)
evaluate(lr, X_dev_s, y_dev, "Logistic Regression")
evaluate(rf, X_dev_s, y_dev, "Random Forest")
evaluate(gb, X_dev_s, y_dev, "Gradient Boosting")
evaluate(xgb, X_dev_s, y_dev, "XGBoost")


DEVELOPMENT SET RESULTS

Logistic Regression
Accuracy: 0.8036
ROC-AUC: 0.7027

Confusion Matrix:
[[41  3]
 [ 8  4]]

Classification Report:
               precision    recall  f1-score   support

Not Depressed       0.84      0.93      0.88        44
    Depressed       0.57      0.33      0.42        12

     accuracy                           0.80        56
    macro avg       0.70      0.63      0.65        56
 weighted avg       0.78      0.80      0.78        56


Random Forest
Accuracy: 0.7679
ROC-AUC: 0.6080

Confusion Matrix:
[[41  3]
 [10  2]]

Classification Report:
               precision    recall  f1-score   support

Not Depressed       0.80      0.93      0.86        44
    Depressed       0.40      0.17      0.24        12

     accuracy                           0.77        56
    macro avg       0.60      0.55      0.55        56
 weighted avg       0.72      0.77      0.73        56


Gradient Boosting
Accuracy: 0.7500
ROC-AUC: 0.5833

Confusion Matrix:
[[40  4]
 [1

In [25]:
print("\n" + "="*60)
print("TEST SET RESULTS (FINAL)")
print("="*60)
evaluate(lr, X_test_s, y_test, "Logistic Regression")
evaluate(rf, X_test_s, y_test, "Random Forest")
evaluate(gb, X_test_s, y_test, "Gradient Boosting")
evaluate(xgb, X_test_s, y_test, "XGBoost")


TEST SET RESULTS (FINAL)

Logistic Regression
Accuracy: 0.6429
ROC-AUC: 0.6154

Confusion Matrix:
[[33  6]
 [14  3]]

Classification Report:
               precision    recall  f1-score   support

Not Depressed       0.70      0.85      0.77        39
    Depressed       0.33      0.18      0.23        17

     accuracy                           0.64        56
    macro avg       0.52      0.51      0.50        56
 weighted avg       0.59      0.64      0.60        56


Random Forest
Accuracy: 0.6964
ROC-AUC: 0.7195

Confusion Matrix:
[[36  3]
 [14  3]]

Classification Report:
               precision    recall  f1-score   support

Not Depressed       0.72      0.92      0.81        39
    Depressed       0.50      0.18      0.26        17

     accuracy                           0.70        56
    macro avg       0.61      0.55      0.53        56
 weighted avg       0.65      0.70      0.64        56


Gradient Boosting
Accuracy: 0.6964
ROC-AUC: 0.6893

Confusion Matrix:
[[33  6]
 [

In [26]:
import joblib
os.makedirs("text_depression_models", exist_ok=True)

# Save all models
joblib.dump(xgb, "text_depression_models/xgb_model.joblib")
joblib.dump(rf, "text_depression_models/rf_model.joblib")
joblib.dump(lr, "text_depression_models/lr_model.joblib")
joblib.dump(gb, "text_depression_models/gb_model.joblib")
joblib.dump(scaler, "text_depression_models/scaler.joblib")
joblib.dump(sbert, "text_depression_models/sbert_model.joblib")

print("‚úÖ Models saved successfully!")


‚úÖ Models saved successfully!


In [53]:
def predict_depression(text, threshold=0.35):
    """Predict depression from raw text with adjustable threshold"""
    clean_text = clean(text)
    ling_feats = extract_linguistic_features(clean_text)
    emb = sbert.encode([clean_text])
    
    features = np.concatenate([emb[0], ling_feats])
    features = scaler.transform([features])
    
    # Use XGBoost with custom threshold
    prob = xgb.predict_proba(features)[0]
    pred = 1 if prob[1] >= threshold else 0
    
    result = "Depressed üòî" if pred == 1 else "Not Depressed üôÇ"
    confidence = prob[1] if pred == 1 else prob[0]
    
    return f"{result} | Confidence: {confidence:.2%} | Depression Probability: {prob[1]:.2%}"


In [8]:
def clean(text):
    text = text.lower()
    # text = re.sub(r"http\S+|www\.\S+", " ", text)
    # text = re.sub(r"\s+", " ", text).strip()
    return text

In [27]:
def predict_with_risk_levels(text):
    """Predict with interpretable risk levels + SUICIDE OVERRIDE"""
    clean_text = clean(text)
    ling_feats = extract_linguistic_features(clean_text)
    emb = sbert.encode([clean_text])
    
    features = np.concatenate([emb[0], ling_feats])
    features = scaler.transform([features])
    
    prob = xgb.predict_proba(features)[0]
    
    # **CRITICAL OVERRIDE: If suicide keywords detected, force HIGH RISK**
    suicide_keywords = ['suicide', 'kill myself', 'end my life', 'want to die', 'gonna die']
    text_lower = text.lower()
    has_suicide = any(kw in text_lower for kw in suicide_keywords)
    
    if has_suicide:
        prob[1] = max(prob[1], 0.75)  # Force at least 75% depression probability
    
    # Risk stratification
    if prob[1] >= 0.50 or has_suicide:
        result = "üî¥ SEVERE Depression Risk - URGENT HELP NEEDED"
    elif prob[1] >= 0.35:
        result = "üü† HIGH Depression Risk"
    elif prob[1] >= 0.15:
        result = "üü° MODERATE Risk"
    elif prob[1] >= 0.05:
        result = "üü¢ LOW Risk"
    else:
        result = "‚úÖ Minimal Risk"
    
    if has_suicide:
        result += " ‚ö†Ô∏è SUICIDE IDEATION DETECTED"
    
    return f"{result} | Depression Score: {prob[1]:.1%}"

In [83]:
# def predict_with_risk_levels(text):
#     """Predict with interpretable risk levels"""
#     clean_text = clean(text)
#     ling_feats = extract_linguistic_features(clean_text)
#     emb = sbert.encode([clean_text])
    
#     features = np.concatenate([emb[0], ling_feats])
#     features = scaler.transform([features])
    
#     prob = lr.predict_proba(features)[0]
    
#     # Risk stratification
#     if prob[1] >= 0.50:
#         result = "üî¥ SEVERE Depression Risk"
#     elif prob[1] >= 0.35:
#         result = "üü† HIGH Depression Risk"
#     elif prob[1] >= 0.15:
#         result = "üü° MODERATE Risk"
#     elif prob[1] >= 0.05:
#         result = "üü¢ LOW Risk"
#     else:
#         result = "‚úÖ Minimal Risk"
    
#     return f"{result} | Depression Score: {prob[1]:.1%}"

In [82]:
# def predict_with_risk_levels(text):
#     """Predict with interpretable risk levels"""
#     clean_text = clean(text)
#     ling_feats = extract_linguistic_features(clean_text)
#     emb = sbert.encode([clean_text])
    
#     features = np.concatenate([emb[0], ling_feats])
#     features = scaler.transform([features])
    
#     prob = rf.predict_proba(features)[0]
    
#     # Risk stratification
#     if prob[1] >= 0.50:
#         result = "üî¥ SEVERE Depression Risk"
#     elif prob[1] >= 0.35:
#         result = "üü† HIGH Depression Risk"
#     elif prob[1] >= 0.15:
#         result = "üü° MODERATE Risk"
#     elif prob[1] >= 0.05:
#         result = "üü¢ LOW Risk"
#     else:
#         result = "‚úÖ Minimal Risk"
    
#     return f"{result} | Depression Score: {prob[1]:.1%}"

In [80]:
# def predict_with_risk_levels(text):
#     """Predict with interpretable risk levels"""
#     clean_text = clean(text)
#     ling_feats = extract_linguistic_features(clean_text)
#     emb = sbert.encode([clean_text])
    
#     features = np.concatenate([emb[0], ling_feats])
#     features = scaler.transform([features])
    
#     prob = gb.predict_proba(features)[0]
    
#     # Risk stratification
#     if prob[1] >= 0.50:
#         result = "üî¥ SEVERE Depression Risk"
#     elif prob[1] >= 0.35:
#         result = "üü† HIGH Depression Risk"
#     elif prob[1] >= 0.15:
#         result = "üü° MODERATE Risk"
#     elif prob[1] >= 0.05:
#         result = "üü¢ LOW Risk"
#     else:
#         result = "‚úÖ Minimal Risk"
    
#     return f"{result} | Depression Score: {prob[1]:.1%}"

In [28]:
test_text = "I don't know if I have what it takes to continue to do  I survived day by day  TriNet  if I can  have you been diagnosed with depression  how long ago were you diagnosed  4 years ago  4 years ago couple years ago  what got you to seek help  my mood was just not right  I was always feeling down and depressed and lack of energy always wanted  lack of Interest  I've gone to therapy it has been useful for me in the past is it useful for me now I don't  I would love to talk to someone I just feel like I don't have anyone  I don't depend on anyone  yeah I'm sorry  did you think you had a problem before you found out  oh yeah I've always felt depressed on my life  what were your symptoms  dr. Banerjee  why is sleep a lot lack of Interest  my appetite was uncontrollable either lack of or I will should mean gluttonous and eating the wrong things  have you noticed any changes in your behavior or thoughts lately  can you give me an example of that  my sister just passed away  I'm sorry to hear that all right to go  Robbie Keane  homeless and jobless 2 weeks ago  I got to a point where  I just missed her and I want to be with her  yeah  how close are you to your family  pretty Dysfunctional Family  really cat depend on them  I just depend on myself  that sounds really hard  it is what it is what are you going to do  yeah  I'd like to give up but  my parents just buried their daughter six months ago they don't want to bury their other daughter  what advice would you give yourself 10 or 20 years ago  I just haven't had good luck  the corporate manager for 20 years  who would have known 20 years ago what advice I would have given myself again  try to commit suicide to return back to work  no it's just too rough trying to pick up all the pieces  together  I don't know what advice I would give myself  when I started with my corporate job.  I didn't know I'd be here  yeah  when is the last time you felt really happy  really  a couple days ago this week  I've been trying to make ends meet  got a lot of things sprouting  got a lot of things  that are happening for me but I need like two more weeks here in LA  I can connect the dots but  I'm actually I don't have what it takes to stay here for two more weeks  so  I was happy because I was getting all back  things are starting to look up for me but it's just time again is it on  my car is packed and I'm hitting the road  how do your best friend describe you  Royal  emotional  you consider yourself an introvert  I have been lately  I was an extrovert  20 years  what made you decide to do that  circumstances in my life I don't feel is confit  okay  which one of your most memorable experiences  I don't know it's hard to say  can't really answer that one  okay  okay I think I've asked everything I need to  thanks for sharing your thoughts with me thank you goodbye"
print(predict_with_risk_levels(test_text))

üî¥ SEVERE Depression Risk - URGENT HELP NEEDED ‚ö†Ô∏è SUICIDE IDEATION DETECTED | Depression Score: 99.1%


In [29]:
test_text2 = "I had a great day today! Went for a walk and met up with friends. Feeling really good about life."
print(predict_with_risk_levels(test_text2))

‚úÖ Minimal Risk | Depression Score: 0.9%


In [30]:
test_text2 = """
I feel completely hopeless and worthless. Nothing ever goes right for me. 
I'm always tired and can't find joy in anything anymore. I feel so alone 
and empty inside. I don't see the point in trying anymore. Everything feels 
like too much effort. I just want to sleep all the time and never wake up.
"""
print(predict_with_risk_levels(test_text2))

üü† HIGH Depression Risk | Depression Score: 41.7%


In [32]:
test_text2 = "I had a bad day today,and i am gonna suicide if life goes this way as my college pressurises me every odd they take a huge lump of fees on name of giving good placements but when it comes to giving placemnets they have nothing to give!! "
print(predict_with_risk_levels(test_text2))

üî¥ SEVERE Depression Risk - URGENT HELP NEEDED ‚ö†Ô∏è SUICIDE IDEATION DETECTED | Depression Score: 75.0%
