In [None]:
pip install pandas numpy scikit-learn imbalanced-learn pytorch-tabnet torch matplotlib seaborn pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0


In [None]:
import pandas as pd
import numpy as np

# Read the PHQ-9 questionnaire export, then report its size and preview the first rows
phq = pd.read_csv("PHQ-9 Dataset.csv")
print(f"PHQ shape: {phq.shape}")
display(phq.head(n=5))

PHQ shape: (682, 11)


Unnamed: 0,phq1,phq2,phq3,phq4,phq5,phq6,phq7,phq8,phq9,PHQ_Total,PHQ_Severity
0,More than half the days,Not at all,Not at all,Not at all,Not at all,Not at all,Not at all,More than half the days,Not at all,4,Minimal
1,Not at all,Not at all,Nearly every day,Nearly every day,Nearly every day,Not at all,More than half the days,More than half the days,More than half the days,15,Moderately severe
2,Not at all,Not at all,Not at all,Not at all,Not at all,Not at all,Several days,Not at all,Not at all,1,Minimal
3,Nearly every day,Nearly every day,Not at all,Nearly every day,More than half the days,Not at all,Not at all,Not at all,Not at all,11,Moderate
4,Not at all,Not at all,Not at all,Not at all,Not at all,Not at all,Not at all,More than half the days,Not at all,2,Minimal


In [None]:
# Read the wearable-sensor export, then report its size and preview the first rows
wearable = pd.read_csv("Wearable Dataset.csv")
print(f"Wearable shape: {wearable.shape}")
display(wearable.head(n=5))

Wearable shape: (1161, 5)


Unnamed: 0,heart_rate_bpm,body_temp_celsius,blood_oxygen_level,sleep_duration_min,environment_temp_celsius
0,80.225449,36.189578,97.708864,378.8,28.2
1,73.968612,36.742176,97.198896,190.5,20.4
2,65.23078,37.105838,98.531,310.5,23.3
3,66.702398,37.056634,97.597078,222.2,18.1
4,68.723379,36.092315,97.079355,175.4,24.2


In [None]:
# Ordinal codes for the four PHQ-9 answer options
mapping = {
    "Not at all": 0,
    "Several days": 1,
    "More than half the days": 2,
    "Nearly every day": 3
}

# Encode only the non-numeric answer columns, one column at a time.
# A frame-wide `replace` relies on pandas' deprecated silent downcasting
# (FutureWarning) and would let unexpected answers slip through as strings.
answer_cols = phq.drop(columns=["PHQ_Severity"]).select_dtypes(exclude="number").columns

encoded = {}
for col in answer_cols:
    codes = phq[col].map(mapping)
    # Values that were present but have no code in `mapping` are data errors.
    unknown = phq.loc[codes.isna() & phq[col].notna(), col].unique()
    if len(unknown):
        raise ValueError(f"Unexpected PHQ response(s) in {col!r}: {list(unknown)}")
    encoded[col] = codes

# Re-running this cell is a no-op: once encoded, no non-numeric answer columns remain.
phq = phq.assign(**encoded)

# Separate features and target
# NOTE(review): PHQ_Total stays among the predictors and PHQ_Severity looks like a
# banding of that total — confirm this target leakage is intended.
X_phq = phq.drop(columns=["PHQ_Severity"])   # predictors
y_phq = phq["PHQ_Severity"]                 # target: severity label (string classes)

print(y_phq.value_counts())


PHQ_Severity
Minimal              206
Mild                 155
Moderate             128
Moderately severe    125
Severe                68
Name: count, dtype: int64


  phq = phq.replace(mapping)


In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% for testing before any scaling/resampling; stratify to keep class ratios
X_train_phq, X_test_phq, y_train_phq, y_test_phq = train_test_split(
    X_phq,
    y_phq,
    test_size=0.2,
    stratify=y_phq,
    random_state=42,
)

# Per-class sample counts in each split, ordered by class label
train_counts = y_train_phq.value_counts().sort_index()
test_counts = y_test_phq.value_counts().sort_index()

split_table = pd.DataFrame(
    {"Training (80%)": train_counts, "Testing (20%)": test_counts}
)

print("\n=== CLASS DISTRIBUTION: TRAIN vs TEST (BEFORE SMOTE) ===\n")
print(split_table)

print(f"\nTOTAL DATA: {len(X_phq)}")
print(f"Total Training: {train_counts.sum()}")
print(f"Total Testing : {test_counts.sum()}")




=== CLASS DISTRIBUTION: TRAIN vs TEST (BEFORE SMOTE) ===

                   Training (80%)  Testing (20%)
PHQ_Severity                                    
Mild                          124             31
Minimal                       165             41
Moderate                      102             26
Moderately severe             100             25
Severe                         54             14

TOTAL DATA: 682
Total Training: 545
Total Testing : 137


In [None]:
# Standardise features; the scaler's statistics are learned from the training split only
scaler_phq = StandardScaler().fit(X_train_phq)
X_train_phq_scaled = scaler_phq.transform(X_train_phq)
X_test_phq_scaled = scaler_phq.transform(X_test_phq)

In [None]:
# Training-split class counts prior to oversampling
print(
    "\nClass distribution BEFORE SMOTE (training set only):",
    y_train_phq.value_counts(),
    sep="\n",
)


Class distribution BEFORE SMOTE (training set only):
PHQ_Severity
Minimal              165
Mild                 124
Moderate             102
Moderately severe    100
Severe                54
Name: count, dtype: int64


In [None]:
# Oversample the scaled training split with SMOTE (the test split is left untouched)
smote = SMOTE(random_state=42)
X_train_phq_bal, y_train_phq_bal = smote.fit_resample(X_train_phq_scaled, y_train_phq)

print(
    "\nClass distribution AFTER SMOTE (training set only):",
    pd.Series(y_train_phq_bal).value_counts(),
    sep="\n",
)


Class distribution AFTER SMOTE (training set only):
PHQ_Severity
Mild                 165
Minimal              165
Moderately severe    165
Moderate             165
Severe               165
Name: count, dtype: int64


In [None]:
import torch
from pytorch_tabnet.tab_model import TabNetClassifier


In [None]:
# Convert to numpy arrays
X_train_np = np.array(X_train_phq_bal)
y_train_np = np.array(y_train_phq_bal)
X_test_np = np.array(X_test_phq_scaled)
y_test_np = np.array(y_test_phq)

# Hold out part of the balanced training data for monitoring.
# TabNet tracks the eval_set metric and keeps the weights of the best-scoring
# epoch, so passing the test split there would select the model on the very
# data used for the final report. The test split must stay unseen until evaluation.
X_fit_np, X_val_np, y_fit_np, y_val_np = train_test_split(
    X_train_np,
    y_train_np,
    test_size=0.15,
    random_state=42,
    stratify=y_train_np,
)

# Train TabNet
phq_model = TabNetClassifier(
    n_d=8, n_a=8,
    n_steps=3,
    gamma=1.3,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    scheduler_params={"step_size":10, "gamma":0.9},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    verbose=1
)

phq_model.fit(
    X_fit_np, y_fit_np,
    eval_set=[(X_val_np, y_val_np)],   # validation split, NOT the test split
    max_epochs=50,
    patience=50,                       # equals max_epochs, so training never stops early
    batch_size=32,
    virtual_batch_size=16
)



epoch 0  | loss: 1.26972 | val_0_accuracy: 0.54745 |  0:00:00s
epoch 1  | loss: 0.85753 | val_0_accuracy: 0.70073 |  0:00:01s
epoch 2  | loss: 0.8514  | val_0_accuracy: 0.75182 |  0:00:02s
epoch 3  | loss: 0.64508 | val_0_accuracy: 0.71533 |  0:00:02s
epoch 4  | loss: 0.56541 | val_0_accuracy: 0.82482 |  0:00:03s
epoch 5  | loss: 0.6118  | val_0_accuracy: 0.66423 |  0:00:03s
epoch 6  | loss: 0.50535 | val_0_accuracy: 0.83942 |  0:00:04s
epoch 7  | loss: 0.55585 | val_0_accuracy: 0.78102 |  0:00:04s
epoch 8  | loss: 0.56085 | val_0_accuracy: 0.85401 |  0:00:05s
epoch 9  | loss: 0.51568 | val_0_accuracy: 0.75912 |  0:00:05s
epoch 10 | loss: 0.48665 | val_0_accuracy: 0.83212 |  0:00:06s
epoch 11 | loss: 0.50602 | val_0_accuracy: 0.89051 |  0:00:06s
epoch 12 | loss: 0.46805 | val_0_accuracy: 0.84672 |  0:00:07s
epoch 13 | loss: 0.43475 | val_0_accuracy: 0.83942 |  0:00:07s
epoch 14 | loss: 0.39135 | val_0_accuracy: 0.78102 |  0:00:08s
epoch 15 | loss: 0.49726 | val_0_accuracy: 0.85401 |  0



In [None]:
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import classification_report, accuracy_score

# Predict on the test split and summarise overall and per-class performance
phq_preds = phq_model.predict(X_test_np)
phq_acc = accuracy_score(y_test_np, phq_preds)

print("PHQ TABNET RESULTS:")
print(f"PHQ model accuracy: {phq_acc}")
print("\nClassification Report:\n")
print(classification_report(y_test_np, phq_preds))

PHQ TABNET RESULTS:
PHQ model accuracy: 0.927007299270073

Classification Report:

                   precision    recall  f1-score   support

             Mild       0.88      0.97      0.92        31
          Minimal       1.00      0.90      0.95        41
         Moderate       0.93      1.00      0.96        26
Moderately severe       0.92      0.88      0.90        25
           Severe       0.86      0.86      0.86        14

         accuracy                           0.93       137
        macro avg       0.92      0.92      0.92       137
     weighted avg       0.93      0.93      0.93       137



In [None]:
# Create a synthetic "risk label" proxy for training, because the wearable data
# carries no mental-health labels. Risk levels are derived from sleep duration
# and heart-rate patterns.
# (X_wear is built further down, once the label column exists, so the former
# up-front `wearable.copy()` was a dead store and has been dropped.)
def create_wearable_risk(row):
    """Return a rule-based proxy risk level (0-4) for one wearable reading.

    Reads ``row["sleep_duration_min"]`` and ``row["heart_rate_bpm"]``.
    Tiers are checked from most to least severe and the first match wins;
    a reading matching no tier gets level 0.
    """
    minutes_asleep = row["sleep_duration_min"]
    bpm = row["heart_rate_bpm"]

    # Level 4 - very high risk
    if minutes_asleep <= 200 or bpm >= 95 or bpm < 55:
        return 4

    # Level 3 - high risk
    if minutes_asleep <= 300 or bpm >= 90:
        return 3

    # Level 2 - moderate risk
    if minutes_asleep <= 360 or bpm >= 85:
        return 2

    # Level 1 - mild risk
    if minutes_asleep <= 420 or bpm >= 80 or bpm < 65:
        return 1

    # Level 0 - nothing flagged
    return 0

# Label every reading with its rule-based risk tier (adds a column to `wearable`)
wearable["wearable_risk"] = wearable.apply(create_wearable_risk, axis="columns")

# Target is the new label column; features are all remaining columns
y_wear = wearable["wearable_risk"]
X_wear = wearable.drop(columns=["wearable_risk"])

print(y_wear.value_counts())


wearable_risk
4    473
3    220
1    192
2    180
0     96
Name: count, dtype: int64


In [None]:
from sklearn.preprocessing import StandardScaler

# Stratified 80/20 split of the wearable data, done before scaling/resampling
X_train_w, X_test_w, y_train_w, y_test_w = train_test_split(
    X_wear,
    y_wear,
    test_size=0.2,
    stratify=y_wear,
    random_state=42,
)

# Per-class sample counts in each split, ordered by risk level
train_counts_w = y_train_w.value_counts().sort_index()
test_counts_w = y_test_w.value_counts().sort_index()

split_table_w = pd.DataFrame(
    {"Training (80%)": train_counts_w, "Testing (20%)": test_counts_w}
)

print("\n=== WEARABLE: CLASS DISTRIBUTION TRAIN vs TEST (BEFORE SMOTE) ===\n")
print(split_table_w)

print(f"\nTOTAL WEARABLE DATA: {len(X_wear)}")
print(f"Total Training: {train_counts_w.sum()}")
print(f"Total Testing : {test_counts_w.sum()}")



=== WEARABLE: CLASS DISTRIBUTION TRAIN vs TEST (BEFORE SMOTE) ===

               Training (80%)  Testing (20%)
wearable_risk                               
0                          77             19
1                         153             39
2                         144             36
3                         176             44
4                         378             95

TOTAL WEARABLE DATA: 1161
Total Training: 928
Total Testing : 233


In [None]:
# Standardise wearable features; statistics come from the training split only
scaler_w = StandardScaler().fit(X_train_w)
X_train_w_scaled = scaler_w.transform(X_train_w)
X_test_w_scaled = scaler_w.transform(X_test_w)


In [None]:
# Training-split class counts prior to oversampling, ordered by risk level
print(
    "\nClass distribution BEFORE SMOTE (training set only):",
    pd.Series(y_train_w).value_counts().sort_index(),
    sep="\n",
)


Class distribution BEFORE SMOTE (training set only):
wearable_risk
0     77
1    153
2    144
3    176
4    378
Name: count, dtype: int64


In [None]:
# Oversample the scaled wearable training split with SMOTE (test split untouched)
smote = SMOTE(random_state=42)
X_train_w_bal, y_train_w_bal = smote.fit_resample(X_train_w_scaled, y_train_w)

print(
    "\nClass distribution AFTER SMOTE (training set only):",
    pd.Series(y_train_w_bal).value_counts().sort_index(),
    sep="\n",
)


Class distribution AFTER SMOTE (training set only):
wearable_risk
0    378
1    378
2    378
3    378
4    378
Name: count, dtype: int64


In [None]:
# Hold out part of the balanced training data for monitoring.
# TabNet tracks the eval_set metric and keeps the weights of the best-scoring
# epoch, so passing the test split there would select the model on the very
# data used for the final report. The test split must stay unseen until evaluation.
X_fit_w, X_val_w, y_fit_w, y_val_w = train_test_split(
    np.array(X_train_w_bal),
    np.array(y_train_w_bal),
    test_size=0.15,
    random_state=42,
    stratify=np.array(y_train_w_bal),
)

# Same TabNet configuration as the PHQ model
wear_model = TabNetClassifier(
    n_d=8, n_a=8,
    n_steps=3,
    gamma=1.3,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    scheduler_params={"step_size":10, "gamma":0.9},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    verbose=1
)

wear_model.fit(
    X_fit_w, y_fit_w,
    eval_set=[(X_val_w, y_val_w)],     # validation split, NOT the test split
    max_epochs=50,
    patience=50,                       # equals max_epochs, so training never stops early
    batch_size=32,
    virtual_batch_size=16
)



epoch 0  | loss: 1.3874  | val_0_accuracy: 0.5279  |  0:00:01s
epoch 1  | loss: 0.83895 | val_0_accuracy: 0.77682 |  0:00:02s
epoch 2  | loss: 0.75889 | val_0_accuracy: 0.85837 |  0:00:03s
epoch 3  | loss: 0.68991 | val_0_accuracy: 0.91416 |  0:00:04s
epoch 4  | loss: 0.647   | val_0_accuracy: 0.87554 |  0:00:05s
epoch 5  | loss: 0.63036 | val_0_accuracy: 0.87554 |  0:00:07s
epoch 6  | loss: 0.63941 | val_0_accuracy: 0.90129 |  0:00:08s
epoch 7  | loss: 0.61963 | val_0_accuracy: 0.87124 |  0:00:09s
epoch 8  | loss: 0.60645 | val_0_accuracy: 0.87124 |  0:00:11s
epoch 9  | loss: 0.55809 | val_0_accuracy: 0.78541 |  0:00:12s
epoch 10 | loss: 0.61378 | val_0_accuracy: 0.90987 |  0:00:13s
epoch 11 | loss: 0.5396  | val_0_accuracy: 0.90558 |  0:00:14s
epoch 12 | loss: 0.56588 | val_0_accuracy: 0.8927  |  0:00:15s
epoch 13 | loss: 0.52948 | val_0_accuracy: 0.90987 |  0:00:17s
epoch 14 | loss: 0.50537 | val_0_accuracy: 0.86695 |  0:00:18s
epoch 15 | loss: 0.4963  | val_0_accuracy: 0.90987 |  0



In [None]:
# Predict on the wearable test split and summarise overall and per-class performance
X_test_w_np = np.array(X_test_w_scaled)
wear_preds = wear_model.predict(X_test_w_np)
wear_acc = accuracy_score(y_test_w, wear_preds)

print("WEARABLE TABNET RESULTS:")
print(f"Wearable model accuracy: {wear_acc}")
print("\nClassification Report:\n")
print(classification_report(y_test_w, wear_preds))

WEARABLE TABNET RESULTS:
Wearable model accuracy: 0.9613733905579399

Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.94      0.85      0.89        39
           2       0.88      0.97      0.92        36
           3       0.98      1.00      0.99        44
           4       1.00      0.99      0.99        95

    accuracy                           0.96       233
   macro avg       0.95      0.95      0.95       233
weighted avg       0.96      0.96      0.96       233



In [None]:
# Compare accuracy on the (balanced) training data with the test accuracy
# computed earlier, as a quick overfitting check
train_preds = phq_model.predict(X_train_np)
train_acc = accuracy_score(y_train_np, train_preds)

print(f"PHQ Train accuracy: {train_acc}")
print(f"PHQ Test accuracy : {phq_acc}")


PHQ Train accuracy: 0.9527272727272728
PHQ Test accuracy : 0.927007299270073


In [None]:
def mental_health_interpretation(phq, wear):
    """Combine a PHQ severity level and a wearable risk level into one message.

    Args:
        phq: PHQ severity, either an integer level 0-4 or one of the labels
            "Minimal", "Mild", "Moderate", "Moderately severe", "Severe"
            (mapped to 0-4 in that order), so label predictions can be
            passed in directly.
        wear: Wearable risk level, an integer 0-4.

    Returns:
        str: Human-readable description of the combined risk.

    Raises:
        ValueError: If `phq` is an unknown label, or either level is not one
            of 0, 1, 2, 3, 4. Previously such inputs either crashed with a
            TypeError or silently fell through to "Low Risk / Healthy".
    """
    severity_levels = {
        "Minimal": 0,
        "Mild": 1,
        "Moderate": 2,
        "Moderately severe": 3,
        "Severe": 4,
    }

    # Accept the string labels as well as numeric levels.
    if isinstance(phq, str):
        if phq not in severity_levels:
            raise ValueError(f"Unknown PHQ severity label: {phq!r}")
        phq = severity_levels[phq]

    # Reject out-of-range levels instead of reporting them as low risk.
    if phq not in range(5):
        raise ValueError(f"phq must be an integer level 0-4, got {phq!r}")
    if wear not in range(5):
        raise ValueError(f"wear must be an integer level 0-4, got {wear!r}")

    # Very severe psychological distress dominates all signals
    if phq == 4:
        if wear >= 3:
            return "Critical Risk — severe psychological distress with strong physiological stress"
        else:
            return "Severe Psychological Risk — body signals not yet extreme"

    # High psychological distress
    elif phq == 3:
        if wear >= 3:
            return "Very High Risk — both mind and body under significant strain"
        else:
            return "High Psychological Risk — body partially coping"

    # Moderate psychological distress
    elif phq == 2:
        if wear >= 3:
            return "Elevated Risk — moderate mood symptoms with high physiological stress"
        elif wear >= 2:
            return "Moderate Risk — both systems show some concern"
        else:
            return "Mild to Moderate Risk — primarily psychological"

    # Low PHQ (0 or 1) but noticeable physiological stress
    # NOTE(review): "Minimal Risk" is returned even for wear == 4 — confirm the wording is intended.
    elif wear >= 2:
        return "Minimal Risk"

    # Low scores on both systems
    else:
        return "Low Risk / Healthy"


# Demo: low questionnaire level (1) paired with the highest wearable risk level (4)
example = mental_health_interpretation(1, 4)
print(example)


Minimal Risk
