In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,precision_score
from sklearn import metrics
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold

In [2]:
PHQ9Post = pd.DataFrame()
PHQ9Post = pd.read_csv('../../dataset/PHQ9/PHQ9PostClassified.csv')

In [3]:
AudioLocationConvoActivityLockMerged = pd.DataFrame()
AudioLocationConvoActivityLockMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioLocationConvoActivityLockMerged.csv')

DarknessLocationConvoActivityLockMerged = pd.DataFrame()
DarknessLocationConvoActivityLockMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/DarknessLocationConvoActivityLockMerged.csv')

ChargeLocationConvoActivityLockMerged = pd.DataFrame()
ChargeLocationConvoActivityLockMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeLocationConvoActivityLockMerged.csv')


In [8]:
AudioLocationConvoActivityLockDarkMerged = pd.DataFrame()
AudioLocationConvoActivityLockDarkMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioLocationConvoActivityLockDarkMerged.csv')

ChargeLocationConvoActivityLockDarkMerged = pd.DataFrame()
ChargeLocationConvoActivityLockDarkMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeLocationConvoActivityLockDarkMerged.csv')

## Model Code

In [4]:
# Function to calculate sensitivity, specificity, and balanced accuracy
def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    balanced_accuracy = (sensitivity + specificity) / 2
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    return sensitivity, specificity, balanced_accuracy, accuracy, precision

# Function to train and evaluate with k-fold cross-validation
def TrainandEvalWithCrossValidation(model, features, target, name, cv=5):
    print(name)
    # Perform k-fold cross-validation
    kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
    
    accuracies = []
    sensitivities = []
    specificities = []
    balanced_accuracies = []
    precisions = []


    for train_idx, test_idx in kfold.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = target[train_idx], target[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        sensitivity, specificity, balanced_accuracy, accuracy, precision = calculate_metrics(y_test, y_pred)


        sensitivities.append(sensitivity)
        specificities.append(specificity)
        balanced_accuracies.append(balanced_accuracy)
        accuracies.append(accuracy)
        precisions.append(precision)

    # Print average metrics across all folds
    print("Mean Accuracy:", sum(accuracies) / len(accuracies))
    print("Mean Balanced Accuracy:", sum(balanced_accuracies) / len(balanced_accuracies))
    print("Mean Sensitivity:", sum(sensitivities) / len(sensitivities))
    print("Mean Specificity:", sum(specificities) / len(specificities))
    print("Mean Precision:", sum(precisions) / len(precisions))

In [5]:
#declare all model vars:
LR = LogisticRegression()
SVM = SVC()
RF = RandomForestClassifier() 
XGB = XGBClassifier()

In [12]:
# Amend function to run all models with k-fold cross-validation
def RunAllModelsNormOversampledWithCrossValidation(data, PHQ9, cv=5):
    data = pd.merge(data, PHQ9, on='userId')
    data = data.drop(columns=['userId', 'WeekId'])
    X = data.drop(['PHQ9'], axis=1)
    y = data['PHQ9']

    scaler = MinMaxScaler()
    XNorm = scaler.fit_transform(X)



     # Oversampling
    oversampler = SMOTE(random_state=42)
    features_resampled, target_resampled = oversampler.fit_resample(XNorm, y)

    # TrainandEvalWithCrossValidation(LR, features_resampled, target_resampled, 'LR', cv)
    TrainandEvalWithCrossValidation(SVM, features_resampled, target_resampled, 'SVM', cv)
    # TrainandEvalWithCrossValidation(RF, features_resampled, target_resampled, 'RF', cv)
    TrainandEvalWithCrossValidation(XGB, features_resampled, target_resampled, 'XGB', cv)
   

## Location Convo Activity Lock Combinations

In [7]:
print('Audio')
RunAllModelsNormOversampledWithCrossValidation(AudioLocationConvoActivityLockMerged,PHQ9Post, 10)
print('')
print("Darkness")
RunAllModelsNormOversampledWithCrossValidation(DarknessLocationConvoActivityLockMerged,PHQ9Post, 10)
print('')
print("Charge")
RunAllModelsNormOversampledWithCrossValidation(ChargeLocationConvoActivityLockMerged,PHQ9Post, 10)


Audio
SVM
Mean Accuracy: 0.9193121693121693
Mean Balanced Accuracy: 0.9203962703962704
Mean Sensitivity: 0.9578001165501167
Mean Specificity: 0.8829924242424243
Mean Precision: 0.8911055920343844
XGB
Mean Accuracy: 0.9267195767195766
Mean Balanced Accuracy: 0.9288621794871794
Mean Sensitivity: 0.9425854700854701
Mean Specificity: 0.9151388888888888
Mean Precision: 0.9265262515262516

Darkness
SVM
Mean Accuracy: 0.9411375661375662
Mean Balanced Accuracy: 0.9427049217122747
Mean Sensitivity: 0.9933333333333334
Mean Specificity: 0.892076510091216
Mean Precision: 0.9006016880436386
XGB
Mean Accuracy: 0.9482804232804234
Mean Balanced Accuracy: 0.9512826348488114
Mean Sensitivity: 0.9734188034188035
Mean Specificity: 0.9291464662788191
Mean Precision: 0.931936274509804

Charge
SVM
Mean Accuracy: 0.9193121693121693
Mean Balanced Accuracy: 0.9186036879786881
Mean Sensitivity: 0.9480244755244754
Mean Specificity: 0.8891829004329004
Mean Precision: 0.8959747360482655
XGB
Mean Accuracy: 0.9082010

## Location Convo Activity Lock Darkness Combinations

In [9]:
print('Audio')
RunAllModelsNormOversampledWithCrossValidation(AudioLocationConvoActivityLockDarkMerged,PHQ9Post, 10)
print('')
print("Charge")
RunAllModelsNormOversampledWithCrossValidation(ChargeLocationConvoActivityLockDarkMerged,PHQ9Post, 10)

Audio
SVM
Mean Accuracy: 0.9447089947089946
Mean Balanced Accuracy: 0.9464397489765138
Mean Sensitivity: 0.9800854700854702
Mean Specificity: 0.9127940278675574
Mean Precision: 0.9176948215183509
XGB
Mean Accuracy: 0.9485449735449736
Mean Balanced Accuracy: 0.9525540921496803
Mean Sensitivity: 0.9611965811965811
Mean Specificity: 0.9439116031027798
Mean Precision: 0.9432167832167833

Charge
SVM
Mean Accuracy: 0.9410052910052908
Mean Balanced Accuracy: 0.9402828503196151
Mean Sensitivity: 0.9933333333333334
Mean Specificity: 0.8872323673058966
Mean Precision: 0.8992307692307693
XGB
Mean Accuracy: 0.951851851851852
Mean Balanced Accuracy: 0.9548906934568698
Mean Sensitivity: 0.9800854700854702
Mean Specificity: 0.9296959168282697
Mean Precision: 0.9293910256410257


## Run ALL Models

In [11]:
RunAllModelsNormOversampledWithCrossValidation(DarknessLocationConvoActivityLockMerged,PHQ9Post, 10)

LR
Mean Accuracy: 0.8972222222222221
Mean Balanced Accuracy: 0.9045265029088558
Mean Sensitivity: 0.9323076923076924
Mean Specificity: 0.8767453135100194
Mean Precision: 0.876821805645335
SVM
Mean Accuracy: 0.9411375661375662
Mean Balanced Accuracy: 0.9427049217122747
Mean Sensitivity: 0.9933333333333334
Mean Specificity: 0.892076510091216
Mean Precision: 0.9006016880436386
RF
Mean Accuracy: 0.955952380952381
Mean Balanced Accuracy: 0.9586841916253681
Mean Sensitivity: 0.9434188034188035
Mean Specificity: 0.9739495798319326
Mean Precision: 0.9684848484848485
XGB
Mean Accuracy: 0.9482804232804234
Mean Balanced Accuracy: 0.9512826348488114
Mean Sensitivity: 0.9734188034188035
Mean Specificity: 0.9291464662788191
Mean Precision: 0.931936274509804
