In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,precision_score
from sklearn import metrics
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold

In [13]:
PHQ9Post = pd.DataFrame()
PHQ9Post = pd.read_csv('../../dataset/PHQ9/PHQ9PostClassified.csv')

In [14]:
AudioLocationConvoMerged = pd.DataFrame()
AudioLocationConvoMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioLocationConvoMerged.csv')

ActivityLocationConvoMerged = pd.DataFrame()
ActivityLocationConvoMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ActivityLocationConvoMerged.csv')

DarknessLocationConvoMerged = pd.DataFrame()
DarknessLocationConvoMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/DarknessLocationConvoMerged.csv')

ChargeLocationConvoMerged = pd.DataFrame()
ChargeLocationConvoMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeLocationConvoMerged.csv')

LockLocationConvoMerged = pd.DataFrame()
LockLocationConvoMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/LockLocationConvoMerged.csv')

## Model Code

In [15]:
# Function to calculate sensitivity, specificity, and balanced accuracy
def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    balanced_accuracy = (sensitivity + specificity) / 2
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    return sensitivity, specificity, balanced_accuracy, accuracy, precision

# Function to train and evaluate with k-fold cross-validation
def TrainandEvalWithCrossValidation(model, features, target, name, cv=5):
    print(name)
    # Perform k-fold cross-validation
    kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
    
    accuracies = []
    sensitivities = []
    specificities = []
    balanced_accuracies = []
    precisions = []


    for train_idx, test_idx in kfold.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = target[train_idx], target[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        sensitivity, specificity, balanced_accuracy, accuracy, precision = calculate_metrics(y_test, y_pred)


        sensitivities.append(sensitivity)
        specificities.append(specificity)
        balanced_accuracies.append(balanced_accuracy)
        accuracies.append(accuracy)
        precisions.append(precision)

    # Print average metrics across all folds
    print("Mean Accuracy:", sum(accuracies) / len(accuracies))
    print("Mean Balanced Accuracy:", sum(balanced_accuracies) / len(balanced_accuracies))
    print("Mean Sensitivity:", sum(sensitivities) / len(sensitivities))
    print("Mean Specificity:", sum(specificities) / len(specificities))
    print("Mean Precision:", sum(precisions) / len(precisions))

In [16]:
#declare all model vars:
LR = LogisticRegression()
SVM = SVC()
RF = RandomForestClassifier() 
XGB = XGBClassifier()

In [17]:
# Amend function to run all models with k-fold cross-validation
def RunAllModelsNormOversampledWithCrossValidation(data, PHQ9, cv=5):
    data = pd.merge(data, PHQ9, on='userId')
    data = data.drop(columns=['userId', 'WeekId'])
    X = data.drop(['PHQ9'], axis=1)
    y = data['PHQ9']

    scaler = MinMaxScaler()
    XNorm = scaler.fit_transform(X)



     # Oversampling
    oversampler = SMOTE(random_state=42)
    features_resampled, target_resampled = oversampler.fit_resample(XNorm, y)


    TrainandEvalWithCrossValidation(SVM, features_resampled, target_resampled, 'SVM', cv)
    TrainandEvalWithCrossValidation(XGB, features_resampled, target_resampled, 'XGB', cv)
   

## Location Convo Combinations

In [23]:
print('Audio')
RunAllModelsNormOversampledWithCrossValidation(AudioLocationConvoMerged,PHQ9Post, 10)
print('')
print('Activity')
RunAllModelsNormOversampledWithCrossValidation(ActivityLocationConvoMerged,PHQ9Post, 10)
print('')
print("Darkness")
RunAllModelsNormOversampledWithCrossValidation(DarknessLocationConvoMerged,PHQ9Post, 10)
print('')
print("Charge")
RunAllModelsNormOversampledWithCrossValidation(ChargeLocationConvoMerged,PHQ9Post, 10)
print('')
print("Lock")
RunAllModelsNormOversampledWithCrossValidation(LockLocationConvoMerged,PHQ9Post, 10)

Audio
SVM
Mean Accuracy: 0.8787037037037037
Mean Balanced Accuracy: 0.8814063714063713
Mean Sensitivity: 0.930653651903652
Mean Specificity: 0.8321590909090908
Mean Precision: 0.8512271062271063
XGB
Mean Accuracy: 0.941005291005291
Mean Balanced Accuracy: 0.9390073815073814
Mean Sensitivity: 0.9482925407925409
Mean Specificity: 0.9297222222222222
Mean Precision: 0.9391561482892753

Activity
SVM
Mean Accuracy: 0.8603174603174603
Mean Balanced Accuracy: 0.8637692862692863
Mean Sensitivity: 0.9453001165501167
Mean Specificity: 0.7822384559884561
Mean Precision: 0.8121893676498939
XGB
Mean Accuracy: 0.940873015873016
Mean Balanced Accuracy: 0.9446260683760684
Mean Sensitivity: 0.9671688034188035
Mean Specificity: 0.9220833333333335
Mean Precision: 0.9230823098470158

Darkness
SVM
Mean Accuracy: 0.9115079365079366
Mean Balanced Accuracy: 0.9082062055591468
Mean Sensitivity: 0.9689743589743589
Mean Specificity: 0.8474380521439345
Mean Precision: 0.873193546649429
XGB
Mean Accuracy: 0.9448412