In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,precision_score
from sklearn import metrics
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold

In [2]:
PHQ9Post = pd.DataFrame()
PHQ9Post = pd.read_csv('../../dataset/PHQ9/PHQ9PostClassified.csv')

In [3]:
AudioLocationConvoActivityMerged = pd.DataFrame()
AudioLocationConvoActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioLocationConvoActivityMerged.csv')

DarknessLocationConvoActivityMerged = pd.DataFrame()
DarknessLocationConvoActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/DarknessLocationConvoActivityMerged.csv')

ChargeLocationConvoActivityMerged = pd.DataFrame()
ChargeLocationConvoActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeLocationConvoActivityMerged.csv')

LockLocationConvoActivityMerged = pd.DataFrame()
LockLocationConvoActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/LockLocationConvoActivityMerged.csv')

## Model Code

In [4]:
# Function to calculate sensitivity, specificity, and balanced accuracy
def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    balanced_accuracy = (sensitivity + specificity) / 2
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    return sensitivity, specificity, balanced_accuracy, accuracy, precision

# Function to train and evaluate with k-fold cross-validation
def TrainandEvalWithCrossValidation(model, features, target, name, cv=5):
    print(name)
    # Perform k-fold cross-validation
    kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
    
    accuracies = []
    sensitivities = []
    specificities = []
    balanced_accuracies = []
    precisions = []


    for train_idx, test_idx in kfold.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = target[train_idx], target[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        sensitivity, specificity, balanced_accuracy, accuracy, precision = calculate_metrics(y_test, y_pred)


        sensitivities.append(sensitivity)
        specificities.append(specificity)
        balanced_accuracies.append(balanced_accuracy)
        accuracies.append(accuracy)
        precisions.append(precision)

    # Print average metrics across all folds
    print("Mean Accuracy:", sum(accuracies) / len(accuracies))
    print("Mean Balanced Accuracy:", sum(balanced_accuracies) / len(balanced_accuracies))
    print("Mean Sensitivity:", sum(sensitivities) / len(sensitivities))
    print("Mean Specificity:", sum(specificities) / len(specificities))
    print("Mean Precision:", sum(precisions) / len(precisions))

In [5]:
#declare all model vars:
LR = LogisticRegression()
SVM = SVC()
RF = RandomForestClassifier() 
XGB = XGBClassifier()

In [6]:
# Amend function to run all models with k-fold cross-validation
def RunAllModelsNormOversampledWithCrossValidation(data, PHQ9, cv=5):
    data = pd.merge(data, PHQ9, on='userId')
    data = data.drop(columns=['userId', 'WeekId'])
    X = data.drop(['PHQ9'], axis=1)
    y = data['PHQ9']

    scaler = MinMaxScaler()
    XNorm = scaler.fit_transform(X)



     # Oversampling
    oversampler = SMOTE(random_state=42)
    features_resampled, target_resampled = oversampler.fit_resample(XNorm, y)


    TrainandEvalWithCrossValidation(SVM, features_resampled, target_resampled, 'SVM', cv)
    TrainandEvalWithCrossValidation(XGB, features_resampled, target_resampled, 'XGB', cv)
   

## Location Convo Combinations

In [7]:
print('Audio')
RunAllModelsNormOversampledWithCrossValidation(AudioLocationConvoActivityMerged,PHQ9Post, 10)
print('')
print("Darkness")
RunAllModelsNormOversampledWithCrossValidation(DarknessLocationConvoActivityMerged,PHQ9Post, 10)
print('')
print("Charge")
RunAllModelsNormOversampledWithCrossValidation(ChargeLocationConvoActivityMerged,PHQ9Post, 10)
print('')
print("Lock")
RunAllModelsNormOversampledWithCrossValidation(LockLocationConvoActivityMerged,PHQ9Post, 10)

Audio
SVM
Mean Accuracy: 0.8566137566137566
Mean Balanced Accuracy: 0.8597793872793874
Mean Sensitivity: 0.9362092074592075
Mean Specificity: 0.7833495670995672
Mean Precision: 0.8135897435897436
XGB
Mean Accuracy: 0.9300264550264551
Mean Balanced Accuracy: 0.9328205128205129
Mean Sensitivity: 0.9550854700854702
Mean Specificity: 0.9105555555555556
Mean Precision: 0.9181929181929182

Darkness
SVM
Mean Accuracy: 0.9043650793650793
Mean Balanced Accuracy: 0.9069859674639089
Mean Sensitivity: 0.9622222222222222
Mean Specificity: 0.8517497127055951
Mean Precision: 0.8717334626158155
XGB
Mean Accuracy: 0.9373015873015872
Mean Balanced Accuracy: 0.9357400524312289
Mean Sensitivity: 0.9784981684981686
Mean Specificity: 0.8929819363642894
Mean Precision: 0.9075320512820513

Charge
SVM
Mean Accuracy: 0.904365079365079
Mean Balanced Accuracy: 0.9061036879786881
Mean Sensitivity: 0.943857808857809
Mean Specificity: 0.868349567099567
Mean Precision: 0.8782198709443291
XGB
Mean Accuracy: 0.93756613