In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,precision_score
from sklearn import metrics
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold

In [7]:
PHQ9Post = pd.DataFrame()
PHQ9Post = pd.read_csv('../../dataset/PHQ9/PHQ9PostClassified.csv')

In [8]:
ActivityData = pd.DataFrame()
ActivityData = pd.read_csv('../../dataset/BasicFeatures/activityCounts.csv')

In [9]:
AudioData = pd.DataFrame()
AudioData = pd.read_csv('../../dataset/BasicFeatures/audioCounts.csv')

In [10]:
ConversationData = pd.DataFrame()
ConversationData = pd.read_csv('../../dataset/BasicFeatures/conversationCounts.csv')

In [11]:
locationData = pd.DataFrame()
locationData = pd.read_csv('../../dataset/BasicFeatures/locationAvgs.csv')

In [12]:
DarknessData = pd.DataFrame()
DarknessData = pd.read_csv('../../dataset/BasicFeatures/darknessCounts.csv')

In [13]:
ChargeData = pd.DataFrame()
ChargeData = pd.read_csv('../../dataset/BasicFeatures/chargeCounts.csv')

In [14]:
LockData = pd.DataFrame()
LockData = pd.read_csv('../../dataset/BasicFeatures/lockCounts.csv')

## Model Code

In [15]:
# Function to calculate sensitivity, specificity, and balanced accuracy
def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    balanced_accuracy = (sensitivity + specificity) / 2
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    return sensitivity, specificity, balanced_accuracy, accuracy, precision

# Function to train and evaluate with k-fold cross-validation
def TrainandEvalWithCrossValidation(model, features, target, name, cv=5):
    print(name)
    # Perform k-fold cross-validation
    kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
    
    accuracies = []
    sensitivities = []
    specificities = []
    balanced_accuracies = []
    precisions = []


    for train_idx, test_idx in kfold.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = target[train_idx], target[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        sensitivity, specificity, balanced_accuracy, accuracy, precision = calculate_metrics(y_test, y_pred)


        sensitivities.append(sensitivity)
        specificities.append(specificity)
        balanced_accuracies.append(balanced_accuracy)
        accuracies.append(accuracy)
        precisions.append(precision)

    # Print average metrics across all folds
    print("Mean Sensitivity:", sum(sensitivities) / len(sensitivities))
    print("Mean Specificity:", sum(specificities) / len(specificities))
    print("Mean Balanced Accuracy:", sum(balanced_accuracies) / len(balanced_accuracies))
    print("Mean Accuracy:", sum(accuracies) / len(accuracies))
    print("Mean Precision:", sum(precisions) / len(precisions))

In [16]:
#declare all model vars:
LR = LogisticRegression()
SVM = SVC()
RF = RandomForestClassifier() 
XGB = XGBClassifier()

In [17]:
# Amend function to run all models with k-fold cross-validation
def RunAllModelsNormOversampledWithCrossValidation(data, PHQ9, cv=5):
    data = pd.merge(data, PHQ9, on='userId')
    data = data.drop(columns=['userId', 'WeekId'])
    X = data.drop(['PHQ9'], axis=1)
    y = data['PHQ9']

    scaler = MinMaxScaler()
    XNorm = scaler.fit_transform(X)



     # Oversampling
    oversampler = SMOTE(random_state=42)
    features_resampled, target_resampled = oversampler.fit_resample(XNorm, y)


    cv = 5
    TrainandEvalWithCrossValidation(SVM, features_resampled, target_resampled, 'SVM', cv)
    TrainandEvalWithCrossValidation(XGB, features_resampled, target_resampled, 'XGB', cv)
   

## Activity

In [18]:
RunAllModelsNormOversampledWithCrossValidation(ActivityData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.7964044886247181
Mean Specificity: 0.7030166390281168
Mean Balanced Accuracy: 0.7497105638264174
Mean Accuracy: 0.7461754385964913
Mean Precision: 0.7261225051663003
XGB
Mean Sensitivity: 0.8565271230443396
Mean Specificity: 0.854363971752781
Mean Balanced Accuracy: 0.8554455473985604
Mean Accuracy: 0.8570526315789474
Mean Precision: 0.8477818913113031


## Audio

In [19]:
RunAllModelsNormOversampledWithCrossValidation(AudioData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.6501935164309627
Mean Specificity: 0.7511420000372656
Mean Balanced Accuracy: 0.7006677582341141
Mean Accuracy: 0.7009473684210527
Mean Precision: 0.7227511545158605
XGB
Mean Sensitivity: 0.7616268702602993
Mean Specificity: 0.8122890309116994
Mean Balanced Accuracy: 0.7869579505859994
Mean Accuracy: 0.7830526315789473
Mean Precision: 0.7971717171717171


## Conversation

In [20]:
RunAllModelsNormOversampledWithCrossValidation(ConversationData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.7222497319753417
Mean Specificity: 0.6034491978609626
Mean Balanced Accuracy: 0.6628494649181522
Mean Accuracy: 0.663859649122807
Mean Precision: 0.6451801664793354
XGB
Mean Sensitivity: 0.8034269186098454
Mean Specificity: 0.7861119599354893
Mean Balanced Accuracy: 0.7947694392726674
Mean Accuracy: 0.7988421052631579
Mean Precision: 0.793980463980464


## Location

In [21]:
RunAllModelsNormOversampledWithCrossValidation(locationData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.9642989025849067
Mean Specificity: 0.5726282051282052
Mean Balanced Accuracy: 0.7684635538565558
Mean Accuracy: 0.7611447811447811
Mean Precision: 0.6882410579971555
XGB
Mean Sensitivity: 0.8749280902535158
Mean Specificity: 0.8031135531135531
Mean Balanced Accuracy: 0.8390208216835344
Mean Accuracy: 0.8381144781144781
Mean Precision: 0.8134361054766733


## Darkness

In [22]:
RunAllModelsNormOversampledWithCrossValidation(DarknessData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.6711904761904762
Mean Specificity: 0.7694244232607403
Mean Balanced Accuracy: 0.7203074497256082
Mean Accuracy: 0.7195789473684211
Mean Precision: 0.7414215891635247
XGB
Mean Sensitivity: 0.7373809523809524
Mean Specificity: 0.8265811568369112
Mean Balanced Accuracy: 0.7819810546089319
Mean Accuracy: 0.7831228070175438
Mean Precision: 0.8059529954101157


## Charge

In [23]:
RunAllModelsNormOversampledWithCrossValidation(ChargeData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.615547393782688
Mean Specificity: 0.7267384077140175
Mean Balanced Accuracy: 0.6711429007483527
Mean Accuracy: 0.6729473684210526
Mean Precision: 0.6912908091276212
XGB
Mean Sensitivity: 0.8071485377367731
Mean Specificity: 0.7323641728519776
Mean Balanced Accuracy: 0.7697563552943754
Mean Accuracy: 0.7714736842105263
Mean Precision: 0.7565881642512077


## Lock

In [24]:
RunAllModelsNormOversampledWithCrossValidation(LockData,PHQ9Post, 10)

SVM
Mean Sensitivity: 0.6725966728154676
Mean Specificity: 0.6158972591253796
Mean Balanced Accuracy: 0.6442469659704237
Mean Accuracy: 0.6401052631578947
Mean Precision: 0.644089646243869
XGB
Mean Sensitivity: 0.8230058242809319
Mean Specificity: 0.792479457414895
Mean Balanced Accuracy: 0.8077426408479134
Mean Accuracy: 0.8068771929824562
Mean Precision: 0.7945688545688545
