In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,precision_score
from sklearn import metrics
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold

In [2]:
PHQ9Post = pd.DataFrame()
PHQ9Post = pd.read_csv('../../dataset/PHQ9/PHQ9PostClassified.csv')

In [3]:

AudioActivityMerged = pd.DataFrame()
AudioActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioActivityMerged.csv')

ConvoActivityMerged = pd.DataFrame()
ConvoActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ConvoActivityMerged.csv')

LocationActivityMerged = pd.DataFrame()
LocationActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/LocationActivityMerged.csv')

DarknessActivityMerged = pd.DataFrame()
DarknessActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/DarknessActivityMerged.csv')

ChargeActivityMerged = pd.DataFrame()
ChargeActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeActivityMerged.csv')

LockActivityMerged = pd.DataFrame()
LockActivityMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/LockActivityMerged.csv')

In [4]:

AudioLocationMerged = pd.DataFrame()
AudioLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/AudioLocationMerged.csv')

ConvoLocationMerged = pd.DataFrame()
ConvoLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ConvoLocationMerged.csv')

ActivityLocationMerged = pd.DataFrame()
ActivityLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ActivityLocationMerged.csv')

DarknessLocationMerged = pd.DataFrame()
DarknessLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/DarknessLocationMerged.csv')

ChargeLocationMerged = pd.DataFrame()
ChargeLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/ChargeLocationMerged.csv')

LockLocationMerged = pd.DataFrame()
LockLocationMerged= pd.read_csv('../../dataset/BasicFeatures/Merged/LockLocationMerged.csv')

## Model Code

In [5]:
# Function to calculate sensitivity, specificity, and balanced accuracy
def calculate_metrics(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    balanced_accuracy = (sensitivity + specificity) / 2
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    return sensitivity, specificity, balanced_accuracy, accuracy, precision

# Function to train and evaluate with k-fold cross-validation
def TrainandEvalWithCrossValidation(model, features, target, name, cv=5):
    print(name)
    # Perform k-fold cross-validation
    kfold = KFold(n_splits=cv, shuffle=True, random_state=42)
    
    accuracies = []
    sensitivities = []
    specificities = []
    balanced_accuracies = []
    precisions = []


    for train_idx, test_idx in kfold.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = target[train_idx], target[test_idx]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        sensitivity, specificity, balanced_accuracy, accuracy, precision = calculate_metrics(y_test, y_pred)


        sensitivities.append(sensitivity)
        specificities.append(specificity)
        balanced_accuracies.append(balanced_accuracy)
        accuracies.append(accuracy)
        precisions.append(precision)

    # Print average metrics across all folds
    print("Mean Accuracy:", sum(accuracies) / len(accuracies))
    print("Mean Balanced Accuracy:", sum(balanced_accuracies) / len(balanced_accuracies))
    print("Mean Sensitivity:", sum(sensitivities) / len(sensitivities))
    print("Mean Specificity:", sum(specificities) / len(specificities))
    print("Mean Precision:", sum(precisions) / len(precisions))

In [6]:
#declare all model vars:
LR = LogisticRegression()
SVM = SVC()
RF = RandomForestClassifier() 
XGB = XGBClassifier()

In [7]:
# Amend function to run all models with k-fold cross-validation
def RunAllModelsNormOversampledWithCrossValidation(data, PHQ9, cv=5):
    data = pd.merge(data, PHQ9, on='userId')
    data = data.drop(columns=['userId', 'WeekId'])
    X = data.drop(['PHQ9'], axis=1)
    y = data['PHQ9']

    scaler = MinMaxScaler()
    XNorm = scaler.fit_transform(X)



     # Oversampling
    oversampler = SMOTE(random_state=42)
    features_resampled, target_resampled = oversampler.fit_resample(XNorm, y)


    TrainandEvalWithCrossValidation(SVM, features_resampled, target_resampled, 'SVM', cv)
    TrainandEvalWithCrossValidation(XGB, features_resampled, target_resampled, 'XGB', cv)
   

## Activity Pairs

### Audio


In [8]:
RunAllModelsNormOversampledWithCrossValidation(AudioActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.7225462304409673
Mean Balanced Accuracy: 0.7232513573457846
Mean Sensitivity: 0.733023825160048
Mean Specificity: 0.713478889531521
Mean Precision: 0.7148830409356726
XGB
Mean Accuracy: 0.8731863442389759
Mean Balanced Accuracy: 0.8689479506561548
Mean Sensitivity: 0.8546237786980821
Mean Specificity: 0.8832721226142279
Mean Precision: 0.8858800044984255


### Conversation

In [9]:
RunAllModelsNormOversampledWithCrossValidation(ConvoActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.6983641536273115
Mean Balanced Accuracy: 0.7002971530963016
Mean Sensitivity: 0.688407064899325
Mean Specificity: 0.7121872412932786
Mean Precision: 0.7109445428115505
XGB
Mean Accuracy: 0.8704836415362731
Mean Balanced Accuracy: 0.8685107009433635
Mean Sensitivity: 0.8895654599958005
Mean Specificity: 0.8474559418909265
Mean Precision: 0.8498916494133884


### Location

In [10]:
RunAllModelsNormOversampledWithCrossValidation(LocationActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.7826719576719576
Mean Balanced Accuracy: 0.7840714146964147
Mean Sensitivity: 0.9707167832167833
Mean Specificity: 0.5974260461760462
Mean Precision: 0.7090732959850607
XGB
Mean Accuracy: 0.9337301587301587
Mean Balanced Accuracy: 0.9345194388944389
Mean Sensitivity: 0.9316579254079255
Mean Specificity: 0.9373809523809523
Mean Precision: 0.9295995670995671


### Darkness

In [11]:
RunAllModelsNormOversampledWithCrossValidation(DarknessActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.7300853485064012
Mean Balanced Accuracy: 0.7315627571845602
Mean Sensitivity: 0.6949062049062048
Mean Specificity: 0.7682193094629156
Mean Precision: 0.7614606145488498
XGB
Mean Accuracy: 0.8967994310099572
Mean Balanced Accuracy: 0.8977221198097158
Mean Sensitivity: 0.893903318903319
Mean Specificity: 0.9015409207161126
Mean Precision: 0.895301841560423


### Charge

In [12]:
RunAllModelsNormOversampledWithCrossValidation(ChargeActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.7017069701280227
Mean Balanced Accuracy: 0.7031156064811335
Mean Sensitivity: 0.5887945241892611
Mean Specificity: 0.8174366887730059
Mean Precision: 0.7600790059613589
XGB
Mean Accuracy: 0.9014935988620201
Mean Balanced Accuracy: 0.9019446462101935
Mean Sensitivity: 0.9169233690286323
Mean Specificity: 0.8869659233917545
Mean Precision: 0.8926738950763718


### Lock

In [13]:
RunAllModelsNormOversampledWithCrossValidation(LockActivityMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.660881934566145
Mean Balanced Accuracy: 0.6599929615448191
Mean Sensitivity: 0.6093135691278107
Mean Specificity: 0.7106723539618276
Mean Precision: 0.6796195652173914
XGB
Mean Accuracy: 0.8887624466571836
Mean Balanced Accuracy: 0.8824928237826846
Mean Sensitivity: 0.9038677084869035
Mean Specificity: 0.8611179390784655
Mean Precision: 0.8752211624543026


## Location Pairs

### Audio

In [14]:
RunAllModelsNormOversampledWithCrossValidation(AudioLocationMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.8456349206349205
Mean Balanced Accuracy: 0.8450652819402821
Mean Sensitivity: 0.9357925407925409
Mean Specificity: 0.7543380230880231
Mean Precision: 0.7971177944862156
XGB
Mean Accuracy: 0.9042328042328043
Mean Balanced Accuracy: 0.8991210178710178
Mean Sensitivity: 0.8935198135198135
Mean Specificity: 0.9047222222222222
Mean Precision: 0.9190974161717197


### Conversation

In [15]:
RunAllModelsNormOversampledWithCrossValidation(ConvoLocationMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.8568783068783068
Mean Balanced Accuracy: 0.8628268259518259
Mean Sensitivity: 0.9397445609945609
Mean Specificity: 0.7859090909090909
Mean Precision: 0.8149983130904183
XGB
Mean Accuracy: 0.9338624338624338
Mean Balanced Accuracy: 0.933901862026862
Mean Sensitivity: 0.9664743589743591
Mean Specificity: 0.9013293650793651
Mean Precision: 0.9107548518732729


### Activity

In [16]:
RunAllModelsNormOversampledWithCrossValidation(ActivityLocationMerged,PHQ9Post, 10)


SVM
Mean Accuracy: 0.7904761904761904
Mean Balanced Accuracy: 0.7923656204906205
Mean Sensitivity: 0.99375
Mean Specificity: 0.5909812409812409
Mean Precision: 0.7108136175904382
XGB
Mean Accuracy: 0.9226190476190477
Mean Balanced Accuracy: 0.9170079226329225
Mean Sensitivity: 0.922194055944056
Mean Specificity: 0.9118217893217893
Mean Precision: 0.9259399445583656


### Darkness

In [17]:
RunAllModelsNormOversampledWithCrossValidation(DarknessLocationMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.8339947089947091
Mean Balanced Accuracy: 0.8353236102133161
Mean Sensitivity: 0.9321611721611722
Mean Specificity: 0.73848604826546
Mean Precision: 0.7794561157796451
XGB
Mean Accuracy: 0.9079365079365079
Mean Balanced Accuracy: 0.9105918264741796
Mean Sensitivity: 0.9301648351648352
Mean Specificity: 0.8910188177835237
Mean Precision: 0.8959868421052631


### Charge

In [18]:
RunAllModelsNormOversampledWithCrossValidation(ChargeLocationMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.8238095238095238
Mean Balanced Accuracy: 0.8222012709512707
Mean Sensitivity: 0.939628010878011
Mean Specificity: 0.704774531024531
Mean Precision: 0.7598872683044509
XGB
Mean Accuracy: 0.8636243386243386
Mean Balanced Accuracy: 0.8653349428349429
Mean Sensitivity: 0.8896814296814297
Mean Specificity: 0.840988455988456
Mean Precision: 0.8623992673992674


### Lock

In [19]:
RunAllModelsNormOversampledWithCrossValidation(LockLocationMerged,PHQ9Post, 10)

SVM
Mean Accuracy: 0.857010582010582
Mean Balanced Accuracy: 0.8604786879786879
Mean Sensitivity: 0.9779487179487181
Mean Specificity: 0.743008658008658
Mean Precision: 0.7910674482036711
XGB
Mean Accuracy: 0.9298941798941799
Mean Balanced Accuracy: 0.9286998418248418
Mean Sensitivity: 0.9709945609945609
Mean Specificity: 0.8864051226551226
Mean Precision: 0.902215758098111
