# ML MODEL CREATION

Importing the necessary libraries

In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

Importing the data

In [17]:
# Load the modelling dataset from the Excel workbook; the path is relative,
# so it resolves against the notebook's current working directory.
df = pd.read_excel(io="Data/ML_Data.xlsx")

Splitting the data into training and testing sets

In [18]:
# Label column the classifiers will be trained to predict.
y = df['depression_status']
# Feature matrix: every column of df except the label.
X = df.drop(columns='depression_status')

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle repeatable across runs.
# NOTE(review): no `stratify` argument is passed, so the class proportions in
# the two splits are left to chance — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Training the models

In [33]:
# Seed NumPy's global RNG. Every estimator that accepts a random_state, and
# the CV splitter, are additionally seeded explicitly below.
np.random.seed(42)

# 5-fold stratified cross-validation with shuffling, shared by all models so
# that each one is scored on the same folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Candidate classifiers, keyed by the label used in the results table.
# NOTE(review): the estimators are fit directly on X_train with no scaling
# step in this cell — confirm the features are already on comparable scales,
# since that usually matters for logistic regression, SVM and KNN.
models = {}
models['Logistic Regression'] = LogisticRegression(penalty='l2', random_state=42)
models['Random Forest'] = RandomForestClassifier(random_state=42)
models['SVM'] = SVC(kernel='rbf', random_state=42)
models['Gradient Boosting'] = GradientBoostingClassifier(random_state=42)
models['KNN'] = KNeighborsClassifier()

# Scorers requested from cross_validate, and the table column each one feeds
# (cross_validate reports every scorer under the key 'test_<scorer>').
scoring = ['accuracy', 'precision', 'recall', 'f1']
metric_columns = {
    'Accuracy': 'test_accuracy',
    'Precision': 'test_precision',
    'Recall': 'test_recall',
    'F1 Score': 'test_f1',
}

# One summary row per model: the mean of each metric over the CV folds.
results = []
for name, model in models.items():
    cv_results = cross_validate(
        model,
        X_train,
        y_train,
        cv=cv,
        scoring=scoring,
        return_train_score=False,
    )
    row = {'Model': name}
    for column, key in metric_columns.items():
        row[column] = np.mean(cv_results[key])
    results.append(row)

# Tabulate the per-model averages and display them as the cell output.
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
0,Logistic Regression,0.700893,0.695988,0.706855,0.700195
1,Random Forest,0.79246,0.800972,0.770363,0.784725
2,SVM,0.795337,0.772604,0.833871,0.801039
3,Gradient Boosting,0.75129,0.761811,0.725202,0.74192
4,KNN,0.757639,0.692027,0.923589,0.790666


Feature Selection