In [5]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.optimizers import SGD

from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV, KFold

# add confusion matrix, f1 + auc score, precision/recall + roc curves

In [6]:
data = pd.read_csv('../Data/clean_df_engineering.csv')
data.drop(['Course'], axis=1, inplace=True)

data.head()

Unnamed: 0,Age,Gender,CGPA,Stress_Level,Depression_Score,Anxiety_Score,Sleep_Quality,Physical_Activity,Diet_Quality,Social_Support,Relationship_Status,Substance_Use,Counseling_Service_Use,Family_History,Chronic_Illness,Financial_Stress,Extracurricular_Involvement,Semester_Credit_Load,Residence_Type
0,24,0,2.44,0,3,0,0.5,0.0,0.5,0.0,0.0,0.5,0.5,0,0,3,0.0,27,0.0
1,22,0,3.62,3,2,1,0.5,1.0,0.5,0.5,0.0,0.0,1.0,1,0,5,0.0,24,0.0
2,22,0,3.16,2,1,5,1.0,0.0,0.5,0.5,0.0,0.0,0.0,0,0,3,0.0,21,0.0
3,25,0,3.48,2,3,2,0.5,0.0,0.0,1.0,0.0,0.0,0.5,0,0,2,0.5,29,1.0
4,23,0,3.65,2,5,1,1.0,0.5,0.5,0.0,0.0,0.0,0.0,1,0,0,0.5,19,1.0


In [7]:
features = ['Age', 'CGPA', 'Semester_Credit_Load', 'Anxiety_Score']
N_FEATURES = len(features)

In [8]:
BINARY_CLASSIFICATION = True
BINARY_ENCODE_THRESHOLD = 3 # values greater than this will be encoded as 1

# for multi-class classification:
N_CLASSES = len(data['Depression_Score'].unique())

In [9]:
# Define build_model function for KerasClassifier
# def build_model(num_layers, num_neurons, activation, dropout_rate, momentum, reg_method, reg_rate, learning_rate):
def build_model(num_layers, num_neurons, activation, momentum, reg_method, reg_rate, learning_rate):
    regularizer = l1(reg_rate) if reg_method=='l1' else l2(reg_rate)

    model = Sequential()

    # Input layer
    model.add(Input((N_FEATURES,)))
    # Hidden layers
    for _ in range(num_layers):
        model.add(Dense(num_neurons, activation=activation, kernel_regularizer=regularizer))
        # model.add(Dropout(dropout_rate))
    # Output layer
    if BINARY_CLASSIFICATION:
        model.add(Dense(1, activation='sigmoid')) # binary classification
    else:
        model.add(Dense(N_CLASSES, activation='softmax')) # multi-class classification

    optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
    loss_string = 'binary_crossentropy' if BINARY_CLASSIFICATION else 'sparse_categorical_crossentropy'
    model.compile(loss=loss_string, optimizer=optimizer, metrics=['accuracy'])
    return model

## engineering dataset

In [6]:
# Load dataset
X = data[features]

if BINARY_CLASSIFICATION:
    y = data['Depression_Score'].apply(lambda x: 1 if x > BINARY_ENCODE_THRESHOLD else 0)
else:
    y = data['Depression_Score']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12, shuffle=True)

# Preprocess data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define parameters grid for grid search
param_grid = {
    # parameters for Sequential model
    'model__num_layers': [2, 3], # number of hidden layers
    'model__num_neurons': [32, 64],
    'model__dropout_rate': [0.2, 0.5],
    'model__momentum': [0.5, 0.9],
    'model__activation': ['relu', 'tanh'],
    'model__reg_method': ['l1', 'l2'],
    'model__reg_rate': [0.001, 0.01, 0.1],
    'model__learning_rate': [0.001, 0.01, 0.1],
    # parameters for KerasClassifier
    'batch_size': [32, 64]
}

# Create KerasClassifier wrapper for scikit-learn
model = KerasClassifier(build_fn=build_model, epochs=10, verbose=1)

# Perform grid search with cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=kfold, scoring='accuracy')
grid_result = grid_search.fit(X_train, y_train)

  _data = np.array(data, dtype=dtype, copy=copy,
  X, y = self._initialize(X, y)


Epoch 1/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6940 - loss: 1.3467  
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7173 - loss: 1.3377 
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7403 - loss: 1.3352 
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7330 - loss: 1.3323 
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7682 - loss: 1.3276 
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7964 - loss: 1.3121 
Epoch 7/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7693 - loss: 1.3094 
Epoch 8/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7753 - loss: 1.3086 
Epoch 9/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━

In [7]:
# Print results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, std, param))

Best: 0.809709 using {'batch_size': 64, 'model__activation': 'relu', 'model__dropout_rate': 0.5, 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 64, 'model__reg_method': 'l2', 'model__reg_rate': 0.01}
0.808367 (0.024431) with: {'batch_size': 32, 'model__activation': 'relu', 'model__dropout_rate': 0.2, 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 32, 'model__reg_method': 'l1', 'model__reg_rate': 0.001}
0.808367 (0.024431) with: {'batch_size': 32, 'model__activation': 'relu', 'model__dropout_rate': 0.2, 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 32, 'model__reg_method': 'l1', 'model__reg_rate': 0.01}
0.808367 (0.024431) with: {'batch_size': 32, 'model__activation': 'relu', 'model__dropout_rate': 0.2, 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 32, 'model__reg_method':

## iteration 2

going to stick w relu activation and no dropout (commented out in build function)

In [10]:
BINARY_CLASSIFICATION = True
BINARY_ENCODE_THRESHOLD = 4 # values greater than this will be encoded as 1

In [11]:
# Load dataset
X = data[features]

if BINARY_CLASSIFICATION:
    y = data['Depression_Score'].apply(lambda x: 1 if x > BINARY_ENCODE_THRESHOLD else 0)
else:
    y = data['Depression_Score']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12, shuffle=True)

# Preprocess data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define parameters grid for grid search
param_grid = {
    # parameters for Sequential model
    'model__num_layers': [2, 3, 4], # number of hidden layers
    'model__num_neurons': [8, 10, 12],
    # 'model__dropout_rate': [0.2, 0.5],
    'model__momentum': [0.5, 0.9],
    'model__activation': ['relu'],
    'model__reg_method': ['l1', 'l2'],
    'model__reg_rate': [0.001, 0.01],
    'model__learning_rate': [0.001, 0.01],
    # parameters for KerasClassifier
    'batch_size': [32, 64]
}

# Create KerasClassifier wrapper for scikit-learn
model = KerasClassifier(build_fn=build_model, epochs=10, verbose=1)

# Perform grid search with cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=kfold, scoring='accuracy')
grid_result = grid_search.fit(X_train, y_train)

  _data = np.array(data, dtype=dtype, copy=copy,
  X, y = self._initialize(X, y)


Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8121 - loss: 0.6751   
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8398 - loss: 0.6589 
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8630 - loss: 0.6398 
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8899 - loss: 0.6216 
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9076 - loss: 0.6018 
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9079 - loss: 0.5873 
Epoch 7/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9076 - loss: 0.5724 
Epoch 8/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9014 - loss: 0.5590 
Epoch 9/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━

In [12]:
# Print results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, std, param))

Best: 0.911499 using {'batch_size': 32, 'model__activation': 'relu', 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 12, 'model__reg_method': 'l2', 'model__reg_rate': 0.001}
0.910157 (0.020720) with: {'batch_size': 32, 'model__activation': 'relu', 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 8, 'model__reg_method': 'l1', 'model__reg_rate': 0.001}
0.894157 (0.026677) with: {'batch_size': 32, 'model__activation': 'relu', 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 8, 'model__reg_method': 'l1', 'model__reg_rate': 0.01}
0.872582 (0.076356) with: {'batch_size': 32, 'model__activation': 'relu', 'model__learning_rate': 0.001, 'model__momentum': 0.5, 'model__num_layers': 2, 'model__num_neurons': 8, 'model__reg_method': 'l2', 'model__reg_rate': 0.001}
0.837673 (0.117275) with: {'batch_size': 32, 'model__activation': 'relu', 'model_