# 06 Cross-Validation Study

In this notebook, we use grid search with cross-validation to tune the hyperparameters of the logistic regression models (unregularized, L2-regularized, and L1-regularized).

In [18]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Imports
import sys
import os

import numpy as np

from data_loading import load_data
from models import logistic_regression, reg_l2_logistic_regression, reg_l1_logistic_regression
from train_pipeline import grid_search_with_cross_validation

#### Data loading and std processing

In [20]:
# Load the data with preprocessing enabled
data_path = "../data"
(
    x_train,
    x_test,
    y_train,
    train_ids,
    test_ids,
    feature_names,
    feature_dict,
) = load_data(data_path, preprocessed=True)

Loading raw data...
Data loaded.
Preprocessing data...
Data preprocessed.


#### Logistic Regression Cross-Validation

In [26]:
# Candidate values explored by the grid search (single gamma: 0.01)
max_iters = 500
gammas = [0.01]
thresholds = [0.2, 0.5, 0.7]
class_weights = [1, 4.5, 5, 5.5]

# Keyword order is kept so the grid is built with the same key order.
hyperparameters_ranges = dict(
    gamma=gammas,
    threshold=thresholds,
    max_iters=[max_iters],
    class_weight=class_weights,
)


In [27]:
# Run the 3-split cross-validated grid search for logistic_regression
# over hyperparameters_ranges, printing per-fold progress.
(
    best_params,
    best_avg_f1_score,
    best_avg_accuracy,
    best_weights,
) = grid_search_with_cross_validation(
    x_train,
    y_train,
    logistic_regression,
    hyperparameters_ranges,
    n_splits=3,
    verbose=True,
)

Fold 1/3, Threshold: 0.2, F1 Score: 0.3638, Accuracy: 0.8672
Fold 1/3, Threshold: 0.5, F1 Score: 0.0268, Accuracy: 0.9118
Fold 1/3, Threshold: 0.7, F1 Score: 0.0004, Accuracy: 0.9117
Fold 2/3, Threshold: 0.2, F1 Score: 0.3727, Accuracy: 0.8699
Fold 2/3, Threshold: 0.5, F1 Score: 0.0279, Accuracy: 0.9121
Fold 2/3, Threshold: 0.7, F1 Score: 0.0002, Accuracy: 0.9117
Fold 3/3, Threshold: 0.2, F1 Score: 0.3621, Accuracy: 0.8673
Fold 3/3, Threshold: 0.5, F1 Score: 0.0294, Accuracy: 0.9120
Fold 3/3, Threshold: 0.7, F1 Score: 0.0006, Accuracy: 0.9117
********************
New best F1 Score: 0.3662 with parameters {'gamma': 0.01, 'max_iters': 500, 'class_weight': 1, 'threshold': 0.2}
********************
Fold 1/3, Threshold: 0.2, F1 Score: 0.2970, Accuracy: 0.6189
Fold 1/3, Threshold: 0.5, F1 Score: 0.3984, Accuracy: 0.8561
Fold 1/3, Threshold: 0.7, F1 Score: 0.2865, Accuracy: 0.9073
Fold 2/3, Threshold: 0.2, F1 Score: 0.2961, Accuracy: 0.6182
Fold 2/3, Threshold: 0.5, F1 Score: 0.3998, Accuracy

In [29]:
# Candidate values explored by the grid search (single gamma: 0.1)
max_iters = 500
gammas = [0.1]
thresholds = [0.2, 0.5, 0.7]
class_weights = [1, 4.5, 5, 5.5]

# Keyword order is kept so the grid is built with the same key order.
hyperparameters_ranges = dict(
    gamma=gammas,
    threshold=thresholds,
    max_iters=[max_iters],
    class_weight=class_weights,
)

# Run the 3-split cross-validated grid search for logistic_regression
(
    best_params,
    best_avg_f1_score,
    best_avg_accuracy,
    best_weights,
) = grid_search_with_cross_validation(
    x_train,
    y_train,
    logistic_regression,
    hyperparameters_ranges,
    n_splits=3,
    verbose=True,
)

Fold 1/3, Threshold: 0.2, F1 Score: 0.3635, Accuracy: 0.7541
Fold 1/3, Threshold: 0.5, F1 Score: 0.4073, Accuracy: 0.8838
Fold 1/3, Threshold: 0.7, F1 Score: 0.3089, Accuracy: 0.9092
Fold 2/3, Threshold: 0.2, F1 Score: 0.0000, Accuracy: 0.9117
Fold 2/3, Threshold: 0.5, F1 Score: 0.0000, Accuracy: 0.9117
Fold 2/3, Threshold: 0.7, F1 Score: 0.0000, Accuracy: 0.9117
Fold 3/3, Threshold: 0.2, F1 Score: 0.3406, Accuracy: 0.9048
Fold 3/3, Threshold: 0.5, F1 Score: 0.0831, Accuracy: 0.9134
Fold 3/3, Threshold: 0.7, F1 Score: 0.0154, Accuracy: 0.9122
********************
New best F1 Score: 0.2347 with parameters {'gamma': 0.1, 'max_iters': 500, 'class_weight': 1, 'threshold': 0.2}
********************
Fold 1/3, Threshold: 0.2, F1 Score: 0.2347, Accuracy: 0.9116
Fold 1/3, Threshold: 0.5, F1 Score: 0.1770, Accuracy: 0.9134
Fold 1/3, Threshold: 0.7, F1 Score: 0.1371, Accuracy: 0.9133
Fold 2/3, Threshold: 0.2, F1 Score: 0.2764, Accuracy: 0.5639
Fold 2/3, Threshold: 0.5, F1 Score: 0.2966, Accuracy:

In [30]:
# Candidate values explored by the grid search, including the lambda_ values
max_iters = 500
gammas = [0.1, 0.01]
lambdas = [0.1, 0.01]
thresholds = [0.2, 0.5, 0.7]
class_weights = [1, 4.5, 5, 5.5]

# Keyword order is kept so the grid is built with the same key order.
hyperparameters_ranges = dict(
    gamma=gammas,
    threshold=thresholds,
    max_iters=[max_iters],
    class_weight=class_weights,
    lambda_=lambdas,
)

# Run the 3-split cross-validated grid search for reg_l2_logistic_regression
(
    best_params,
    best_avg_f1_score,
    best_avg_accuracy,
    best_weights,
) = grid_search_with_cross_validation(
    x_train,
    y_train,
    reg_l2_logistic_regression,
    hyperparameters_ranges,
    n_splits=3,
    verbose=True,
)


Fold 1/3, Threshold: 0.2, F1 Score: 0.0302, Accuracy: 0.9120
Fold 1/3, Threshold: 0.5, F1 Score: 0.0000, Accuracy: 0.9117
Fold 1/3, Threshold: 0.7, F1 Score: 0.0000, Accuracy: 0.9117
Fold 2/3, Threshold: 0.2, F1 Score: 0.0000, Accuracy: 0.9117
Fold 2/3, Threshold: 0.5, F1 Score: 0.0000, Accuracy: 0.9117
Fold 2/3, Threshold: 0.7, F1 Score: 0.0000, Accuracy: 0.9117
Fold 3/3, Threshold: 0.2, F1 Score: 0.0000, Accuracy: 0.9117
Fold 3/3, Threshold: 0.5, F1 Score: 0.0000, Accuracy: 0.9117
Fold 3/3, Threshold: 0.7, F1 Score: 0.0000, Accuracy: 0.9117
********************
New best F1 Score: 0.0101 with parameters {'gamma': 0.1, 'max_iters': 500, 'class_weight': 1, 'lambda_': 0.1, 'threshold': 0.2}
********************
Fold 1/3, Threshold: 0.2, F1 Score: 0.3129, Accuracy: 0.6520
Fold 1/3, Threshold: 0.5, F1 Score: 0.4023, Accuracy: 0.8331
Fold 1/3, Threshold: 0.7, F1 Score: 0.3747, Accuracy: 0.8890
Fold 2/3, Threshold: 0.2, F1 Score: 0.1482, Accuracy: 0.9133
Fold 2/3, Threshold: 0.5, F1 Score: 0

KeyboardInterrupt: 

In [None]:
# Candidate values explored by the grid search, including the lambda_ values
max_iters = 500
gammas = [0.1, 0.01]
lambdas = [0.1, 0.01]
thresholds = [0.2, 0.5, 0.7]
class_weights = [1, 4.5, 5, 5.5]

# Keyword order is kept so the grid is built with the same key order.
hyperparameters_ranges = dict(
    gamma=gammas,
    threshold=thresholds,
    max_iters=[max_iters],
    class_weight=class_weights,
    lambda_=lambdas,
)

# Run the 3-split cross-validated grid search for reg_l1_logistic_regression
(
    best_params,
    best_avg_f1_score,
    best_avg_accuracy,
    best_weights,
) = grid_search_with_cross_validation(
    x_train,
    y_train,
    reg_l1_logistic_regression,
    hyperparameters_ranges,
    n_splits=3,
    verbose=True,
)

In [None]:
7