SIB - P7: A Jupyter Notebook with examples of how to use cross-validation and grid search (30/10/2023).

In [1]:
# imports
import os

from si.io.csv_file import read_csv
from si.models.logistic_regression import LogisticRegression
from si.model_selection.cross_validation import k_fold_cross_validation
from si.model_selection.grid_search import grid_search_cv

In [2]:
# datasets
# The dataset root used to be baked into a machine-specific absolute path.
# It can now be overridden with the SI_DATASETS_DIR environment variable; the
# fallback is the original location, so an unchanged setup loads the same file.
datasets_dir = os.environ.get('SI_DATASETS_DIR', r'C:\Users\pc\PycharmProjects\si\datasets')
breast_bin_path = os.path.join(datasets_dir, 'breast_bin', 'breast-bin.csv')

# features=False / label=True are forwarded to read_csv exactly as before.
# NOTE(review): presumably "no header row with feature names, last column is
# the label" - confirm against read_csv.
breast_bin_dataset = read_csv(breast_bin_path, features=False, label=True)

In [3]:
# cross validation
# Score a default-constructed logistic regression with cv=5;
# the call returns one score per fold, displayed below.
baseline_model = LogisticRegression()
fold_scores = k_fold_cross_validation(baseline_model, breast_bin_dataset, cv=5)
fold_scores

[0.9712230215827338,
 0.9568345323741008,
 0.9424460431654677,
 0.9856115107913669,
 0.9784172661870504]

In [4]:
# grid search cv
# Start from a fresh, default-constructed model for the search.
lg = LogisticRegression()

# Candidate values per hyperparameter: 2 * 3 * 6 = 36 combinations in total.
parameter_grid = {
    'l2_penalty': (1, 10),
    'alpha': (1e-3, 1e-4, 1e-5),
    'max_iter': tuple(range(1000, 7000, 1000)),  # 1000, 2000, ..., 6000
}

# Run the grid search with cv=3. The result is a dict exposing
# 'scores', 'hyperparameters', 'best_hyperparameters' and 'best_score'.
scores = grid_search_cv(lg, breast_bin_dataset, hyperparameter_grid=parameter_grid, cv=3)

scores

{'scores': [0.9669540229885056,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885056,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885057,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885056,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885056,
  0.9669540229885056,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885057,
  0.9683908045977012,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885056,
  0.9669540229885057],
 'hyperparameters': [{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000},
  {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000},
  {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 3000},
  {'l2_penalt

In [5]:
# scores: one entry per hyperparameter combination tried by the grid search
scores["scores"]

[0.9669540229885056,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885056,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885057,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885056,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885056,
 0.9669540229885056,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885057,
 0.9683908045977012,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885056,
 0.9669540229885057]

In [6]:
# best hyperparameters: the combination reported as best by the grid search
scores["best_hyperparameters"]

{'l2_penalty': 10, 'alpha': 0.0001, 'max_iter': 1000}

In [7]:
# best score: the score reported alongside the best hyperparameters
scores["best_score"]

0.9683908045977012