## Teste Cross Validate

In [1]:
from src.si.io.csv_file import read_csv
from sklearn.preprocessing import StandardScaler
from src.si.model_selection.cross_validate import cross_validate
from src.si.model_selection.grid_search import grid_search_cv
from src.si.model_selection.randomize_grid_search import randomized_search_cv
from src.si.linear_model.logistic_regression import LogisticRegression
import numpy as np

In [3]:
dataset = read_csv(r"C:\Users\35193\Desktop\sib\SIB\datasets\breast-bin.csv",label=True)
dataset.X = StandardScaler().fit_transform(dataset.X)
lg_model = LogisticRegression()
score = cross_validate(lg_model, dataset=dataset,cv=5)
score

{'seeds': [965, 116, 45, 612, 421],
 'train': [0.9695885509838998,
  0.9677996422182469,
  0.9713774597495528,
  0.964221824686941,
  0.9731663685152058],
 'test': [0.9568345323741008,
  0.9640287769784173,
  0.9496402877697842,
  0.9784172661870504,
  0.9424460431654677]}

## Grid Search test

In [5]:
lg_model = LogisticRegression()
lg_model_parameters = {'l2_penalty': [1, 10],
             'alpha': [0.001, 0.0001],
            'max_iter': [1000, 2000]}

scores = grid_search_cv(lg_model, dataset, lg_model_parameters, cv=3)
scores

[{'seeds': [433, 141, 550],
  'train': [0.960644007155635, 0.962432915921288, 0.9677996422182469],
  'test': [0.9928057553956835, 0.9856115107913669, 0.9640287769784173],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}},
 {'seeds': [965, 116, 45],
  'train': [0.9695885509838998, 0.9695885509838998, 0.9713774597495528],
  'test': [0.9568345323741008, 0.9640287769784173, 0.9496402877697842],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}},
 {'seeds': [612, 421, 700],
  'train': [0.964221824686941, 0.9731663685152058, 0.964221824686941],
  'test': [0.9784172661870504, 0.9424460431654677, 0.9784172661870504],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}},
 {'seeds': [555, 572, 70],
  'train': [0.9695885509838998, 0.9731663685152058, 0.964221824686941],
  'test': [0.9568345323741008, 0.9424460431654677, 0.9784172661870504],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}},
 {'seeds': [768, 128, 827],
  'tr

## Randomized Search test

In [6]:
lg_model = LogisticRegression()

lg_model_parameters = {'l2_penalty': np.linspace(1, 10, 10),
             'alpha': np.linspace(0.001, 0.0001, 100),
            'max_iter': np.linspace(1000, 2000, 200)}

scores = randomized_search_cv(lg_model, dataset, lg_model_parameters, cv=3)
scores

{'parameters': [{'l2_penalty': 8.0,
   'alpha': 0.0008818181818181819,
   'max_iter': 1025.1256281407036},
  {'l2_penalty': 4.0,
   'alpha': 0.0005454545454545455,
   'max_iter': 1874.3718592964824},
  {'l2_penalty': 3.0,
   'alpha': 0.0002909090909090909,
   'max_iter': 1934.6733668341708},
  {'l2_penalty': 9.0,
   'alpha': 0.0005181818181818182,
   'max_iter': 1693.4673366834172},
  {'l2_penalty': 8.0, 'alpha': 0.0001, 'max_iter': 1587.9396984924624},
  {'l2_penalty': 3.0,
   'alpha': 0.0004090909090909091,
   'max_iter': 1904.5226130653268},
  {'l2_penalty': 7.0,
   'alpha': 0.0007272727272727272,
   'max_iter': 1673.3668341708544},
  {'l2_penalty': 1.0,
   'alpha': 0.0008818181818181819,
   'max_iter': 1994.9748743718594},
  {'l2_penalty': 9.0,
   'alpha': 0.0005181818181818182,
   'max_iter': 1155.7788944723618},
  {'l2_penalty': 1.0,
   'alpha': 0.00011818181818181817,
   'max_iter': 1025.1256281407036}],
 'seeds': [592, 283, 783, 255, 134, 51, 51, 37, 777, 30],
 'train': [[0.964