[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Omerdan03/DanzDSTools/blob/master/Model_selection.ipynb)

In [None]:
# cross validation k-Fold
#
# Estimate the accuracy of a logistic regression with 10-fold cross
# validation. X_train (features) and y_train (labels) are not created in this
# cell and must already be defined.

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

reg = LogisticRegression()

# Shuffle before splitting; the fixed seed keeps the fold assignment
# reproducible between runs.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# cross_val_score fits and scores the estimator on every fold produced by kf,
# so no manual loop over kf.split() is required.
final_score = cross_val_score(reg, X_train, y_train, cv=kf, scoring="accuracy")
print(f'Scores for each fold: {final_score}')
print(f'Final Model Score: {final_score.mean():.2f}')

In [None]:
# grid search CV
#
# Exhaustive hyper-parameter search for a logistic regression. X_train and
# y_train are not created in this cell and must already be defined.

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit

cls = LogisticRegression()

# Values shared by every sub-grid below.
tol_grid = [10**-5, 10**-4, 10**-3]
c_grid = [1.5, 1, 0.7]

# Not every solver supports every penalty, so the search space is a list of
# sub-grids (one per penalty) that only contain valid combinations:
#   l2         -> all five solvers
#   l1         -> liblinear, saga
#   elasticnet -> saga only, and it additionally requires l1_ratio
# A single dict crossing all penalties with all solvers would spend most of
# the search on candidates that can only fail to fit.
params_dict = [
    {'penalty': ['l2'],
     'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
     'tol': tol_grid,
     'C': c_grid},
    {'penalty': ['l1'],
     'solver': ['liblinear', 'saga'],
     'tol': tol_grid,
     'C': c_grid},
    {'penalty': ['elasticnet'],
     'solver': ['saga'],
     'l1_ratio': [0.25, 0.5, 0.75],
     'tol': tol_grid,
     'C': c_grid},
]

# Report the estimator that is actually tuned in this cell.
print(f'{type(cls).__name__} Tuning hyper-parameters with grid')

# Five random train/validation splits; seeded so the search is reproducible.
ss_cv = ShuffleSplit(n_splits=5, random_state=42)

clf_forest = GridSearchCV(cls, params_dict, cv=ss_cv, verbose=10, n_jobs=-1)

clf_forest.fit(X_train, y_train)

print("Best parameters set found on validation set:")
print(clf_forest.best_params_, '\n')

In [None]:
# Random search CV
#
# Randomized hyper-parameter search for a logistic regression. X_train and
# y_train are not created in this cell and must already be defined.

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import ShuffleSplit

reg = LogisticRegression()

# Values shared by every sub-space below.
tol_grid = [10**-6, 10**-5, 10**-4, 10**-3]
c_grid = [2, 1.5, 1, 0.7]

# Not every solver supports every penalty, so the search space is a list of
# sub-spaces (one per penalty) that only contain valid combinations:
#   l2         -> all five solvers
#   l1         -> liblinear, saga
#   elasticnet -> saga only, and it additionally requires l1_ratio
params_dict = [
    {'penalty': ['l2'],
     'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
     'tol': tol_grid,
     'C': c_grid},
    {'penalty': ['l1'],
     'solver': ['liblinear', 'saga'],
     'tol': tol_grid,
     'C': c_grid},
    {'penalty': ['elasticnet'],
     'solver': ['saga'],
     'l1_ratio': [0.25, 0.5, 0.75],
     'tol': tol_grid,
     'C': c_grid},
]

print(f'{type(reg).__name__} Tuning hyper-parameters with random search')

# Five random train/validation splits; seeded so the search is reproducible.
ss_cv = ShuffleSplit(n_splits=5, random_state=42)

# Every parameter is a finite list, so the number of distinct candidates is
# bounded; cap n_iter at that size instead of asking for more samples than
# exist.
n_candidates = min(200, len(ParameterGrid(params_dict)))

clf_forest = RandomizedSearchCV(reg, params_dict, random_state=42,
                                cv=ss_cv, verbose=10, n_iter=n_candidates,
                                n_jobs=-1)

clf_forest.fit(X_train, y_train)

print("Best parameters set found on validation set:")
print(clf_forest.best_params_, '\n')