In [None]:
# cross validation k-Fold

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Baseline classifier scored with 10-fold cross-validation.
# NOTE: despite the variable name `reg`, LogisticRegression is a classifier.
reg = LogisticRegression()

# Shuffled 10-fold splitter; the fixed seed keeps the folds reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# kf.split yields *positional* row indices into X_train, so the labels must
# come from y_train (the target that matches X_train) and be indexed by
# position as well. Use `.iloc` when y_train is a pandas object and plain
# indexing when it is a numpy array.
y_by_position = getattr(y_train, "iloc", y_train)

# Manual walk over the folds. After the loop the four fold variables hold the
# LAST fold only; `val` holds the validation labels for that fold.
for train_index, val_index in kf.split(X_train):
    X_train2, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train2, val = y_by_position[train_index], y_by_position[val_index]

# cross_val_score does its own splitting with `kf`; it does not use the loop
# variables above.
final_score = cross_val_score(reg, X_train, y_train, cv=kf, scoring="accuracy")
print(f'Scores for each fold: {final_score}')
print('Final Model Score: %.2f' %(final_score.mean()))

In [None]:
# grid search CV

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit

# Random forest whose hyper-parameters are tuned by exhaustive grid search.
# Seeded so repeated runs of this cell give the same result.
clf = RandomForestClassifier(random_state=42)

# 3 x 2 x 3 = 18 candidate settings, all of which are evaluated.
params_dict = {'n_estimators': [50, 100, 150],
               'criterion': ["gini", "entropy"],
               'max_depth': [None, 50, 100]}

# Report the estimator that is actually being tuned in this cell.
print(f'{type(clf).__name__} Tuning hyper-parameters with grid')

# Five random train/validation splits; seeded for reproducible splits.
ss_cv = ShuffleSplit(n_splits=5, random_state=42)

# Pass `clf` (the estimator defined above) to the search.
clf_forest = GridSearchCV(clf, params_dict, cv=ss_cv, verbose=10, n_jobs=-1)

clf_forest.fit(X_train, y_train)

print("Best parameters set found on validation set:")
print(clf_forest.best_params_, '\n')

In [None]:
# Random search CV

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import ShuffleSplit

# Random forest tuned by randomized search over the same grid of values.
clf = RandomForestClassifier()

# 3 x 2 x 3 = 18 possible settings in total.
params_dict = {'n_estimators': [50, 100, 150],
               'criterion': ["gini", "entropy"],
               'max_depth': [None, 50, 100]}

print(f'{type(clf).__name__} Tuning hyper-parameters with random search')

# Five random train/validation splits; seeded for reproducible splits.
ss_cv = ShuffleSplit(n_splits=5, random_state=42)

# n_iter must stay below the 18 available combinations: when every parameter
# is given as a list and n_iter exceeds the grid size, scikit-learn warns and
# simply evaluates the whole grid, which defeats the purpose of random search.
clf_forest = RandomizedSearchCV(clf, params_dict, random_state=42,
                                cv=ss_cv, verbose=10, n_iter=10, n_jobs=-1)

clf_forest.fit(X_train, y_train)

print("Best parameters set found on validation set:")
print(clf_forest.best_params_, '\n')

In [None]:
# Pipeline

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Two-stage pipeline: standardise the features, then fit a linear SVM.
# The step names ('scaler', 'clf') are the prefixes used when addressing
# step parameters as '<step>__<param>'.
steps = [
    ('scaler', StandardScaler()),
    ('clf', LinearSVC()),
]

pipeline = Pipeline(steps=steps)
pipeline.fit(X_train, y_train)

# NOTE(review): X_val is not defined in this cell. In the k-fold cell it is
# assigned as a slice of X_train -- confirm this is the intended hold-out set.
preds = pipeline.predict(X_val)

In [None]:
# CV pipeline

# Search space for the pipeline's 'clf' step, addressed as '<step>__<param>':
# ten log-spaced values of C between 10**1 and 10**3, with and without
# balanced class weights.
param_grid = dict(
    clf__C=np.logspace(1, 3, num=10),
    clf__class_weight=[None, 'balanced'],
)

from sklearn.model_selection import GridSearchCV
def cv_fit_pipeline(X_train, y_train, pipeline, param_grid, *,
                    scoring='f1', cv=3, n_jobs=-1, verbose=10):
    """Run a cross-validated grid search over ``pipeline`` and return it fitted.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed straight to ``GridSearchCV.fit``.
    pipeline
        Estimator (e.g. a ``Pipeline``) whose parameters are searched.
    param_grid
        Mapping of parameter names to candidate values, as accepted by
        ``GridSearchCV``.
    scoring
        Metric used to rank candidates. The default ``'f1'`` is the binary
        F1 score; pass e.g. ``'f1_macro'`` or ``'accuracy'`` for multiclass
        targets.
    cv
        Cross-validation strategy (number of folds or a splitter object).
    n_jobs
        Number of parallel jobs; ``-1`` uses all available processors.
    verbose
        Verbosity level forwarded to ``GridSearchCV``.

    Returns
    -------
    The fitted ``GridSearchCV`` object (``fit`` returns the search itself).
    """
    gs = GridSearchCV(estimator=pipeline, param_grid=param_grid, scoring=scoring,
                      cv=cv, n_jobs=n_jobs, verbose=verbose)
    return gs.fit(X_train, y_train)


# Fit the grid search for the pipeline on the training data.
gs_results = cv_fit_pipeline(
    X_train=X_train,
    y_train=y_train,
    pipeline=pipeline,
    param_grid=param_grid,
)