### Hyperparameter tuning with `RandomizedSearchCV`

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline


from importnb import imports
with imports("ipynb"):
    import setUp 
    
# Re-export the data objects exposed by the imported setUp notebook so the
# cells below can refer to them by their short names.
X, y = setUp.X, setUp.y
X_train, X_test = setUp.X_train, setUp.X_test
y_train, y_test = setUp.y_train, setUp.y_test

In [2]:
from sklearn.linear_model import LogisticRegression

# Build a LogisticRegression with no arguments so its default hyperparameters
# can be inspected before choosing which ones to tune.
model = LogisticRegression()

# Last expression of the cell: displays the parameter-name -> value mapping.
model.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'deprecated',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [3]:
np.random.seed(42)

# Search space for LogisticRegression: 20 values of C spaced evenly on a log
# scale from 1e-4 to 1e4, always paired with the liblinear solver.
log_reg_grid = dict(
    C=np.logspace(-4, 4, num=20),
    solver=["liblinear"],
)

# Search space for RandomForestClassifier.
rf_grid = dict(
    n_estimators=np.arange(10, 1000, 50),    # 10, 60, ..., 960
    max_depth=[None, 3, 5, 10],
    min_samples_split=np.arange(2, 20, 2),   # 2, 4, ..., 18
    min_samples_leaf=np.arange(2, 20, 2),    # 2, 4, ..., 18
)

In [4]:
np.random.seed(42)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV


# Randomized search over log_reg_grid for logistic regression.
# n_iter=20 equals the number of combinations in log_reg_grid (20 C values x
# 1 solver), so each combination is tried once, scored with 5-fold CV.
rs_log_reg = RandomizedSearchCV(
    estimator=LogisticRegression(),
    param_distributions=log_reg_grid,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# Kept as the cell's last expression so the fitted search object is displayed.
rs_log_reg.fit(X_train, y_train)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [5]:
# Parameter combination selected by the logistic-regression search.
rs_log_reg.best_params_

{'solver': 'liblinear', 'C': 0.23357214690901212}

In [6]:
# Evaluate the fitted search on the test split; no custom `scoring` was passed
# to the search, so this uses the estimator's own default score.
rs_log_reg.score(X_test, y_test)

0.8852459016393442

In [7]:
np.random.seed(42)


# Randomized search over rf_grid for random forest: 20 sampled combinations,
# each scored with 5-fold cross-validation.
#
# Reproducibility: the search runs with n_jobs=-1, so the individual fits are
# dispatched to joblib worker processes, which may not share the global NumPy
# seed set above. Passing random_state explicitly to both the forest and the
# search pins the sampled candidates and every fitted forest, so re-running
# the notebook gives the same best_params_ and test score.
rs_rf = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=rf_grid,
    cv=5,
    n_iter=20,
    verbose=1,
    n_jobs=-1,
    random_state=42,
)

# Kept as the cell's last expression so the fitted search object is displayed.
rs_rf.fit(X_train, y_train)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [8]:
# Parameter combination selected by the random-forest search.
rs_rf.best_params_  

{'n_estimators': 560,
 'min_samples_split': 12,
 'min_samples_leaf': 16,
 'max_depth': 10}

In [9]:
# Evaluate the fitted search on the test split; no custom `scoring` was passed
# to the search, so this uses the estimator's own default score.
rs_rf.score(X_test, y_test)

0.8688524590163934

Our baseline logistic regression model scored about 0.88 on the test set.

The tuned random forest scored about 0.87, which is still below 0.88.

So we will focus further hyperparameter tuning on logistic regression.