# Hyperparameter Tuning

In [13]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [15]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X, y = data.data, data.target

In [16]:
model_params = {
    'knn': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 10]
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
}

In [17]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import pandas as pd
scores = []

for model_name, mp in model_params.items():
    print('Running', model_name)
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

df2 = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df2

Running knn


Running random_forest


Unnamed: 0,model,best_score,best_params
0,knn,0.93147,{'n_neighbors': 10}
1,random_forest,0.957848,{'n_estimators': 10}


From the Grid Search, the values of n is 10 for KNN for the breast cancer dataset.

In [18]:
scores = []

for model_name, mp in model_params.items():
    print('Running', model_name)
    clf = RandomizedSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

df3 = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df3

Running knn
Running random_forest


Unnamed: 0,model,best_score,best_params
0,knn,0.93147,{'n_neighbors': 10}
1,random_forest,0.950815,{'n_estimators': 10}
