In [14]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn import svm, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Hyper-parameter ---> svm, RandomForestClassifier, LogisticRegression

In [2]:
iris = datasets.load_iris()

In [3]:
clf = GridSearchCV(svm.SVC(gamma='auto'), {'C': [1,3, 6, 10,20], 'kernel': ['rbf', 'linear']}, cv=5, return_train_score=False)
clf.fit(iris.data, iris.target)
myCvResults = clf.cv_results_

df = pd.DataFrame(data=myCvResults)
df.head(2)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001011,0.000165,0.000509,4.4e-05,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.00072,6.8e-05,0.00038,6.5e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1


In [4]:
# filter your dataframe
df[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [5]:
# check the best score
clf.best_score_

0.9800000000000001

In [6]:
# best params
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

# Dynamic hyper parameter tuning

In [7]:
modelAndParams = {
    'svm' : { 
        'model': svm.SVC(gamma='auto'), 
        'params': {'C': [1,10,20], 'kernel': ['rbf', 'linear']}
        },
    
    'random_forest' : { 
        'model': RandomForestClassifier(), 
        'params': {'n_estimators': [1, 5, 10]} 
        },
    
    'logestic_regression' : { 
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {'C': [1,5,10]} 
    }
}



scores_box = []
for key, val in modelAndParams.items():
    clf = GridSearchCV(val['model'], val['params'], cv=5, return_train_score=False)
    clf.fit(iris.data, iris.target)
    scores_box.append({'model': key, 'best_score': clf.best_score_, 'best_params': clf.best_params_ })
    print(clf.best_score_)
newDf = pd.DataFrame(scores_box)
newDf

0.9800000000000001
0.9533333333333334
0.9666666666666668


Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.953333,{'n_estimators': 5}
2,logestic_regression,0.966667,{'C': 5}


# KNN Hyper-parameter

In [1]:
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
iris = datasets.load_iris()

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

KNNmodel = KNeighborsClassifier(n_neighbors=5)

KNNmodel.fit(X_train, y_train)
xTrainPred = KNNmodel.predict(X_train)
xTrainAccu = accuracy_score(y_train, xTrainPred)
print('xTrainAccu:- >>> ', xTrainAccu)



xTestPred = KNNmodel.predict(X_test)
xTestAccu = accuracy_score(y_test, xTestPred)
print('xTestAccu:- >>> ', xTestAccu)
print(' ')



xTrainAccu:- >>>  0.9553571428571429
xTestAccu:- >>>  1.0
 


In [2]:
from sklearn.model_selection import GridSearchCV

grid_params = { 
               'n_neighbors' : [5,7,10, 11],
               'weights' : ['uniform','distance'],
               'metric' : ['minkowski','euclidean','manhattan']
                }

clf = GridSearchCV(KNNmodel, grid_params, verbose = 1, cv=3, n_jobs = -1)
clf.fit(X_train, y_train)
print('best-score >>>', clf.best_score_, clf.best_index_, clf.best_params_)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
best-score >>> 0.9732100521574206 20 {'metric': 'manhattan', 'n_neighbors': 10, 'weights': 'uniform'}


In [13]:
myDf= pd.DataFrame(data=clf.cv_results_)
# myDf
# myDf[['params', 'param_weights', 'mean_test_score']]