In [300]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris


In [301]:
iris = load_iris()

In [302]:
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [303]:
df = pd.DataFrame(iris.data, columns = iris.feature_names)

In [304]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [305]:
df['flower']  = iris['target']

In [306]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [307]:
df.flower = df.flower.apply(lambda x: iris.target_names[x])

In [308]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [309]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size = 0.2)

In [310]:
from sklearn.svm import SVC
svm = SVC()
svm.fit(x_train, y_train)

In [311]:
svm.score(x_test, y_test)

1.0

In [312]:
from sklearn.model_selection import cross_val_score

In [313]:
cross_val_score(SVC(kernel = 'rbf', C = 10, gamma = 'auto'), x_train, y_train, cv = 5)

array([1.        , 0.95833333, 1.        , 0.95833333, 1.        ])

In [314]:
cross_val_score(SVC(kernel = 'linear', C = 10, gamma = 'auto'), x_train, y_train, cv = 5)

array([0.95833333, 0.95833333, 1.        , 0.91666667, 1.        ])

In [315]:
cross_val_score(SVC(kernel = 'rbf', C = 1, gamma = 'auto'), x_train, y_train, cv = 5)

array([0.95833333, 1.        , 1.        , 0.95833333, 1.        ])

In [316]:
cross_val_score(SVC(kernel = 'linear', C = 20, gamma = 'auto'), x_train, y_train, cv = 5)

array([0.95833333, 0.95833333, 1.        , 0.91666667, 1.        ])

In [317]:
kernel = ['linear', 'rbf']
C = [1, 10, 20]
scores = {}
for k in kernel:
    for c in C:
        scores[k + '_'+ str(c)] = (cross_val_score(SVC(kernel = k, C = c, gamma = 'auto'), x_train, y_train, cv = 5)).mean()

In [318]:
scores

{'linear_1': 0.9916666666666668,
 'linear_10': 0.9666666666666668,
 'linear_20': 0.9666666666666668,
 'rbf_1': 0.9833333333333334,
 'rbf_10': 0.9833333333333334,
 'rbf_20': 0.9583333333333333}

In [319]:
from sklearn.model_selection import GridSearchCV

In [320]:
from sklearn.model_selection import GridSearchCV
gscv = GridSearchCV( SVC(gamma = 'auto'), 
    { 'kernel': ['linear', 'rbf'], 'C': [1, 10, 20]}, 
    cv = 5, return_train_score= False 
    )
gscv.fit(x_train, y_train)
gscv.score(x_test, y_test)

0.9666666666666667

In [321]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [322]:
param_model = {
    
    'svm':{
        'model': SVC(gamma = 'auto'), 
        'params': {'C' : [1, 10, 20], 'kernel' : ['rbf', 'linear']}
    }, 
    'linear_regression':{
        
        'model': LogisticRegression(solver = 'liblinear', multi_class = 'auto' ), 
        'params': {'C' : [1, 10, 20]}
        
    }, 
    'random_forest':{
        'model': RandomForestClassifier(),
        'params': {'n_estimators': [1, 5, 10]}
    }
    
}

In [323]:
from sklearn.model_selection import GridSearchCV
scores = []
for model_name, mp in param_model.items():
    gscv = GridSearchCV(mp['model'], mp['params'], cv = 5, return_train_score = False)
    gscv.fit(x_train, y_train)
    gscv.score(x_test, y_test)
    scores.append({
        'model': model_name, 
        'best_score': gscv.best_score_,
        'best_params': gscv.best_params_
    })

In [324]:
scores

[{'model': 'svm',
  'best_score': 0.9916666666666668,
  'best_params': {'C': 1, 'kernel': 'linear'}},
 {'model': 'linear_regression', 'best_score': 0.975, 'best_params': {'C': 10}},
 {'model': 'random_forest',
  'best_score': 0.9666666666666666,
  'best_params': {'n_estimators': 5}}]

In [333]:
data = pd.DataFrame(scores, columns = ['model', 'best_score', 'best_params'])

In [334]:
data

Unnamed: 0,model,best_score,best_params
0,svm,0.991667,"{'C': 1, 'kernel': 'linear'}"
1,linear_regression,0.975,{'C': 10}
2,random_forest,0.966667,{'n_estimators': 5}
