In [36]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
import numpy as np

In [2]:
df = load_iris()

In [3]:
dir(df)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [4]:
iris_df = pd.DataFrame(df.data, columns=df.feature_names)
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
iris_df["flower"] = df.target
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [6]:
iris_df['flower'] = iris_df['flower'].apply(lambda x: df.target_names[x])
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [12]:
X_train, X_test, y_train, y_test = train_test_split(iris_df.drop(['flower'],axis=1), df.target, test_size=0.3)

In [13]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(105, 4) (45, 4) (105,) (45,)


In [28]:
cross_val_score(SVC(kernel = "linear", C=10, gamma = "auto"), X_train, y_train ,cv=6)

array([1.        , 0.88888889, 0.88888889, 0.94117647, 0.94117647,
       1.        ])

In [30]:
cross_val_score(SVC(kernel = "poly", C=10,gamma = "auto"), X_train, y_train ,cv=6)

array([0.94444444, 0.94444444, 0.83333333, 0.94117647, 0.88235294,
       1.        ])

In [31]:
cross_val_score(SVC(kernel = "sigmoid",C=10, gamma = "auto"), X_train, y_train ,cv=6)

array([0.33333333, 0.27777778, 0.33333333, 0.17647059, 0.23529412,
       0.17647059])

In [32]:
cross_val_score(SVC(kernel = "rbf", C=10,gamma = "auto"), X_train, y_train ,cv=6)

array([1.        , 0.94444444, 0.83333333, 0.82352941, 0.88235294,
       1.        ])

In [39]:
kernels = ["rbf", "linear", "sigmoid", "poly"]
c = [1,10,20,100]
arg_scores = {}
for kval in kernels:
    for cval in c:
        cv_score = cross_val_score(SVC(kernel=kval, C=cval, gamma="auto"), X_train, y_train, cv=6)
        arg_scores[kval + "_" + str(cval)] = np.average(cv_score)
        
arg_scores

{'rbf_1': 0.9330065359477123,
 'rbf_10': 0.9139433551198257,
 'rbf_20': 0.914488017429194,
 'rbf_100': 0.9052287581699346,
 'linear_1': 0.9330065359477123,
 'linear_10': 0.9433551198257081,
 'linear_20': 0.9340958605664489,
 'linear_100': 0.9340958605664489,
 'sigmoid_1': 0.25544662309368193,
 'sigmoid_10': 0.25544662309368193,
 'sigmoid_20': 0.25544662309368193,
 'sigmoid_100': 0.25544662309368193,
 'poly_1': 0.9335511982570806,
 'poly_10': 0.9242919389978214,
 'poly_20': 0.9242919389978214,
 'poly_100': 0.9052287581699346}

In [40]:
from sklearn.model_selection import GridSearchCV

In [43]:
clf = GridSearchCV(SVC(),{"gamma":["scale", "auto"], 
                              "kernel":["linear", "rbf", "sigmoid", "poly"], 
                              "C":[1,10,20,10]}, 
                   cv=6, return_train_score=False)

In [45]:
clf.fit(X_train,y_train)

GridSearchCV(cv=6, estimator=SVC(),
             param_grid={'C': [1, 10, 20, 10], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'sigmoid', 'poly']})

In [58]:
df = pd.DataFrame(clf.cv_results_)

In [59]:
df.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,mean_test_score,std_test_score,rank_test_score
0,0.011338,0.003499,0.007895,0.002994,1,scale,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",1.0,0.944444,0.888889,0.882353,0.882353,1.0,0.933007,0.051937,11
1,0.009516,0.003092,0.008331,0.002359,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.944444,0.944444,0.944444,0.882353,0.882353,1.0,0.933007,0.04085,11
2,0.014183,0.001871,0.007686,0.002351,1,scale,sigmoid,"{'C': 1, 'gamma': 'scale', 'kernel': 'sigmoid'}",0.333333,0.277778,0.333333,0.117647,0.235294,0.235294,0.255447,0.073521,25
3,0.007504,0.002502,0.005002,8e-06,1,scale,poly,"{'C': 1, 'gamma': 'scale', 'kernel': 'poly'}",1.0,0.888889,0.833333,0.941176,0.882353,1.0,0.924292,0.06196,17
4,0.008334,0.002358,0.0075,0.002508,1,auto,linear,"{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}",1.0,0.944444,0.888889,0.882353,0.882353,1.0,0.933007,0.051937,11


In [60]:
df = df[["param_C", "param_gamma", "param_kernel", "mean_test_score"]]
df

Unnamed: 0,param_C,param_gamma,param_kernel,mean_test_score
0,1,scale,linear,0.933007
1,1,scale,rbf,0.933007
2,1,scale,sigmoid,0.255447
3,1,scale,poly,0.924292
4,1,auto,linear,0.933007
5,1,auto,rbf,0.933007
6,1,auto,sigmoid,0.255447
7,1,auto,poly,0.933551
8,10,scale,linear,0.943355
9,10,scale,rbf,0.933007


In [64]:
clf.best_params_

{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}

In [67]:
from sklearn.model_selection import RandomizedSearchCV
rclf = RandomizedSearchCV(SVC(),{"gamma":["scale", "auto"], 
                                 "kernel":["linear", "rbf", "sigmoid", "poly"],
                                 "C":[1,10,20,10]},
                          cv=6,
                          return_train_score=False,
                         n_iter=2)

In [69]:
rclf.fit(iris_df.drop(['flower'],axis=1), iris_df.flower)

RandomizedSearchCV(cv=6, estimator=SVC(), n_iter=2,
                   param_distributions={'C': [1, 10, 20, 10],
                                        'gamma': ['scale', 'auto'],
                                        'kernel': ['linear', 'rbf', 'sigmoid',
                                                   'poly']})

In [70]:
rclf.best_params_

{'kernel': 'linear', 'gamma': 'scale', 'C': 1}

In [71]:
rclfl_df = pd.DataFrame(rclf.cv_results_)

In [72]:
rclfl_df.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,mean_test_score,std_test_score,rank_test_score
0,0.011218,0.003572,0.007504,0.002499,linear,scale,1,"{'kernel': 'linear', 'gamma': 'scale', 'C': 1}",0.96,1.0,0.96,0.96,1.0,1.0,0.98,0.02,1
1,0.009169,0.001864,0.008781,0.002835,linear,scale,10,"{'kernel': 'linear', 'gamma': 'scale', 'C': 10}",1.0,1.0,0.92,0.92,0.96,1.0,0.966667,0.035901,2


In [73]:
rclfl_df.columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_kernel', 'param_gamma', 'param_C', 'params', 'split0_test_score',
       'split1_test_score', 'split2_test_score', 'split3_test_score',
       'split4_test_score', 'split5_test_score', 'mean_test_score',
       'std_test_score', 'rank_test_score'],
      dtype='object')

In [74]:
rclf_df = rclfl_df[['param_kernel','param_gamma','param_C','mean_test_score']]
rclf_df

Unnamed: 0,param_kernel,param_gamma,param_C,mean_test_score
0,linear,scale,1,0.98
1,linear,scale,10,0.966667


# how to choose the best model

In [75]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [77]:
model_params = {
    'svm': {
        'model': SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    }
}


In [80]:
scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(iris_df.drop(['flower'],axis=1), iris_df.flower)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.96,{'n_estimators': 10}
2,logistic_regression,0.966667,{'C': 5}
