In [1]:
import pandas as pd

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
df['flowers'] = iris.target
df['flowers'] = df['flowers'].apply(lambda x: iris.target_names[x])
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flowers
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
x = df.drop(['flowers'], axis=1)

In [6]:
y = df.flowers

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [8]:
from sklearn.svm import SVC
svm_model = SVC()
svm_model.fit(x_train, y_train)
svm_model.score(x_test, y_test)

1.0

In [9]:
from sklearn.model_selection import cross_val_score

In [10]:
cross_val_score(SVC(kernel='linear', C=10, gamma = 'auto'), iris.data, iris.target)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [11]:
cross_val_score(SVC(kernel='rbf', C=10, gamma = 'auto'), iris.data, iris.target)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [12]:
cross_val_score(SVC(kernel='rbf', C=20, gamma = 'auto'), iris.data, iris.target)

array([0.96666667, 1.        , 0.9       , 0.96666667, 1.        ])

In [13]:
import numpy as np

In [14]:
kernels = ['linear', 'rbf']
C = [1, 10, 20]
avg_score = {}
for kval in kernels:
    for cval in C:
        c_score= cross_val_score(SVC(kernel=kval, C=cval), iris.data, iris.target)
        avg_score[kval + "_" + str(cval)] = np.average(c_score)
avg_score

{'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_20': 0.9666666666666666,
 'rbf_1': 0.9666666666666666,
 'rbf_10': 0.9800000000000001,
 'rbf_20': 0.9800000000000001}

In [15]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(SVC(),                  #first provide the method then parameters
                   {                   
    'kernel' :['rbf','linear'],
    'C':[1,10,20]                          #{inn this bracket we provide parameters}  { }
},cv=5, return_train_score=False)         #cv means number of folds we want to train and test

clf.fit(iris.data, iris.target)
clf.cv_results_                            #cv_results_ show how every parameter performs

{'mean_fit_time': array([0.00099587, 0.00120511, 0.00039892, 0.00040007, 0.00059924,
        0.00079765]),
 'std_fit_time': array([0.00062842, 0.0003972 , 0.00048858, 0.00048998, 0.00048929,
        0.00039883]),
 'mean_score_time': array([0.00019941, 0.00019965, 0.00039892, 0.00039897, 0.00039802,
        0.        ]),
 'std_score_time': array([0.00039883, 0.0003993 , 0.00048858, 0.00048864, 0.00048747,
        0.        ]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],


In [16]:
df1 = pd.DataFrame(clf.cv_results_)
df1

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000996,0.000628,0.000199,0.000399,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,0.966667,0.966667,0.933333,1.0,0.966667,0.021082,5
1,0.001205,0.000397,0.0002,0.000399,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.000399,0.000489,0.000399,0.000489,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.0004,0.00049,0.000399,0.000489,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.000599,0.000489,0.000398,0.000487,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
5,0.000798,0.000399,0.0,0.0,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,5


In [17]:
df1[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.966667
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.98
5,20,linear,0.966667


In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [19]:
model_params = {
    'svm' : { 
        'model' : {SVC(gamma='auto'),
        'params' : {
        'kernel' :['rbf','linear'],
        'C':[1,10,20]
        },
    
    'rf': {
        'model' : {RandomForestClassifier()},
        'params' : {
            'n_estimators': [1,10,20]
        }
    },
    'lr' : {
        'model' : {LogisticRegression()},
        'params' : {
            'C' : [1,5,10]
        } 
    }
}

# model_params = {
#     'svm': {
#         'model': svm.SVC(gamma='auto'),
#         'params' : {
#             'C': [1,10,20],
#             'kernel': ['rbf','linear']
#         }  
#     },
#     'random_forest': {
#         'model': RandomForestClassifier(),
#         'params' : {
#             'n_estimators': [1,5,10]
#         }
#     },
#     'logistic_regression' : {
#         'model': LogisticRegression(solver='liblinear',multi_class='auto'),
#         'params': {
#             'C': [1,5,10]
#         }
#     }
# }

SyntaxError: invalid syntax (<ipython-input-19-cbd341f526fd>, line 4)

In [None]:
score = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(iris.data, iris.target)
    score.append({
        'model' : model_name,
        'best score' : clf.best_score
    })

##randomizd search cv

In [25]:
from sklearn.model_selection import RandomizedSearchCV
rs = RandomizedSearchCV(SVC(), 
                       {'kernel' : ['rbf','linear'], 
                       'C' : [1, 10, 20]}, 
                       cv = 5,
                       return_train_score = False, 
                       n_iter = 2              #n_iter defines number of combinations
                       )
rs.fit(iris.data, iris.target)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=2,
                   param_distributions={'C': [1, 10, 20],
                                        'kernel': ['rbf', 'linear']})

In [27]:
rs.cv_results_

{'mean_fit_time': array([0.00100245, 0.00039101]),
 'std_fit_time': array([1.46215721e-05, 4.79046685e-04]),
 'mean_score_time': array([0.00040174, 0.00060582]),
 'std_score_time': array([0.00049202, 0.0004949 ]),
 'param_kernel': masked_array(data=['rbf', 'rbf'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[1, 20],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'rbf', 'C': 1}, {'kernel': 'rbf', 'C': 20}],
 'split0_test_score': array([0.96666667, 0.96666667]),
 'split1_test_score': array([0.96666667, 1.        ]),
 'split2_test_score': array([0.96666667, 0.96666667]),
 'split3_test_score': array([0.93333333, 0.96666667]),
 'split4_test_score': array([1., 1.]),
 'mean_test_score': array([0.96666667, 0.98      ]),
 'std_test_score': array([0.02108185, 0.01632993]),
 'rank_test_score': array([2, 1])}

In [29]:
df2 = pd.DataFrame(rs.cv_results_)

In [30]:
df2[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.966667
1,20,rbf,0.98
