In [2]:
from sklearn import svm,datasets
import pandas as pd
# Load the iris dataset bundled with scikit-learn and wrap its feature matrix
# in a DataFrame whose columns carry the dataset's own feature names.
iris_data_set = datasets.load_iris()
df = pd.DataFrame(
    data=iris_data_set.data,
    columns=iris_data_set.feature_names,
)

# Preview the first rows of the feature table.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
# Add a 'flower' column holding the species name of every sample: each integer
# class code in `target` is used as an index into `target_names`.
species_names = iris_data_set.target_names
df['flower'] = [species_names[code] for code in iris_data_set.target]
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [18]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation.
# random_state pins the shuffle so the split -- and therefore the score shown
# below -- is the same after every Restart & Run All.
# stratify keeps the class proportions equal in the train and test partitions,
# so the score is not skewed by an unlucky class mix in a small test set.
X_train, x_test, Y_train, y_test = train_test_split(
    iris_data_set.data,
    iris_data_set.target,
    test_size=0.3,
    random_state=42,
    stratify=iris_data_set.target,
)

# RBF-kernel support vector classifier; C is the regularisation parameter.
model = svm.SVC(kernel='rbf', C=70, gamma='auto')
model.fit(X_train, Y_train)

# Mean accuracy on the held-out samples.
model.score(x_test, y_test)

0.9333333333333333

In [17]:
# K-fold cross-validation
from sklearn.model_selection import cross_val_score
# C=70 is the SVC regularisation parameter -- it is NOT the number of folds.
model = svm.SVC(kernel='rbf', C=70, gamma='auto')
# cv=10 is what sets the fold count; the per-fold scores are then averaged.
fold_scores = cross_val_score(model, iris_data_set.data, iris_data_set.target, cv=10)
fold_scores.mean()

0.9600000000000002

In [16]:
# Same 10-fold cross-validation, this time with a linear kernel.
# NOTE(review): gamma is documented as the coefficient for the rbf/poly/sigmoid
# kernels, so it presumably has no effect with kernel='linear' -- confirm.
model = svm.SVC(C=70, kernel='linear', gamma='auto')
linear_fold_scores = cross_val_score(model, iris_data_set.data, iris_data_set.target, cv=10)
linear_fold_scores.mean()  # average of the cv=10 per-fold scores

0.9800000000000001

In [27]:
# Grid-search cross-validation
from sklearn.model_selection import GridSearchCV

# Every combination of these C values and kernels is evaluated.
svc_param_grid = {
    'C': [1, 10, 20],
    'kernel': ['rbf', 'linear'],
}
clf = GridSearchCV(
    svm.SVC(gamma='auto'),
    svc_param_grid,
    cv=5,
    return_train_score=False,
)
clf.fit(iris_data_set.data, iris_data_set.target)

# Tabulate the search results (timings, per-split scores, mean score, rank)
# and preview the first rows.
clfdf = pd.DataFrame(clf.cv_results_)
clfdf.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001113,0.000652,0.000879,0.000862,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.000448,3.4e-05,0.000288,1.7e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.000544,2.1e-05,0.000289,1e-05,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.000438,2.8e-05,0.000267,1e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.000564,2.7e-05,0.000302,3.1e-05,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5


In [31]:
# Analyze which C / kernel combination scores best: keep only the searched
# parameters and the mean cross-validated score of each combination.
summary_columns = ['param_C', 'param_kernel', 'mean_test_score']
cx = clfdf[summary_columns]
cx

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [34]:
# Best mean cross-validated score found by the grid search; it matches the
# highest mean_test_score in the results table.
clf.best_score_

0.9800000000000001

In [35]:
# Parameter combination the grid search reports as best. Several combinations
# tie on mean_test_score in the results table, so this is only one of the
# top-scoring settings.
clf.best_params_

{'C': 1, 'kernel': 'rbf'}

In [39]:
# Randomized-search cross-validation
from sklearn.model_selection import RandomizedSearchCV

# Instead of trying all 6 C/kernel combinations, sample only n_iter=3 of them.
# random_state pins which candidates are drawn, so the table below shows the
# same three rows on every run instead of a different random subset each time.
rsclf = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
    n_iter=3,
    random_state=42,
)
rsclf.fit(iris_data_set.data, iris_data_set.target)

# Show the sampled parameter combinations with their mean cross-validated score.
fgs = pd.DataFrame(rsclf.cv_results_)
fgs[['param_C', 'param_kernel', 'mean_test_score']]


Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,linear,0.966667
1,10,rbf,0.98
2,1,rbf,0.98


In [41]:
# Test many models and pick the best one

from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Each entry pairs an unfitted estimator with the hyper-parameter grid that
# GridSearchCV should explore for it.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        # Seeded so the forest's score is the same on every run.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10]
        }
    }
}


# One record per model family: its best CV score and the parameters behind it.
scores = []

for model_name, mp in model_params.items():
    # Use a dedicated name rather than `clf`, so the SVM grid search fitted in
    # an earlier cell is not overwritten by whichever model is searched last.
    search = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    search.fit(iris_data_set.data, iris_data_set.target)
    scores.append({
        'model': model_name,
        'best_score': search.best_score_,
        'best_params': search.best_params_
    })

# Use a dedicated name rather than `df`, so the iris feature frame built at
# the top of the notebook stays available when earlier cells are re-run.
scores_df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
scores_df

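# Based on the scores above, I conclude that SVM with C=1 and kernel='rbf' is the best model for my iris flower classification problem.
# Caveat: in the earlier grid-search table, kernel='linear' with C=1 and kernel='rbf' with C=10 tie with it at a mean test score of 0.98, so the choice among those three settings is a tie-break rather than a clear win.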

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.96,{'n_estimators': 10}
2,logistic_regression,0.966667,{'C': 5}
