# SVM on Iris dataset to identify Versicolor flowers
Steps:
- list all imports
- load dataset 
- find 2 best features (from pairplot or heatmap or knowledge) => _2 as 2D plot_ and define X
- define y for Versicolor = 1, 0 otherwise
- split dataset into train and test
- scale (fit and transform training data)
- perform svm and fit model
- compare classification metrics for actual test targets and predicted targets
- apply grid search cv and randomised search cv
- find which one is better from cv score
- compare classification metrics for actual test targets and cv predicted targets
- decision boundary

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# import seaborn as sns
# import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [8]:
iris=load_iris()
# df=pd.DataFrame(iris.data)
# sns.pairplot(df)
X,y=iris.data[:, [2,3]],(iris.target==1).astype(int)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [10]:
svm=SVC()
svm.fit(X_train,y_train)
y_pred=svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Confusion Matrix:
 [[32  0]
 [ 0 13]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [16]:
param_grid={
    'C': [0.1, 1, 10], 
    'gamma': [0.001, 0.01, 0.1], 
    'kernel': ['rbf', 'linear', 'poly']
}
grid=GridSearchCV(svm,param_grid,cv=5)
grid.fit(X_train,y_train)
print("Best Parameters:", grid.best_params_)
print("Best CV Score:", grid.best_score_)

Best Parameters: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
Best CV Score: 0.9428571428571428


In [18]:
param_grid={
    'C': np.logspace(-2,2,10), 
    'gamma': np.logspace(-3,1,10), 
    'kernel': ['rbf', 'linear', 'poly']
}
grid=RandomizedSearchCV(svm,param_grid,cv=5,n_iter=10,random_state=42)
grid.fit(X_train,y_train)
print("Best Parameters:", grid.best_params_)
print("Best CV Score:", grid.best_score_)

Best Parameters: {'kernel': 'poly', 'gamma': np.float64(3.593813663804626), 'C': np.float64(35.93813663804626)}
Best CV Score: 0.8


In [19]:
# Use best model from GridSearch
best_model = grid.best_estimator_
y_pred_best = best_model.predict(X_test)

print("Final Accuracy:", accuracy_score(y_test, y_pred_best))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_best))
print("Classification Report:\n", classification_report(y_test, y_pred_best))

Final Accuracy: 0.8444444444444444
Confusion Matrix:
 [[25  7]
 [ 0 13]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.78      0.88        32
           1       0.65      1.00      0.79        13

    accuracy                           0.84        45
   macro avg       0.82      0.89      0.83        45
weighted avg       0.90      0.84      0.85        45

