Support Vector Machine

Step 1: Import the libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

Step 2: Load the dataset

In [4]:
data = sns.load_dataset("penguins")
data = data.dropna()

print(data.head())

  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \
0  Adelie  Torgersen            39.1           18.7              181.0   
1  Adelie  Torgersen            39.5           17.4              186.0   
2  Adelie  Torgersen            40.3           18.0              195.0   
4  Adelie  Torgersen            36.7           19.3              193.0   
5  Adelie  Torgersen            39.3           20.6              190.0   

   body_mass_g     sex  
0       3750.0    Male  
1       3800.0  Female  
2       3250.0  Female  
4       3450.0  Female  
5       3650.0    Male  


Step 3: Feature Engineering and Scaling

In [8]:
X = data[['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g']]
y = data['species']

#Encode categorical target (species)
y = y.astype("category").cat.codes    #Convert species to numeric lables

#Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#Polynomial Features
poly = PolynomialFeatures(degree=2, interaction_only=True,include_bias =False)
X_poly = poly.fit_transform(X_scaled)

Step 4: Train Test Split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X,y, test_size=0.2, random_state=42, stratify=y
)

Step 5: SVM classifier + Hyperparameter tuning

In [10]:
svm = SVC()

param_grid = {
    "C" : [0.1,1,10],
    "kernel": ["linear","rbf","poly"],
    "gamma": ["scale","auto"]
}

grid = GridSearchCV(svm,param_grid,cv=5, scoring ="accuracy",n_jobs = -1)
grid.fit(X_train,y_train)

print("Best params:", grid.best_params_)
print("Best CV Accuracy:",grid.best_score_)

Best params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Best CV Accuracy: 0.9774283717679942


Step 6: Evaluate the set

In [11]:
best_svm = grid.best_estimator_
y_pred = best_svm.predict(X_test)

print("\ntest Accuracy:",accuracy_score(y_test,y_pred))
print("\nClassification report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test,y_pred))


test Accuracy: 1.0

Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        24

    accuracy                           1.00        67
   macro avg       1.00      1.00      1.00        67
weighted avg       1.00      1.00      1.00        67


Confusion Matrix:
 [[29  0  0]
 [ 0 14  0]
 [ 0  0 24]]
