
# Advanced Certification in AIML
## A Program by IIIT-H and TalentSprint
## Not for grading

## Learning Objective

At the end of the experiment, you will be able to:

* Train and evaluate a linear SVM classifier

## Dataset

### Description

The dataset consists of the following 7 columns:

- **species:** penguin species (Chinstrap, Adélie, or Gentoo)
- **culmen length & depth:** The culmen is the upper ridge of a bird's beak
- **flipper_length_mm:** flipper length
- **body_mass_g:** body mass
- **island:** island name (Dream, Torgersen, or Biscoe)
- **sex:** penguin sex

In [None]:
# Shell escape: download Penguin.csv into the current working directory, with wget's output silenced (-qq)
!  wget -qq https://cdn.iiith.talentsprint.com/aiml/Experiment_related_data/Penguin.csv

### Importing Required Packages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Loading the data

In [None]:
# Read the CSV that the download cell saved into the current working directory.
# A relative path is used so the notebook also runs outside Colab, where the
# "/content" directory does not exist.
data = pd.read_csv("Penguin.csv")
data.head()

In [None]:
# Discard every row that contains at least one missing value, then show the cleaned frame
data = data.dropna()
data

In [None]:
# Count the missing values left in each column after the rows with NaNs were dropped
data.isna().sum()

### From the given data, we will select the 'culmen_depth_mm' and 'flipper_length_mm' features for the Gentoo and Chinstrap species, as the two species are linearly separable on these features

In [None]:
# Build a small dataset for SVM classification: culmen_depth_mm and
# flipper_length_mm (plus the species label) for Gentoo and Chinstrap only.
# The two species are kept by excluding every row whose species is 'Adelie'.
is_not_adelie = data['species'] != 'Adelie'
df = data.loc[is_not_adelie, ['culmen_depth_mm', 'flipper_length_mm', 'species']]
df.head()

### Convert categorical values to numerical targets and plot the graph for Gentoo and Chinstrap species

In [None]:
# Convert the categorical species labels to numerical targets.
# Only the 'species' column is mapped, and the result is cast to int explicitly
# rather than relying on pandas' implicit object -> int downcasting in
# `replace`, which is deprecated. Labels that are already encoded pass through
# unchanged, so the cell can safely be re-run.
species_codes = {'Gentoo': 0, 'Chinstrap': 1}
df = df.assign(
    species=df['species'].map(lambda label: species_codes.get(label, label)).astype(int)
)

# Assign data to 'X' and labels to 'y'
X1 = df.iloc[:, :2]
print(X1)
y1 = df['species']

# View the scatter plot, coloured by the numeric species code
plt.scatter(X1.iloc[:, 0], X1.iloc[:, 1], c=y1, s=50, cmap='autumn')
plt.xlabel(X1.columns[0])
plt.ylabel(X1.columns[1])
plt.show()

### Train an SVM Linear Classifier

In [None]:
from sklearn.svm import SVC 
# Create a support vector classifier with a linear kernel; it is fitted in a later cell
model = SVC(kernel='linear')

### Splitting the data into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. A fixed random_state makes the split,
# and therefore the reported accuracy and the support vectors, reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X1, y1, test_size=0.3, random_state=42
)

In [None]:
# Report the (features, labels) shapes of the training split, then of the test split
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, labels.shape)

In [None]:
# Fit the linear SVM on the training split
from sklearn.metrics import accuracy_score
model.fit(X_train, y_train)

# Predict labels for the held-out test split
predict = model.predict(X_test)

# Accuracy on the test split; as the cell's last expression it is shown as the output
accuracy_score(y_test, predict)

### View the coordinates of the support vectors and visualize them

In [None]:
# Coordinates of the support vectors of the fitted model (2-D array: the next cell plots column 0 against column 1)
model.support_vectors_

In [None]:
# Visualize the support vectors in the plot: all samples coloured by species,
# with the support vectors drawn on top in matplotlib's default colour
plt.scatter(X1.iloc[:, 0], X1.iloc[:, 1], c=y1, s=50, cmap='autumn')
plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1])
# Render the figure explicitly, as the other plotting cells do, so the cell
# does not echo the PathCollection repr as its output
plt.show()

In [None]:
#@title ### Visualize the coefficients of the hyperplane that maximize the margin
ax = plt.gca()
plt.scatter(X1.iloc[:, 0], X1.iloc[:, 1], c=y1, s=50, cmap='autumn')
# Read the limits after the scatter so the grid spans the plotted data
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# 30 x 30 grid of points covering the current axes
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
# The model was fitted on a DataFrame, so score the grid as a DataFrame with the
# same column names; passing the bare ndarray makes scikit-learn warn that
# X does not have valid feature names.
grid = pd.DataFrame(xy, columns=X1.columns)
Z = model.decision_function(grid).reshape(XX.shape)

# Level 0 is the decision boundary (solid); levels -1 and +1 are the margins (dashed)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])

# Circle the support vectors
ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
plt.show()

### Exercise 1: From the above dataset we will select the 'culmen_depth_mm' and 'flipper_length_mm' features for the Gentoo and Adelie species as they are linearly separable

In [None]:
# YOUR CODE HERE for preparing the dataset


### Exercise 2: Convert categorical values to numerical targets and plot the graph for Gentoo and Adelie species

In [None]:
# YOUR CODE HERE for applying linear SVM and plotting the graph for Gentoo and Adelie


### Exercise 3: Visualize the support vectors in the plot 


In [None]:
# YOUR CODE HERE


In [None]:
#@title ### Visualize the coefficients of the hyperplane that maximize the margin
# NOTE(review): X2 and y2 are not defined in any cell above; presumably the
# exercise cells are expected to create them. `model` is whatever classifier is
# bound when this cell runs, so it presumably has to be refitted on the
# Gentoo/Adelie data first -- confirm against the exercise solution.
ax = plt.gca()
plt.scatter(X2.iloc[:, 0], X2.iloc[:, 1], c=y2, s=50, cmap='autumn')

# Limits are read after the scatter so the grid spans the plotted data
x_min, x_max = ax.get_xlim()
y_min, y_max = ax.get_ylim()

# 30 x 30 evaluation grid; 'ij' indexing makes the first axis follow xx
xx = np.linspace(x_min, x_max, 30)
yy = np.linspace(y_min, y_max, 30)
XX, YY = np.meshgrid(xx, yy, indexing='ij')
xy = np.column_stack((XX.ravel(), YY.ravel()))
Z = model.decision_function(xy).reshape(XX.shape)

# Contour level 0 is drawn solid, levels -1 and +1 dashed
margin_levels = [-1, 0, 1]
margin_styles = ['--', '-', '--']
ax.contour(XX, YY, Z, levels=margin_levels, linestyles=margin_styles,
           colors='k', alpha=0.5)

# Circle the support vectors of the current model
sv = model.support_vectors_
ax.scatter(sv[:, 0], sv[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
plt.show()