
# Advanced Certification in AIML
## A Program by IIIT-H and TalentSprint
## Not for grading

## Learning Objective

At the end of the experiment, you will be able to :

* Perform classification using a linear SVM classifier

## Dataset

### Description

The dataset consists of the following 7 columns:

- **species:** penguin species (Chinstrap, Adélie, or Gentoo)
- **culmen length & depth:** The culmen is the upper ridge of a bird's beak
- **flipper_length_mm:** flipper length
- **body_mass_g:** body mass
- **island:** island name (Dream, Torgersen, or Biscoe)
- **sex:** penguin sex

In [None]:
# Quietly download Penguin.csv into the current working directory
!  wget -qq https://cdn.iiith.talentsprint.com/aiml/Experiment_related_data/Penguin.csv

#### Importing Required Packages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

**Load dataset**

In [None]:
# Read the CSV fetched by the wget cell. wget saves the file into the current
# working directory, so a relative path works on Colab (where the original
# absolute path pointed to /content) as well as on a local Jupyter setup.
data = pd.read_csv("Penguin.csv")

# Preview the first rows
data.head()

In [None]:
# Drop every record that has a missing value in ANY column (this includes the
# records whose sex is NaN). The frame is re-bound instead of mutated with
# inplace=True so the cell stays a plain "input -> output" step.
data = data.dropna()

In [None]:
# Count the missing values left in each column after the dropna step
data.isna().sum()

In [None]:
import seaborn as sns

# Pairwise feature plots, with one colour and one marker shape per species
sns.pairplot(
    data,
    hue="species",
    palette="husl",
    markers=["o", "s", "D"],
)

### From the given data, we will select the 'culmen_depth_mm' and 'flipper_length_mm' features for the Gentoo and Chinstrap species, as they are linearly separable

In [None]:
# Keep every row that is not Adelie, restricted to the two features used for
# classification plus the species label (row filter and column selection in one step)
df = data.loc[
    data['species'] != 'Adelie',
    ['culmen_depth_mm', 'flipper_length_mm', 'species'],
]
df.head()

In [None]:
# Convert the categorical species names to numerical targets.
# The mapping is applied to the 'species' column only and cast to int
# explicitly: a frame-wide df.replace('Gentoo', 0) depends on pandas silently
# downcasting the object column to integers (deprecated behaviour), and the
# classifier rejects labels that are left with object dtype.
# Values that are already encoded pass through unchanged, so the cell can be
# re-run safely; any unexpected species name makes astype(int) fail loudly.
species_to_label = {'Gentoo': 0, 'Chinstrap': 1}
df = df.assign(
    species=df['species']
    .map(lambda name: species_to_label.get(name, name))
    .astype(int)
)

# Assign data to 'X' and labels to 'y'
X1 = df.iloc[:, :2]
y1 = df['species']

# View the scatter plot of the two features, coloured by class label
plt.scatter(X1.iloc[:, 0], X1.iloc[:, 1], c=y1, s=50, cmap='autumn')
plt.xlabel(X1.columns[0])
plt.ylabel(X1.columns[1])
plt.show()

In [None]:
from sklearn.svm import SVC 
# Support vector classifier with a linear kernel (not fitted yet)
model = SVC(kernel='linear')

In [None]:
# Perform train-test split of the data (80% train / 20% test).
# random_state pins the shuffle so that the split, and therefore the accuracy
# and the support vectors computed from it, are identical on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X1, y1, test_size=0.2, random_state=42
)

In [None]:
# Show the feature/label shapes of the training split (first line)
# and of the test split (second line)
print(
    f"{X_train.shape} {y_train.shape}\n"
    f"{X_test.shape} {y_test.shape}"
)

In [None]:
from sklearn.metrics import accuracy_score

# Train on the training split, then label the held-out samples
# (fit returns the fitted estimator, so the two calls can be chained)
predict = model.fit(X_train, y_train).predict(X_test)

# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=y_test, y_pred=predict)

**View the coordinates of the support vectors**

In [None]:
# Coordinates of the support vectors of the fitted model (one row per vector)
model.support_vectors_

In [None]:
# Visualize the support vectors in the plot
# First layer: every sample, coloured by its class label
plt.scatter(
    X1['culmen_depth_mm'],
    X1['flipper_length_mm'],
    c=y1,
    s=50,
    cmap='autumn',
)
# Second layer: the support vectors; transposing gives one row per feature,
# which unpacks into the x and y arguments
plt.scatter(*model.support_vectors_.T)
plt.show()

In [None]:
#@title ### Visualize the hyperplane that maximizes the margin
ax = plt.gca()
plt.scatter(X1.iloc[:, 0], X1.iloc[:, 1], c=y1, s=50, cmap='autumn')
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# 30 x 30 grid of points covering the visible plot area
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T

# Evaluate the decision function on the grid. The points are wrapped in a
# DataFrame carrying the training column names because the model was fitted on
# a DataFrame; passing a bare array makes scikit-learn emit a feature-name warning.
Z = model.decision_function(
    pd.DataFrame(xy, columns=X1.columns)
).reshape(XX.shape)

# Solid line: decision boundary (level 0); dashed lines: the margins (levels -1 and +1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])

# Circle the support vectors
ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
ax.set_xlabel(X1.columns[0])
ax.set_ylabel(X1.columns[1])
plt.show()

**Reference**: https://medium.com/swlh/visualizing-svm-with-python-4b4b238a7a92

### Exercise 1: From the above dataset we will select the 'culmen_depth_mm' and 'flipper_length_mm' features for the Gentoo and Adelie species, as they are linearly separable

In [None]:
# Keep every row that is not Chinstrap, restricted to the two features used for
# classification plus the species label (row filter and column selection in one step)
df2 = data.loc[
    data['species'] != 'Chinstrap',
    ['culmen_depth_mm', 'flipper_length_mm', 'species'],
]
df2.head()

In [None]:
# Convert the categorical species names to numerical targets.
# The mapping is applied to the 'species' column only and cast to int
# explicitly: a frame-wide df2.replace('Gentoo', 0) depends on pandas silently
# downcasting the object column to integers (deprecated behaviour), and the
# classifier rejects labels that are left with object dtype.
# Values that are already encoded pass through unchanged, so the cell can be
# re-run safely; any unexpected species name makes astype(int) fail loudly.
species_to_label_2 = {'Gentoo': 0, 'Adelie': 1}
df2 = df2.assign(
    species=df2['species']
    .map(lambda name: species_to_label_2.get(name, name))
    .astype(int)
)

# Assign data to 'X' and labels to 'y'
X2 = df2.iloc[:, :2]
y2 = df2['species']

# View the scatter plot of the two features, coloured by class label
plt.scatter(X2.iloc[:, 0], X2.iloc[:, 1], c=y2, s=50, cmap='autumn')
plt.xlabel(X2.columns[0])
plt.ylabel(X2.columns[1])
plt.show()

In [None]:
from sklearn.svm import SVC 
# Fresh linear-kernel support vector classifier for the exercise (replaces the earlier `model`)
model = SVC(kernel='linear')

In [None]:
# Perform train-test split of the data (70% train / 30% test).
# random_state pins the shuffle so that the split, and therefore the accuracy
# and the support vectors computed from it, are identical on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X2, y2, test_size=0.3, random_state=42
)

In [None]:
# Show the feature/label shapes of the training split (first line)
# and of the test split (second line)
print(
    f"{X_train.shape} {y_train.shape}\n"
    f"{X_test.shape} {y_test.shape}"
)

In [None]:
from sklearn.metrics import accuracy_score

# Train on the training split, then label the held-out samples
# (fit returns the fitted estimator, so the two calls can be chained)
predict = model.fit(X_train, y_train).predict(X_test)

# Fraction of test samples whose predicted label matches the true label
accuracy_score(y_true=y_test, y_pred=predict)

**View the coordinates of the support vectors**

In [None]:
# Coordinates of the support vectors of the fitted model (one row per vector)
model.support_vectors_

In [None]:
# Visualize the support vectors in the plot
# First layer: every sample, coloured by its class label
plt.scatter(
    X2['culmen_depth_mm'],
    X2['flipper_length_mm'],
    c=y2,
    s=50,
    cmap='autumn',
)
# Second layer: the support vectors; transposing gives one row per feature,
# which unpacks into the x and y arguments
plt.scatter(*model.support_vectors_.T)
plt.show()

In [None]:
#@title ### Visualize the hyperplane that maximizes the margin
ax = plt.gca()
plt.scatter(X2.iloc[:, 0], X2.iloc[:, 1], c=y2, s=50, cmap='autumn')
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# 30 x 30 grid of points covering the visible plot area
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T

# Evaluate the decision function on the grid. The points are wrapped in a
# DataFrame carrying the training column names because the model was fitted on
# a DataFrame; passing a bare array makes scikit-learn emit a feature-name warning.
Z = model.decision_function(
    pd.DataFrame(xy, columns=X2.columns)
).reshape(XX.shape)

# Solid line: decision boundary (level 0); dashed lines: the margins (levels -1 and +1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])

# Circle the support vectors
ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
ax.set_xlabel(X2.columns[0])
ax.set_ylabel(X2.columns[1])
plt.show()