# Tutorial 04: Classification II SVM

In this notebook, we classify the movements of a robot in an environment with walls using a real dataset.

Install the necessary libraries on your PC or in a virtual environment using the provided Requirements.txt.

## Import Important Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import cvxopt
from sklearn.metrics import accuracy_score
from functions.plot_confusion_matrix import plot_confusion_matrix

# Turn off the progress output of the cvxopt solvers used in Task 2.
cvxopt.solvers.options['show_progress'] = False


## Task 1: Simple SVM Classifier

Illustration of SVM:

1. Separation Problem
 
<img src="figures/separation_problem.jpg">

2. SVM Classification

<img src="figures/svm-all.jpg">    
 


Source: https://www.baeldung.com/

Answer the following:

1. What is SVM and why do we need it?   
2. What is the basic idea of SVM?   
3. What are the support vectors?   
4. What is a hyperplane?   

In [None]:
# Implement a simple SVM Classifier for binary classification of data.
class SVM:
    """Linear binary SVM skeleton trained sample by sample (exercise template).

    The update rules in ``fit`` and the decision function in ``predict`` are
    still ``...`` placeholders; the class does not run until they are replaced.

    Attributes:
        lr: step size applied to every update (``learning_rate``).
        lambda_param: stored regularisation parameter (``lambda_param``).
        n_iters: number of passes over the training samples.
        w: weight vector, ``None`` until ``fit`` is called.
        b: bias term, ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        """Store the hyper-parameters; the model parameters are created in ``fit``."""
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Fit ``w`` and ``b`` by looping over every sample ``n_iters`` times.

        Args:
            X: 2-D array; ``X.shape`` is unpacked as ``(n_samples, n_features)``.
            y: labels; values ``<= 0`` are mapped to -1, all others to +1.
        """
        n_samples, n_features = X.shape

        # Work on a {-1, +1} copy of the labels, leaving the caller's `y` untouched.
        y_ = np.where(y <= 0, -1, 1)

        # Start from the zero weight vector and zero bias.
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                # TODO(exercise): replace `...` with the test that decides which
                # update branch applies to sample `x_i` (presumably the margin
                # condition built from `y_[idx]`, `self.w` and `self.b`).
                condition = ...
                if condition:
                    # TODO(exercise): weight update for the first branch.
                    self.w -= ...
                else:
                    # TODO(exercise): weight and bias updates for the second branch.
                    self.w -= ...
                    self.b -= ...

    def predict(self, X):
        """Return the sign of the decision value for every row of ``X``."""
        # TODO(exercise): replace `...` with the decision value computed from
        # `X`, `self.w` and `self.b`.
        approx = ...
        return np.sign(approx)

In [None]:
from sklearn import datasets

# Toy problem: 50 two-dimensional points generated around two cluster centres.
X, y = datasets.make_blobs(n_samples=50, n_features=2, centers=2, cluster_std=1.05, random_state=40)
# Relabel: 0 becomes -1, every other label becomes +1.
y = np.where(y == 0, -1, 1)

# Fit on the whole toy set and predict the same points (no held-out split here).
model = SVM()
model.fit(X, y)
y_pred = model.predict(X)
# Show the learned weights and bias, then the predicted labels.
print(model.w, model.b)
print(y_pred)

In [None]:
def get_hyperplane_value(x, w, b, offset):
    """Return the second coordinate of the line ``w[0]*x + w[1]*x1 - b = offset``.

    Args:
        x: first coordinate at which the line is evaluated.
        w: weights; only ``w[0]`` and ``w[1]`` are read.
        b: bias term.
        offset: 0 for the decision boundary, -1 / +1 for the two margins.

    Returns:
        The value ``x1`` solving the line equation at ``x``.
    """
    numerator = -w[0] * x + b + offset
    divisor = w[1]
    return numerator / divisor

# Scatter the samples, coloured by label, on a single set of axes.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
plt.scatter(X[:, 0], X[:, 1], marker="o", c=y)

# Every line is drawn between the smallest and largest first-feature value.
x0_1, x0_2 = np.amin(X[:, 0]), np.amax(X[:, 0])

# Second-feature coordinates at both ends: decision boundary (offset 0),
# then the two margins (offsets -1 and +1).
x1_1, x1_2 = (get_hyperplane_value(x0, model.w, model.b, 0) for x0 in (x0_1, x0_2))
x1_1_m, x1_2_m = (get_hyperplane_value(x0, model.w, model.b, -1) for x0 in (x0_1, x0_2))
x1_1_p, x1_2_p = (get_hyperplane_value(x0, model.w, model.b, 1) for x0 in (x0_1, x0_2))

# Dashed yellow boundary first, then the two solid black margins.
for ends, fmt in (
    ((x1_1, x1_2), "y--"),
    ((x1_1_m, x1_2_m), "k"),
    ((x1_1_p, x1_2_p), "k"),
):
    ax.plot([x0_1, x0_2], list(ends), fmt)

# Pad the vertical range by 3 units around the data.
x1_min, x1_max = np.amin(X[:, 1]), np.amax(X[:, 1])
ax.set_ylim([x1_min - 3, x1_max + 3])

plt.show()

## Task 2: Non-Linear SVM Classifier
1. Is the above data linearly separable?
2. How can we deal with data that is not linearly separable?
3. What is the kernel trick, and how can we use it in SVM?

Kernel Trick:   

<img src="figures/Kernel_Trick_01.jpg">
<img src="figures/Kernel_Trick_02.jpg">
<img src="figures/Kernel_Trick_03.jpg">

In [None]:
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

# Binary problem: drop iris class 0 and L2-normalise every remaining sample.
data = datasets.load_iris()
keep = data.target != 0
X = normalize(data.data[keep], norm='l2')
# Relabel to the {-1, +1} convention: class 1 -> -1, class 2 -> +1.
y = np.where(data.target[keep] == 1, -1, 1)

# The iris samples are stored sorted by class, so an unshuffled 70/30 split
# puts only class +1 into the test set (and 50 vs. 20 samples into training).
# Shuffle with a fixed seed and stratify so both splits contain both classes
# and the result stays reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, stratify=y, random_state=42
)

In [None]:
# Define kernel functions
def linear_kernel(**kwargs):
    """Build the linear kernel function (exercise template).

    Keyword arguments are accepted and ignored, so this factory can be called
    with the same ``power``/``gamma``/``coef`` arguments as the other kernels.

    Returns:
        A function ``f(x1, x2)``; its return value is still the ``...``
        placeholder.
    """
    def f(x1, x2):
        # TODO(exercise): replace `...` with the kernel value of x1 and x2
        # (conventionally their inner product for a linear kernel).
        return ...
    return f


def polynomial_kernel(power, coef, **kwargs):
    """Build a polynomial kernel function (exercise template).

    Args:
        power: available to the inner function; presumably the polynomial degree.
        coef: available to the inner function; presumably the additive constant.
        **kwargs: accepted and ignored (e.g. ``gamma``).

    Returns:
        A function ``f(x1, x2)``; its return value is still the ``...``
        placeholder.
    """
    def f(x1, x2):
        # TODO(exercise): replace `...` with the kernel value built from
        # x1, x2, `coef` and `power`.
        return ...
    return f


def rbf_kernel(gamma, **kwargs):
    """Build an RBF kernel function (exercise template).

    Args:
        gamma: available to the inner function; presumably the width parameter
            that scales the squared distance.
        **kwargs: accepted and ignored (e.g. ``power``, ``coef``).

    Returns:
        A function ``f(x1, x2)``; its return value is still the ``...``
        placeholder.
    """
    def f(x1, x2):
        # Squared Euclidean distance between the two samples.
        distance = np.linalg.norm(x1 - x2) ** 2
        # TODO(exercise): replace `...` with the kernel value built from
        # `distance` and `gamma`.
        return ...
    return f

class SupportVectorMachine():
    """Kernel SVM for labels in {-1, +1}, trained by solving a QP with cvxopt.

    Exercise template: the kernel-matrix entries, the extraction of the
    support vectors and the per-sample prediction term are still ``...``
    placeholders and must be filled in before the class produces results.

    Parameters:
        C: upper bound applied to every Lagrange multiplier. A falsy value
            (``None`` or ``0``) removes the upper bound.
        kernel: kernel factory. It is called as
            ``kernel(power=..., gamma=..., coef=...)`` and must return a
            function ``f(x1, x2)``.
        power: forwarded to the kernel factory.
        gamma: forwarded to the kernel factory; ``None`` is replaced by
            ``1 / n_features`` on the first call to ``fit``.
        coef: forwarded to the kernel factory.

    Attributes set by ``fit``:
        kernel: the built kernel function ``f(x1, x2)`` (the factory before
            the first ``fit``).
        lagr_multipliers, support_vectors, support_vector_labels, intercept.
    """

    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        self.C = C
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.kernel = kernel
        # Captured on the first fit(): fit() replaces `self.kernel` with the
        # built kernel function, and later fits must still reach the factory.
        self._kernel_factory = None
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):
        """Solve the QP for the Lagrange multipliers and derive the intercept.

        Args:
            X: 2-D array; ``np.shape(X)`` is unpacked as
                ``(n_samples, n_features)``.
            y: labels, one per sample (the notebook uses -1 / +1).
        """
        n_samples, n_features = np.shape(X)
        if self.gamma is None:
            self.gamma = 1 / n_features

        # Remember the factory the first time so that fit() can be called
        # again: `self.kernel` holds the built function after the first fit,
        # and calling that with keyword arguments would raise TypeError.
        if self._kernel_factory is None:
            self._kernel_factory = self.kernel
        self.kernel = self._kernel_factory(
            power=self.power, gamma=self.gamma, coef=self.coef)

        # Calculate kernel matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                # TODO(exercise): fill entry (i, j) of `kernel_matrix`.
                ...

        # Define the quadratic optimization problem in cvxopt's form:
        #   minimise (1/2) a^T P a + q^T a   subject to   G a <= h,  A a = b
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        # Lower bound on every multiplier, written as -a <= 0.
        G_lower = np.identity(n_samples) * -1
        h_lower = np.zeros(n_samples)
        if not self.C:
            G = cvxopt.matrix(G_lower)
            h = cvxopt.matrix(h_lower)
        else:
            # Additional upper bound a <= C, stacked below the lower bound.
            G_upper = np.identity(n_samples)
            h_upper = np.ones(n_samples) * self.C
            G = cvxopt.matrix(np.vstack((G_lower, G_upper)))
            h = cvxopt.matrix(np.concatenate((h_lower, h_upper)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        # TODO(exercise): read them from the solver result `minimization`.
        lagr_mult = ...

        # Extract support vectors
        # Get indexes of non-zero lagr. multipliers
        idx = ...
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = ...
        # Get the samples that will act as support vectors
        self.support_vectors = ...
        # Get the corresponding labels
        self.support_vector_labels = ...

        # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
            self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], self.support_vectors[0])

    def predict(self, X):
        """Return an array with the sign of the decision value for each sample in ``X``."""
        y_pred = []

        for sample in X:
            prediction = 0
            for i in range(len(self.lagr_multipliers)):
                # TODO(exercise): add the contribution of support vector `i`
                # to the decision value of `sample`.
                prediction += ...
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)

In [None]:
# Train the kernel SVM on the training split, using the polynomial kernel
# factory with power=4 and coef=1.
model = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
model.fit(X_train, y_train)

# Predict labels for the test samples.
y_pred = model.predict(X_test)

In [None]:
# Report the share of matching labels between prediction and test set, in percent.
accuracy_percent = 100 * accuracy_score(y_pred, y_test)
print("Performance - " + str(accuracy_percent) + "%")

## Task 3: Multiclass SVM Classifier using Standard Library

In its basic form, an SVM does not support multiclass classification. For multiclass classification, the problem is subdivided into multiple binary classification problems. 

The popular methods which are used to perform multi-classification using SVM are as follows:

1. One vs One (OVO) approach

<img src="figures/multiclass-svm-1.jpg">

2. One vs All (OVA) approach

<img src="figures/multiclass-svm-2.jpg">

In [None]:
# Load the data from csv to Pandas Dataframe
# Everything is read as strings first because the last column holds the label.

data = np.loadtxt("Data/sensor_readings_24.csv", delimiter=',', dtype=str)

# Columns 0-23 become float features; column 24 is appended as 'Label'.
df = pd.DataFrame(data[:,:24], dtype=np.float64)
df = pd.concat([df, pd.DataFrame(data[:, 24], columns=['Label'])], axis=1)

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
# Feature selection on the 24 feature columns

# define feature selection: keep the k=3 best-scoring features under f_classif
fs = SelectKBest(score_func=f_classif, k=3)
# apply feature selection; the result holds only the selected feature columns
df_selected_1 = fs.fit_transform(df.iloc[:, 0:24], df['Label'])
# Test and Train data splitting
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


labelEn = LabelEncoder()
# TODO(exercise): encode df['Label'] with `labelEn`.
encoded_labels = ...
# TODO(exercise): the class names passed to plot_confusion_matrix in later cells.
class_names = ...

# TODO(exercise): split features and encoded labels into train and test sets;
# unpacking the `...` placeholder raises TypeError until this is filled in.
X_train, X_test, y_train, y_test = ...


In [None]:
from sklearn import svm
# Apply SVM Classifier for rbf kernel
# TODO(exercise): create the classifier from the imported `svm` module
# (presumably an rbf-kernel classifier), fit it on the training split and
# predict the test split. All three `...` placeholders must be replaced.
model = ...
model.fit(...)
y_pred = ...

In [None]:
# Plot the confusion matrix of the rbf-kernel predictions against the test labels
rbf_title = 'Confusion matrix For SVM Classification with rbf kernel'
plot_confusion_matrix(y_test, y_pred, classes=class_names, title=rbf_title)

In [None]:
# Report the share of matching labels between prediction and test set, in percent.
accuracy_percent = 100 * accuracy_score(y_pred, y_test)
print("Performance - " + str(accuracy_percent) + "%")

In [None]:
# Apply SVM Classifier for Polynomial kernel
# TODO(exercise): create the classifier (presumably with a polynomial
# kernel), fit it on the training split and predict the test split.
# All three `...` placeholders must be replaced.
model = ...
model.fit(...)
y_pred = ...

In [None]:
# Plot the confusion matrix of the polynomial-kernel predictions.
# The title previously read "KNN Classification", a leftover that mislabelled
# this SVM plot.
plot_confusion_matrix(y_test, y_pred, classes=class_names, title='Confusion matrix For SVM Classification with polynomial kernel')

In [None]:
# Report the share of matching labels between prediction and test set, in percent.
accuracy_percent = 100 * accuracy_score(y_pred, y_test)
print("Performance - " + str(accuracy_percent) + "%")