[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io/)

# Machine Learning Methods

## Supervised Learning - Classification - SVM Classifier

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 18/01/2023 | Royi Avital | First version                                                      |
|         |            |             |                                                                    |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/MachineLearningMethods/2023_01/0007ClassifierSvm.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.datasets import load_breast_cancer, make_circles
from sklearn.svm import SVC

# Misc
import os
from platform import python_version
import random

# Typing
from typing import Tuple

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show

# Jupyter
from IPython import get_ipython
from IPython.display import Image, display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout

## Notations

* <font color='red'>(**?**)</font> Question to answer interactively.
* <font color='blue'>(**!**)</font> Simple task to add code for the notebook.
* <font color='green'>(**@**)</font> Optional / Extra self practice.
* <font color='brown'>(**#**)</font> Note / Useful resource / Food for thought.

In [None]:
# Configuration
# IPython magic: render Matplotlib figures inline in the notebook output.
%matplotlib inline

# Seed both NumPy's global RNG and Python's `random` module with the same value
# so the randomly generated data is reproducible between runs.
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# sns.set_theme() #>! Apply SeaBorn theme

# True when the string form of the active IPython shell mentions `google.colab`.
runInGoogleColab = 'google.colab' in str(get_ipython())

In [None]:
# Constants

# Default `figSize` of `PlotBinaryClassData()`, forwarded to `plt.subplots(figsize = ...)`.
FIG_SIZE_DEF = (8, 8)
# Default `elmSize` of `PlotBinaryClassData()`, used as the scatter marker size (`s`).
ELM_SIZE_DEF = 50
# Default `classColor` of `PlotBinaryClassData()`: one Matplotlib color per class, in class order.
CLASS_COLOR = ('b', 'r')


In [None]:
# Fixel Algorithms Packages


In [None]:
# Parameters

# Data Generation: number of samples generated for each of the two classes
numSamples0, numSamples1 = 250, 250


# Data Visualization: bound to the notebook constants (same values as before)
figSize     = FIG_SIZE_DEF
elmSize     = ELM_SIZE_DEF
classColor0, classColor1 = CLASS_COLOR[0], CLASS_COLOR[1]

numGridPts = 250

In [None]:
# Auxiliary Functions

def PlotBinaryClassData( mX: np.ndarray, vY: np.ndarray, hA:plt.Axes = None, figSize: Tuple[int, int] = FIG_SIZE_DEF, elmSize: int = ELM_SIZE_DEF, classColor: Tuple[str, str] = CLASS_COLOR, axisTitle: str = None ) -> plt.Axes:
    """
    Scatter plot of 2D samples colored by their binary class label.

    Args:
        mX:         Samples; columns 0 and 1 are used as the horizontal and
                    vertical coordinates.
        vY:         Labels, one per row of `mX`. Must hold exactly 2 distinct values.
        hA:         Axes to draw on. When `None` a new figure is created.
        figSize:    Size of the new figure (Used only when `hA` is `None`).
        elmSize:    Marker size passed to `scatter()`.
        classColor: Colors of the classes, ordered as the sorted unique labels.
        axisTitle:  Title of the axes. Skipped when `None`.

    Returns:
        The axes the data was drawn on.

    Raises:
        ValueError: If the number of distinct values in `vY` is not 2.
    """

    # Validate before any figure is created so invalid input does not leave
    # an empty figure behind.
    vC       = np.unique(vY) #<! Sorted unique labels
    numClass = len(vC)
    if numClass != 2:
        raise ValueError(f'The input data is not binary, the number of classes is: {numClass}')

    if hA is None:
        _, hA = plt.subplots(figsize = figSize)

    for clsIdx, clsVal in enumerate(vC):
        vIdx = vY == clsVal #<! Boolean mask of the samples of the current class
        hA.scatter(mX[vIdx, 0], mX[vIdx, 1], s = elmSize, color = classColor[clsIdx], edgecolor = 'k', label = f'$C_\u007b {clsVal} \u007d$')

    # Mark the coordinate axes through the origin
    hA.axvline(x = 0, color = 'k')
    hA.axhline(y = 0, color = 'k')
    hA.axis('equal')
    if axisTitle is not None:
        hA.set_title(axisTitle)
    hA.legend()

    return hA

## Generate / Load Data


In [None]:
# Generate Data
numSamples = numSamples0 + numSamples1
# Features: uniform samples in the square [-0.5, 0.5) x [-0.5, 0.5)
mX = np.random.rand(numSamples, 2) - 0.5
# Shift every sample after the first `numSamples0` by 2 along the first feature
mX[numSamples0:, 0] += 2
# Labels: first `numSamples0` samples are class 0, the rest are class 1.
# `np.int_` is the concrete default integer type; passing the abstract
# `np.integer` as a dtype is deprecated by NumPy.
vY = np.ones((numSamples, ), dtype = np.int_)
vY[:numSamples0] = 0

# One hard sample: the first sample is moved to x = 0.75 and labeled as class 1
mX[0, 0]    = 0.75
vY[0]       = 1

# Plot limits as [xMin, xMax, yMin, yMax]
vAxis = np.array([-1, 3, -1, 1])

mX.shape, vY.shape

### Plot Data

In [None]:
# Display the Data
# Scatter plot of the samples colored by label, using the default figure size,
# marker size and class colors. The returned axes are kept in `hA`.

hA = PlotBinaryClassData(mX, vY)

## Train an SVM Classifier

### The SciKit Learn Package

From now on, the course will mostly use modules and functions from the [SciKit Learn](https://scikit-learn.org) package.  
It is best known for its uniform API of `<model>.fit()` and `<model>.predict()`.  
This simple, consistent convention makes it possible to scale by composing models into pipelines, chaining several steps into a single larger model.

In [None]:
# Plotting Function

def PlotSVM(C):
    """
    Train a linear SVM on the notebook data and plot its decision geometry.

    Fits `SVC(kernel = 'linear')` on the global `mX`, `vY`, draws the data and
    overlays the decision boundary (Solid) and the 2 margin lines (Dashed),
    which are the level sets `w^T x - b = k` for `k` in `{+1, 0, -1}`.
    The view is set to the global `vAxis`.

    Args:
        C: Regularization parameter of the SVM. The value `0` is replaced by
           `1e-20` before training.
    """
    if C == 0:
        C = 1e-20 #<! Tiny positive stand in for 0

    # Train the linear SVM
    oSvmClassifier = SVC(C = C, kernel = 'linear').fit(mX, vY)

    # Get model params (As scalars): The decision function is w^T x - b
    vW =  oSvmClassifier.coef_[0]
    b  = -oSvmClassifier.intercept_[0]

    axisTitle = f'SVM Classifier: $C = {C}$'

    _, hA = plt.subplots(figsize = FIG_SIZE_DEF)
    PlotBinaryClassData(mX, vY, hA = hA, axisTitle = axisTitle)

    vXlim = vAxis[:2] #<! Horizontal extent of the drawn lines

    # (Level, Line width, Line style) per line: Upper margin, boundary, lower margin
    lLineSpec = [(1, 2, '--'), (0, 4, '-'), (-1, 2, '--')]
    for levelVal, lineWidth, lineStyle in lLineSpec:
        if vW[1] != 0:
            # Solve w0 * x0 + w1 * x1 = b + level for x1
            hA.plot(vXlim, (b + levelVal - vW[0] * vXlim) / vW[1], lw = lineWidth, color = 'orange', ls = lineStyle)
        elif vW[0] != 0:
            # Vertical line: x1 is free, hence solve for x0 instead of dividing by 0
            hA.axvline(x = (b + levelVal) / vW[0], lw = lineWidth, color = 'orange', ls = lineStyle)
        # Otherwise w = 0: No line is defined, nothing to draw

    hA.axis(vAxis)

$$ \min_{\boldsymbol{w},b}\frac{1}{2} {\left\| \boldsymbol{w} \right\|}^{2} + C \sum_{i} {\xi}_{i} $$

$$ \xi_{i} := \max \left\{ 0, 1 - {y}_{i} \left( \boldsymbol{w}^{T} \boldsymbol{x}_{i} - b \right) \right\} $$

In [None]:
# Display the Geometry of the Classifier
# The slider sets the `C` argument of `PlotSVM()` in the range [0, 100] in steps of 1.
# The slider can reach 0, a value `PlotSVM()` replaces with a tiny positive number.

cSlider = FloatSlider(min = 0, max = 100, step = 1, value = 1, layout = Layout(width = '30%'))
interact(PlotSVM, C = cSlider)

plt.show()