[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io/)

# Machine Learning Methods

## Supervised Learning - Features Transform - Exercise

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 26/01/2023 | Royi Avital | First version                                                      |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/MachineLearningMethods/2023_01/0016FeaturesTransformExercise.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.base import TransformerMixin
from sklearn.datasets import make_circles
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC

# Miscellaneous
import os
from platform import python_version
import random

# Typing
from typing import Tuple

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import Image, display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout

## Notations

* <font color='red'>(**?**)</font> Question to answer interactively.
* <font color='blue'>(**!**)</font> Simple task to add code for the notebook.
* <font color='green'>(**@**)</font> Optional / Extra self practice.
* <font color='brown'>(**#**)</font> Note / Useful resource / Food for thought.

In [None]:
# Configuration
%matplotlib inline

seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# sns.set_theme() #>! Apply SeaBorn theme

runInGoogleColab = 'google.colab' in str(get_ipython())

In [None]:
# Constants
# Default values shared by the plotting functions of the notebook.

FIG_SIZE_DEF = (8, 8) #<! Default figure size, the default of `figSize` in `PlotBinaryClassData()`
ELM_SIZE_DEF = 50 #<! Default marker size, the default of `elmSize` in `PlotBinaryClassData()`
CLASS_COLOR = ('b', 'r') #<! Color per class (1st class, 2nd class), also used to fill the decision regions
EDGE_COLOR  = 'k' #<! Marker edge color. NOTE: The visible plotting code hard codes `'k'` and does not reference this constant


In [None]:
# Fixel Algorithms Packages


## Features Transform

In this exercise we'll apply a feature transform to solve a classification problem.  
We'll apply 2 different transforms:

1. Polynomial Transform  
2. Polar Coordinates.

In the exercise we'll learn about 2 features of SciKit Learn:

1. Pre Processing Module.
2. Pipelines.

The tasks are:

1. Train a linear SVM classifier on the data to have a base line.
2. Apply polynomial feature transform using [`PolynomialFeatures`](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html).
3. Train a linear SVM classifier on the transformed features.
4. Change coordinates of the original features to [Polar Coordinate System](https://en.wikipedia.org/wiki/Polar_coordinate_system).
5. Train a linear SVM classifier on the transformed features.

In [None]:
# Parameters

# Data Generation
numSamples = 400 #<! Total number of samples (Passed as `n_samples` to `make_circles()`)

# Pre Processing
polyDeg = 2 #<! Degree of the polynomial features

# Model
paramC      = 1
kernelType  = 'linear'
lC          = [0.1, 0.25, 0.75, 1, 1.5, 2, 3] #<! Values of the parameter `C` to evaluate

# Data Visualization
numGridPts = 500 #<! Number of grid points per axis of the decision boundary plot

In [None]:
# Auxiliary Functions

def PlotBinaryClassData( mX: np.ndarray, vY: np.ndarray, hA:plt.Axes = None, figSize: Tuple[int, int] = FIG_SIZE_DEF, elmSize: int = ELM_SIZE_DEF, classColor: Tuple[str, str] = CLASS_COLOR, axisTitle: str = None ) -> plt.Axes:
    """
    Scatter plot of 2D samples of a binary classification problem.

    Input:
        mX         - Samples, the first 2 columns are used as the plot coordinates.
        vY         - Labels, must hold exactly 2 unique values.
        hA         - Axes to draw on. When `None` a new figure of size `figSize` is created.
        figSize    - Size of the figure created when `hA` is `None`.
        elmSize    - Size of the markers.
        classColor - Colors of the 1st and the 2nd class (By the sorted order of the labels).
        axisTitle  - Title of the axes. No title is set when `None`.
    Output:
        hA         - The axes the data was drawn on.
    Raises:
        ValueError - When the number of unique labels in `vY` is not 2.
    """

    if hA is not None:
        hF = hA.get_figure() #<! Parent figure of the given axes
    else:
        hF, hA = plt.subplots(figsize = figSize)

    vClass, vCount = np.unique(vY, return_counts = True) #<! Sorted unique labels

    numClass = len(vClass)
    if (numClass != 2):
        raise ValueError(f'The input data is not binary, the number of classes is: {numClass}')

    # One scatter per class so each class has its own color and legend entry
    for ii in range(2):
        vMask = vY == vClass[ii] #<! Samples of the current class
        hA.scatter(mX[vMask, 0], mX[vMask, 1], s = elmSize, color = classColor[ii], edgecolor = 'k', label = f'$C_\u007b {vClass[ii]} \u007d$')

    # The coordinate axes through the origin
    hA.axvline(x = 0, color = 'k')
    hA.axhline(y = 0, color = 'k')
    hA.axis('equal')

    if axisTitle is not None:
        hA.set_title(axisTitle)

    hA.legend()

    return hA

def PlotLabelsHistogram(vY: np.ndarray, hA = None):
    """
    Bar plot of the number of samples per class / label.

    Input:
        vY - Labels of the samples.
        hA - Axes to draw on. When `None` a new figure of size (8, 6) is created.
    Output:
        hA - The axes the histogram was drawn on.
    """

    if hA is None:
        _, hA = plt.subplots(figsize = (8, 6))

    # Unique classes (Sorted) and the number of occurrences of each
    vClass, vNumSamples = np.unique(vY, return_counts = True)

    hA.bar(vClass, vNumSamples, width = 0.9, align = 'center')
    hA.set_xticks(vClass) #<! A tick per class

    hA.set_xlabel('Class')
    hA.set_ylabel('Number of Samples')
    hA.set_title('Histogram of Classes / Labels')

    return hA

def PlotConfusionMatrix(vY: np.ndarray, vYPred: np.ndarray, hA: plt.Axes = None, lLabels: list = None, dScore: dict = None, titleStr: str = 'Confusion Matrix') -> plt.Axes:
    """
    Calculates and plots the confusion matrix of a classifier.

    Input:
        vY       - The ground truth labels.
        vYPred   - The predicted labels.
        hA       - Axes to draw on, passed as `ax` to `ConfusionMatrixDisplay.plot()`.
        lLabels  - Labels to display, passed as `display_labels`.
        dScore   - Optional dictionary of `score name -> score value`.
                   Each entry is appended to the title (2 significant digits).
        titleStr - The base title of the plot.
    Output:
        hA       - The axes of the confusion matrix display.
    """

    # Imported locally since `sklearn.metrics` is not imported at the top of the notebook.
    # Without it the call fails with `NameError`.
    from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

    # Calculation of Confusion Matrix
    mConfMat = confusion_matrix(vY, vYPred)
    oConfMat = ConfusionMatrixDisplay(mConfMat, display_labels = lLabels)
    oConfMat = oConfMat.plot(ax = hA)
    hA = oConfMat.ax_ #<! The axes actually used by the display

    if dScore:
        # Builds: `<Title>: <Name 1> = <Val 1>, <Name 2> = <Val 2>`
        titleStr += ':' + ','.join(f' {scoreName} = {scoreVal:0.2}' for scoreName, scoreVal in dScore.items())

    hA.set_title(titleStr)
    hA.grid(False)

    return hA


def PlotDecisionBoundaryClosure( numGridPts, gridXMin, gridXMax, gridYMin, gridYMax, numDigits = 1 ):
    """
    Builds a function which plots the decision regions of a classifier over a fixed 2D grid.
    The grid is generated once and shared by all calls of the returned function.

    Input:
        numGridPts - Number of grid points per axis.
        gridXMin   - Lower bound of the 1st axis.
        gridXMax   - Upper bound of the 1st axis.
        gridYMin   - Lower bound of the 2nd axis.
        gridYMax   - Upper bound of the 2nd axis.
        numDigits  - Number of decimal digits the grid bounds are rounded to (Outwards).
    Output:
        PlotDecisionBoundary - Function of the form `PlotDecisionBoundary(hDecFun, hA = None)`.

    Both axes use the same range: From the smallest of the lower bounds to the largest of the upper bounds.
    """

    scaleFctr = 10 ** numDigits

    # For equal axis: A single range, rounded outwards, serves both axes
    lowVal  = np.floor(scaleFctr * min(gridXMin, gridYMin)) / scaleFctr
    highVal = np.ceil(scaleFctr * max(gridXMax, gridYMax)) / scaleFctr
    vGrid   = np.linspace(lowVal, highVal, numGridPts)

    mGrid0, mGrid1 = np.meshgrid(vGrid, vGrid)
    mGridPts       = np.column_stack((mGrid0.ravel(), mGrid1.ravel())) #<! A grid point per row

    def PlotDecisionBoundary(hDecFun, hA = None):
        # hDecFun - Function which maps the grid points (A point per row) to a value per point.
        # hA      - Axes to draw on. When `None` a new figure of size (8, 6) is created.

        if hA is None:
            _, hA = plt.subplots(figsize = (8, 6))

        mPred = hDecFun(mGridPts).reshape(mGrid0.shape) #<! Back to the shape of the grid

        # The levels split the values into 2 bands: (-0.5, 0.5) and (0.5, 1.5)
        hA.contourf(mGrid0, mGrid1, mPred, colors = CLASS_COLOR, alpha = 0.3, levels = [-0.5, 0.5, 1.5])

        return hA

    return PlotDecisionBoundary
    




## Generate / Load Data


In [None]:
# Loading / Generating Data

mX, vY  = make_circles(n_samples = numSamples, shuffle = True, noise = 0.075, factor = 0.50)

PlotDecisionBoundary = PlotDecisionBoundaryClosure(numGridPts, -1.5, 1.5, -1.5, 1.5)

### Plot Data

In [None]:
# Display the Data
hA = PlotBinaryClassData(mX, vY, axisTitle = 'Samples Data')

## Solution by Linear SVM Classifier

In this section we'll try to find the best _Linear SVM_ model for the problem.

* <font color='red'>(**?**)</font> What do you think the decision boundary will be?

In [None]:
# SVM Linear Model

vAcc = np.zeros(shape = len(lC)) #<! Array of accuracy


#===========================Fill This===========================#
# Iterate over the parameters in `lC`.
# Score each model.
# Extract the best model.

for ???, ??? in enumerate(lC):
    oLinSvc  = SVC(C = ???, kernel = ???).fit(???, ???)#<! Model definition and training
    vAcc[ii] = ??? #<! Accuracy

bestModelIdx    = np.argmax(???)
bestC           = lC[???]

oLinSvc = SVC(C = ???, kernel = ???).fit(???, ???)

#===============================================================#

print(f'The best model with C = {bestC:0.2f} achieved accuracy of {vAcc[bestModelIdx]:0.2%}')


In [None]:
# Plot the Decision Boundary

hF, hA = plt.subplots(figsize = FIG_SIZE_DEF)
hA = PlotDecisionBoundary(oLinSvc.predict, hA)
hA = PlotBinaryClassData(mX, vY, hA = hA, axisTitle = 'Classifier Decision Boundary')
plt.show()



## Feature Transform

In this section we'll create a new set of features.  
We'll have 2 types of transformations:

1. Polynomial.
2. Polar Coordinates.

In order to apply the chain of operation: `X -> PolyFeatures -> Model` we'll create a SciKit Pipeline using [`Pipeline`](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html).

### Polynomial Features Transform

The features will model a higher degree polynomial based on the 2 given features (Coordinates).

Given the data as:

$$ X = \begin{bmatrix} \boldsymbol{x}_{1} & \boldsymbol{x}_{2} \end{bmatrix} $$

We'll generate the matrix:

$$ \bar{X} = \begin{bmatrix} \boldsymbol{x}_{1} & \boldsymbol{x}_{2} & \boldsymbol{x}_{1} \otimes \boldsymbol{x}_{1} & \boldsymbol{x}_{2} \otimes \boldsymbol{x}_{2} & \boldsymbol{x}_{1} \otimes \boldsymbol{x}_{2} & \dots \end{bmatrix} $$

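Where $\otimes$ stands here for the element wise multiplication (the [Hadamard Product](https://en.wikipedia.org/wiki/Hadamard_product_(matrices))), not to be confused with the [Kronecker Product](https://en.wikipedia.org/wiki/Kronecker_product) which commonly uses the same symbol.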

Basically we create all inter multiplication up to order $p$ of the data.  
This is done using [`PolynomialFeatures`](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html) from the `preprocessing` module of SciKit Learn.


* <font color='red'>(**?**)</font> Given we're using the SVM model, do we need the term $\boldsymbol{1}$ in our features? Look at the `include_bias` parameter in `PolynomialFeatures`.
* <font color='red'>(**?**)</font> Why is the `polyDeg` parameter set to 2?

In [None]:
# Construct the PolynomialFeatures Object

#===========================Fill This===========================#
# 1. Set the `degree` and pay attention to `include_bias` parameter.
oPolyTrns = PolynomialFeatures(degree = ???, include_bias = ???)
#===============================================================#

In [None]:
# Feature Generation
# Generate a set of features by applying the transformations.

#===========================Fill This===========================#
# 1. Read about the fit() and transform() methods of PolynomialFeatures.
# 2. Use the methods to apply the feature transform.
oPolyTrns = ???
mX1 = ???
#===============================================================#

In [None]:
# SVM Linear Model
# Apply a linear model on the transformed features.

vAcc = np.zeros(shape = len(lC)) #<! Array of accuracy

#===========================Fill This===========================#
# 1. Iterate over the parameters in `lC`.
# 2. Score each model.
# 3. Extract the best model.
# !! Make sure to use the transformed features.

for ??? in ???:
    oLinSvc  = ??? #<! Model definition and training
    vAcc[ii] = ??? #<! Accuracy

bestModelIdx    = ???
bestC           = ???

oLinSvc = ???

#===============================================================#

print(f'The best model with C = {bestC:0.2f} achieved accuracy of {vAcc[bestModelIdx]:0.2%}')

#### Decision Boundary

* <font color='red'>(**?**)</font> Can we apply the trained model on the original feature set? Think about dimensions of the data.

In order to plot the _decision boundary_ over the original features we need to have a single object, with the `predict()` method to apply both the pre processing and the prediction.  
This is the basic concept behind a pipeline in SciKit Learn.

* <font color='brown'>(**#**)</font> Later on we'll use this concept for the training step as well.

In [None]:
# Build the Pipeline

#===========================Fill This===========================#
# 1. Construct the pipeline object.
# 2. The 1st step is `Transformer` which applies the polynomial transformation.
# 3. The 2nd step is `Classifier` which applies the classifier.
modelPipe = Pipeline([('Transformer', ???), ('Classifier', ???)])
#===============================================================#

In [None]:
# Plot the Decision Boundary

hF, hA = plt.subplots(figsize = FIG_SIZE_DEF)
hA = PlotDecisionBoundary(modelPipe.predict, hA)
hA = PlotBinaryClassData(mX, vY, hA = hA, axisTitle = 'Classifier Decision Boundary')
plt.show()

### Polar Coordinates

In this section we'll replace the features with the following model:

$$ \phi \left( {x}_{1}, {x}_{2} \right) \to \left( \sqrt{ {x}_{1}^{2} + {x}_{2}^{2} }, \angle \left( {x}_{1}, {x}_{2} \right) \right) $$

Where $\angle \left( {x}_{1}, {x}_{2} \right)$ is the angle between the point $\left( {x}_{1}, {x}_{2} \right)$ and the positive direction of the ${x}_{1}$ axis.

Then we'll show the decision boundary of the best model.

* <font color='red'>(**?**)</font> What do you expect the decision boundary to look like this time?

The tasks:

1. Create a transformer sub class to apply the data transformation.
2. Apply the transform on the data and plot it to verify it.
3. Create a pipeline based on the data using pre defined parameters for the SVM model.
4. Train the pipeline using `fit()`.
5. Plot the decision boundary.

* <font color='brown'>(**#**)</font> Later on we'll learn how to control the parameters of the steps of a pipeline.


In [None]:
# The PolarCoordinatesTransformer Class
# This is a SciKit Learn transformer sub class.
# This class implements the `fit()`, `transform()` and `fit_transform()` methods.


# Transformer which maps 2D Cartesian coordinates to polar coordinates (Magnitude, Angle).
# The output buffer is allocated in `fit()` and reused by each call to `transform()`.
# Hence `transform()` accepts only data with the same number of rows and columns as the data given to `fit()`.
class PolarCoordinatesTransformer(TransformerMixin):
    def __init__(self):
        pass

    def fit(self, mX, vY = None):
#===========================Fill This===========================#
# This method gets the input features and allocates memory for the transformed features.
# It also keeps, for later validation, the dimensions of the input data.
# Raises `ValueError` if the input data does not have exactly 2 columns.
# Returns the object itself.
        numSamples  = ???
        dataDim     = ???
        if dataDim != 2:
            raise ValueError(f'The input data must have exactly 2 columns while it has {dataDim} columns')
        
        mZ = np.zeros(shape = (???, ???))

        self.numSamples = numSamples
        self.dataDim    = dataDim
        self.mZ         = mZ

        return self
#===============================================================#
    
    def transform(self, mX):
#===========================Fill This===========================#
# This method applies the actual transform.
# It saves the transformations into `mZ`.
# The 1st column is the magnitude and the 2nd column is the angle.
# Raises `ValueError` if the dimensions of `mX` do not match the dimensions kept by `fit()`.
# NOTE: The returned array is the internal buffer `self.mZ`, it is overwritten by the next call.
        if ((mX.shape[0] != self.numSamples) or (mX.shape[1] != self.dataDim)):
            raise ValueError(f'The data to transform has a different dimensions than the data which defined in `fit()`')
        
        self.mZ[:, 0] = ???
        self.mZ[:, 1] = ???

        return self.mZ
#===============================================================#

    def fit_transform(self, mX, vY = None, **fit_params):
        # Delegates to the implementation of the parent class (`TransformerMixin`)
        
        return super().fit_transform(mX, vY, **fit_params)

* <font color='blue'>(**!**)</font> The class above calculates ${\left\| \boldsymbol{x} \right\|}_{2}$. Implement ${\left\| \boldsymbol{x} \right\|}_{2}^{2}$ instead and compare results.
* <font color='red'>(**?**)</font> Which of the options would you choose for production?

In [None]:
# Construct the PolarCoordinatesTransformer object

#===========================Fill This===========================#
oPolarTrns = ???
#===============================================================#

In [None]:
# Generate a set of features with the new feature

#===========================Fill This===========================#
# Use fit_transform() to both fit and apply at once
mX2 = oPolarTrns.???
#===============================================================#

In [None]:
# Plot the transformed features
hF, hA = plt.subplots(figsize = FIG_SIZE_DEF)
hA = PlotBinaryClassData(mX2, vY, hA = hA, axisTitle = 'Polar Coordinates Transformed Features')
hA.set_xlabel(r'${\left\Vert \bf{x} \right\Vert}_{2}$')
hA.set_ylabel(r'$ \angle \left( \bf{x} \right) $')

plt.show()

In [None]:
# SVM Linear Model - On the Transformed Data
# Trains a model on the polar coordinates features (`mX2`) per value of `C` in `lC` and keeps the best one.
# The accuracy is evaluated on the same data the model was trained on.

vAcc = np.zeros(shape = len(lC)) #<! Array of accuracy

for ii, C in enumerate(lC):
    oLinSvc  = SVC(C = C, kernel = kernelType).fit(mX2, vY) #<! Model definition and training
    vAcc[ii] = oLinSvc.score(mX2, vY) #<! Accuracy

bestModelIdx    = np.argmax(vAcc) #<! Index of the (first) highest accuracy
bestC           = lC[bestModelIdx]

oLinSvc = SVC(C = bestC, kernel = kernelType).fit(mX2, vY) #<! Train again using the best `C`

print(f'The best model with C = {bestC:0.2f} achieved accuracy of {vAcc[bestModelIdx]:0.2%}')


In [None]:
PlotDecisionBoundary = PlotDecisionBoundaryClosure(numGridPts, 0, 2, -3.5, 3.5)


hF, hA = plt.subplots(figsize = FIG_SIZE_DEF)
hA = PlotDecisionBoundary(oLinSvc.predict, hA)
hA = PlotBinaryClassData(mX2, vY, hA = hA, axisTitle = 'Classifier Decision Boundary')
plt.show()

In [None]:
# Build the Pipeline

oPolarTrns = oPolarTrns.fit(np.zeros(shape = (numGridPts * numGridPts, 2))) #<! Fitting to the grid of the plot

#===========================Fill This===========================#
# Fill the objects
modelPipe = Pipeline([('Transformer', ???), ('Classifier', ???)])
#===============================================================#

In [None]:
PlotDecisionBoundary = PlotDecisionBoundaryClosure(numGridPts, -1.5, 1.5, -1.5, 1.5)

In [None]:
# Plot the Decision Boundary

hF, hA = plt.subplots(figsize = FIG_SIZE_DEF)
hA = PlotDecisionBoundary(modelPipe.predict, hA)
hA = PlotBinaryClassData(mX, vY, hA = hA, axisTitle = 'Classifier Decision Boundary')
plt.show()

* <font color='red'>(**?**)</font> Do we need both features?
* <font color='red'>(**?**)</font> How would you solve the case above?