[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io)

# Regressor - Regression Trees

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 01/10/2022 | Royi Avital | First version                                                      |
|         |            |             |                                                                    |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/IntroductionMachineLearningSystemEngineers/RegressorTree.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

from scipy.spatial.distance import cdist

# Misc
import datetime
import os
from platform import python_version
import random
import warnings
import yaml

# Typing
from typing import Tuple

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import Image, display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout

In [None]:
# Configuration
%matplotlib inline

warnings.filterwarnings("ignore")

seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# sns.set_theme() #>! Apply SeaBorn theme

runInGoogleColab = 'google.colab' in str(get_ipython())

In [None]:
# Constants

# Defaults used by the plotting code of this notebook
FIG_SIZE_DEF = (8, 8) #<! Default of the `figSize` parameter (Forwarded to `figsize`)
ELM_SIZE_DEF = 50 #<! Size (`s`) of the scatter elements
CLASS_COLOR = ('b', 'r')
EDGE_COLOR = 'k' #<! Edge color of the scatter elements
MARKER_SIZE_DEF = 10 #<! Default of the `markerSize` parameter
LINE_WIDTH_DEF = 2 #<! Default of the `lineWidth` parameter


In [None]:
# Fixel Algorithms Packages


In [None]:
# Parameters

# Data Generation
numSamples = 201 #<! Number of random samples to draw
noiseStd   = 0.1 #<! Scale applied to the standard normal noise samples

# Model
# NOTE(review): `vP`, `polynomDeg` and `λ` are not referenced by the cells visible in this section.
vP         = np.array([0.25, 2.0, 5.0])
polynomDeg = 2
λ          = 0.1

# Data Visualization
# NOTE(review): `gridSclae` is a misspelling of "grid scale"; the name is kept as the data generation cell reads it.
gridSclae  = 5 #<! Upper end of the evaluation grid and scale of the random sample locations
numGridPts = 250 #<! Number of points of the evaluation grid

In [None]:
# Auxiliary Functions

def f(vX):
    """
    Evaluates the piecewise reference signal on the samples of `vX`.

    Per element `x` the value is:
      - 0                   for x < 1.5.
      - .5 + x / 5 - .25    for 1.5 <= x < 2.5.
      - 1                   for 2.5 <= x < 3.25.
      - 0.5                 otherwise.

    Input:
      - vX: NumPy array of sample locations (Boolean mask indexing is applied to it).
    Output:
      - vY: A new array, shaped like `vX`, holding the signal values. `vX` is not modified.
    """

    # Disjoint masks, one per segment (Elements matching none keep the 0.5 default)
    vMaskZero = vX < 1.5
    vMaskRamp = (vX >= 1.5) & (vX < 2.5)
    vMaskUnit = (vX >= 2.5) & (vX < 3.25)

    vY = np.ones_like(vX) / 2
    vY[vMaskUnit] = 1
    vY[vMaskRamp] = .5 + vX[vMaskRamp] / 5 - .25
    vY[vMaskZero] = 0

    return vY


def PlotTree(vX: np.ndarray, vY: np.ndarray, vS: np.ndarray, numSplits: int, hA:plt.Axes = None, figSize: Tuple[int, int] = FIG_SIZE_DEF, markerSize: int = MARKER_SIZE_DEF, lineWidth: int = LINE_WIDTH_DEF, axisTitle: str = None):
    """
    Fits a regression tree to 1D data and draws the fit next to the tree structure.

    Input:
      - vX:         1D array of sample locations (Used as the single feature).
      - vY:         1D array of sample values (Regression targets).
      - vS:         1D array of locations at which the fitted tree is evaluated and drawn.
      - numSplits:  Number of splits allowed. The tree is limited to `numSplits + 1` leaves.
      - hA:         Indexable pair of axes: `hA[0]` shows the data and the fit, `hA[1]` shows the tree.
                    When `None`, a new figure with 1 x 2 axes is created.
      - figSize:    Size of the figure created when `hA` is `None`.
      - markerSize: Accepted but not used; the scatter is drawn with `ELM_SIZE_DEF`.
      - lineWidth:  Accepted but not used; the fit is drawn with a fixed line width of 3.
      - axisTitle:  Optional title of the data axes (`hA[0]`). No title is set when `None`.
    Output:
      - None. The function only draws.
        NOTE(review): Presumably nothing is returned so `interact()` does not display the figure once more - confirm.
    """

    if hA is None:
        _, hA = plt.subplots(1, 2, figsize = figSize)

    # We can choose different loss models: “squared_error”, “friedman_mse”, “absolute_error”, “poisson”
    # MSE: Estimate the mean of the samples (Minimize L2)
    # Friedman MSE: MSE with Friedman's improvement score to measure the gain of the split
    # MAE: Estimate the median of the samples (Minimize L1)
    # Poisson Deviance: Estimate the fit to Poisson Model (Occurrence of events: Number of hours -> Number of failures)
    oTree = DecisionTreeRegressor(criterion = 'squared_error', max_leaf_nodes = numSplits + 1, random_state = 0) #<! k splits yield k + 1 leaves
    oTree = oTree.fit(vX[:, None], vY) #<! fit() requires 2D input as mX

    vYY = oTree.predict(vS[:, None]) #<! predict() requires 2D input as mX

    # Raw string: the backslash of the LaTeX command must not be parsed as an escape sequence
    hA[0].scatter(vX, vY, s = ELM_SIZE_DEF, c = 'b', edgecolor = EDGE_COLOR, label = r'$y_i = f(x_i) + \epsilon_i$')
    hA[0].plot(vS, vYY, c = 'r', lw = 3, label = 'Regression Tree')
    hA[0].set_xlabel('$x$')
    hA[0].set_ylabel('$y$')
    if axisTitle is not None:
        hA[0].set_title(axisTitle)
    hA[0].grid()
    hA[0].legend()

    plot_tree(oTree, filled = True, ax = hA[1], rounded = True)
    hA[1].set_title(f'Max splits = {numSplits}')


## Generate Data


In [None]:
# Evaluation grid: `numGridPts` evenly spaced points over [0, gridSclae]
vS = np.linspace(start = 0, stop = gridSclae, num = numGridPts)

# Noisy samples of the signal at random locations
vX = np.random.rand(numSamples) * gridSclae #<! Sample locations (Uniform over [0, gridSclae), not a grid)
vN = np.random.randn(numSamples) * noiseStd #<! Noise samples
vY = f(vX) + vN #<! Signal samples


### Plot Data

In [None]:
# Scatter plot of the noisy samples
hF, hA = plt.subplots(figsize = (20, 5))
# Raw string: `\e` is not a valid escape sequence (Warning on recent Python versions)
hA.scatter(vX, vY, s = ELM_SIZE_DEF, c = 'b', edgecolor = EDGE_COLOR, label = r'$y_i = f(x_i) + \epsilon_i$')
hA.set_xlabel('$x$')
hA.set_ylabel('$y$')
hA.grid()
hA.axis('equal') #<! Same scaling for both axes
hA.legend()

## Train a Regression Tree


In [None]:
# Interactive view: the slider value is passed as `numSplits` (1 to 20) to `PlotTree()`
splitSlider = IntSlider(value = 1, min = 1, max = 20, step = 1, layout = Layout(width = '30%'))
hPlotTree   = lambda numSplits: PlotTree(vX = vX, vY = vY, vS = vS, figSize = (20, 10), numSplits = numSplits)
interact(hPlotTree, numSplits = splitSlider)