# Basic scaler implementations:
 - MinMaxStandardizer
 - NormalStandardizer
 - RobustStandardizer


In [159]:
import numpy as np
from abc import ABC, abstractmethod
from sklearn import preprocessing as prep

### Abstract Standardizer class for further inheritance:

In [160]:
class AbsStandardizer(ABC):
    '''
    Abstract base class for scalers that work on several named datasets.

    Every keyword argument given to the constructor is one dataset. The
    constructor stores them and immediately calls ``init_normalization``,
    which concrete subclasses implement.

     - data_to_stadnardize: dict[str, np.ndarray]
        Dictionary with keys as data names (headers) and values as the raw
        data passed to the constructor (expected to be numpy arrays).
     - data: dict[str, np.ndarray]
        Standardized data is stored here; it is read with ``scaler[key]``.
    '''

    def __init__(self, **data_to_stadnardize):
        self.data_to_stadnardize = data_to_stadnardize
        self.data = {}
        # Subclasses fill ``self.data`` here, so the instance is usable
        # right after construction.
        self.init_normalization()

    def __getitem__(self, key: str) -> np.ndarray:
        '''Return the entry of ``self.data`` stored under ``key``.

        Raises ``KeyError`` when ``key`` is not present in ``self.data``.
        '''
        return self.data[key]

    @abstractmethod
    def init_normalization(self):
        '''Process ``self.data_to_stadnardize`` and populate ``self.data``.'''

    # The abstract signatures below declare the ``(data, key)`` parameters
    # that concrete scalers take, so the interface and its implementations
    # agree.
    @abstractmethod
    def normalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Scale ``data`` using the parameters associated with ``key``.'''

    @abstractmethod
    def denormalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Undo ``normalize`` for ``data`` using the parameters of ``key``.'''

### MinMax Standardizer:

In [161]:
class MinMaxStandardizer(AbsStandardizer):
    '''
    Min-max scaler: maps each dataset to ``(x - min) / (max - min)``.

    ``min`` and ``max`` are taken along axis 0 of every dataset passed to the
    constructor and kept in ``self._params[key]`` under the keys ``"min"``
    and ``"max"``.
    '''

    def init_normalization(self):
        '''Compute min/max per dataset and store the scaled data.'''
        self._params = {}
        for key, value in self.data_to_stadnardize.items():
            # Storing the Standardizer main parameters into dictionary:
            self._params[key] = {
                "min": value.min(axis=0),
                "max": value.max(axis=0),
            }
            self.data[key] = (value - self._params[key]["min"]) / self._scale(key)

    def _scale(self, key: str) -> np.ndarray:
        '''Return ``max - min`` for ``key`` with zero ranges replaced by 1.'''
        data_range = self._params[key]["max"] - self._params[key]["min"]
        # A constant feature has a zero range; dividing by it would give
        # NaN/inf. Using 1 instead maps such a feature to 0 and keeps
        # normalize/denormalize an exact round trip.
        return np.where(data_range == 0, 1, data_range)

    def normalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Scale ``data`` with the min/max stored for ``key``.'''
        return (data - self._params[key]["min"]) / self._scale(key)

    def denormalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Invert ``normalize`` with the min/max stored for ``key``.'''
        return data * self._scale(key) + self._params[key]["min"]

### Normal Standardizer:

In [162]:
class NormalStandardizer(AbsStandardizer):
    '''
    Z-score scaler: maps each dataset to ``(x - mean) / std``.

    ``mean`` and ``std`` are taken along axis 0 of every dataset passed to
    the constructor and kept in ``self._params[key]`` under the keys
    ``"mean"`` and ``"std"``.
    '''

    def init_normalization(self):
        '''Compute mean/std per dataset and store the scaled data.'''
        self._params = {}
        for key, value in self.data_to_stadnardize.items():
            # Storing the Standardizer main parameters into dictionary:
            self._params[key] = {
                "mean": value.mean(axis=0),
                "std": value.std(axis=0),
            }
            self.data[key] = (value - self._params[key]["mean"]) / self._scale(key)

    def _scale(self, key: str) -> np.ndarray:
        '''Return the std stored for ``key`` with zeros replaced by 1.'''
        data_std = self._params[key]["std"]
        # A constant feature has zero std; dividing by it would give
        # NaN/inf. Using 1 instead leaves such a feature centred at 0.
        return np.where(data_std == 0, 1, data_std)

    def normalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Scale ``data`` with the mean/std stored for ``key``.'''
        return (data - self._params[key]["mean"]) / self._scale(key)

    def denormalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Invert ``normalize`` with the mean/std stored for ``key``.'''
        # The offset must be the mean (not the std) for this to be the
        # inverse of ``normalize``.
        return data * self._scale(key) + self._params[key]["mean"]

### Robust Standardizer:

In [163]:
class RobustStandardizer(AbsStandardizer):
    '''
    Robust scaler: maps each dataset to ``(x - median) / (q3 - q1)``.

    The 0.25 quantile, median and 0.75 quantile are taken along axis 0 of
    every dataset passed to the constructor and kept in
    ``self._params[key]`` under the keys ``"q1"``, ``"median"`` and ``"q3"``.
    '''

    def init_normalization(self):
        '''Compute quartiles/median per dataset and store the scaled data.'''
        self._params = {}
        for key, value in self.data_to_stadnardize.items():
            data_q1, data_q3 = np.quantile(value, [0.25, 0.75], axis=0)
            # Storing the Standardizer main parameters into dictionary:
            self._params[key] = {
                "q1": data_q1,
                "median": np.median(value, axis=0),
                "q3": data_q3,
            }
            self.data[key] = (value - self._params[key]["median"]) / self._scale(key)

    def _scale(self, key: str) -> np.ndarray:
        '''Return ``q3 - q1`` for ``key`` with zero ranges replaced by 1.'''
        iqr = self._params[key]["q3"] - self._params[key]["q1"]
        # A zero interquartile range would make the division produce
        # NaN/inf; using 1 instead leaves such a feature only centred.
        return np.where(iqr == 0, 1, iqr)

    def normalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Scale ``data`` with the median/IQR stored for ``key``.'''
        return (data - self._params[key]["median"]) / self._scale(key)

    def denormalize(self, data: np.ndarray, key: str) -> np.ndarray:
        '''Invert ``normalize`` with the median/IQR stored for ``key``.'''
        return data * self._scale(key) + self._params[key]["median"]

### Testing by comparing to SK-learn implementation:

In [164]:
# Small fixtures shared by the comparison cells below:
# a 3x3 integer matrix and a 1-D integer vector.
dummy_data_x = np.array(
    [
        [-41, 151, 55],
        [34, 10, -20],
        [44, -45, 51],
    ]
)
dummy_data_y = np.array([-41, 151, 64])

*MinMax*

In [165]:
# Fit the custom scaler and the scikit-learn one on the same matrix, then
# print both sets of per-column min/max values for a side-by-side check.
minmax_scaler = MinMaxStandardizer(dummy_x=dummy_data_x, dummy_y=dummy_data_y)
sk_minmax = prep.MinMaxScaler()
sk_minmax.fit(dummy_data_x)

print(
    f"My params: {minmax_scaler._params['dummy_x']['min']}, {minmax_scaler._params['dummy_x']['max']};",
    f"SK params: {sk_minmax.data_min_}, {sk_minmax.data_max_}.",
    sep="\n",
)

My params: [-41 -45 -20], [ 44 151  55];
SK params: [-41. -45. -20.], [ 44. 151.  55.].


*Standard*

In [166]:
# Fit the custom scaler and the scikit-learn one on the same matrix, then
# print both sets of per-column mean/std values for a side-by-side check.
normal_scaler = NormalStandardizer(dummy_x=dummy_data_x, dummy_y=dummy_data_y)
sk_normal = prep.StandardScaler()
sk_normal.fit(dummy_data_x)

print(
    f"My params: {normal_scaler._params['dummy_x']['mean']}, {normal_scaler._params['dummy_x']['std']};",
    f"SK params: {sk_normal.mean_}, {sk_normal.scale_}.",
    sep="\n",
)

My params: [12.33333333 38.66666667 28.66666667], [37.93268892 82.54426422 34.45125381];
SK params: [12.33333333 38.66666667 28.66666667], [37.93268892 82.54426422 34.45125381].


*Robust*

In [167]:
# Fit the custom scaler and the scikit-learn one on the same matrix, then
# print median and interquartile range from both for a side-by-side check.
robust_scaler = RobustStandardizer(dummy_x=dummy_data_x, dummy_y=dummy_data_y)
sk_robust = prep.RobustScaler()
sk_robust.fit(dummy_data_x)

# The custom scaler stores the quartiles separately, so derive the IQR here.
IQR = robust_scaler._params['dummy_x']['q3'] - robust_scaler._params['dummy_x']['q1']
print(
    f"My params: {robust_scaler._params['dummy_x']['median']}, {IQR}",
    f"SK params: {sk_robust.center_}, {sk_robust.scale_}.",
    sep="\n",
)

My params: [34. 10. 51.], [42.5 98.  37.5]
SK params: [34. 10. 51.], [42.5 98.  37.5].
