# **Notebook presenting how to use objects from common.py file**

## **1. Basic imports**

In [1]:
import sys
sys.path.append('/Users/Kuba/Desktop/DriftDetectionWithExplainableAI')
from src.datasets import get_dataset

## **2. Data Scaling**

In [2]:
from src.common import DataScaler, ScalingType

size_of_box = 1000
generator = get_dataset("sea_drift")
X, y = generator.generate(size_of_box, size_of_box, n_features=3, random_state=42)
half = len(X)//2

X_old, X_new = X.loc[:half-1], X.loc[half:]
y_old, y_new = y[:half], y[half:]

  from .autonotebook import tqdm as notebook_tqdm


### **2.1. Min Max scaling**

In [3]:
data_scaler = DataScaler(ScalingType.MinMax)
X_before_scaled = data_scaler.fit_transform(X_old)
X_after_scaled = data_scaler.transform(X_new)

print("Before scaling:")
print(X_old.head())

print("\nAfter scaling:")
print(X_before_scaled.head())

Before scaling:
         X1        X2        X3
0  6.394268  0.250108  2.750293
1  2.232107  7.364712  6.766995
2  8.921796  0.869388  4.219218
3  0.297972  2.186380  5.053553
4  0.265360  1.988377  6.498844

After scaling:
         X1        X2        X3
0  0.638524  0.024617  0.274656
1  0.221149  0.736432  0.676770
2  0.891980  0.086576  0.421711
3  0.027197  0.218341  0.505237
4  0.023927  0.198531  0.649926


### **2.2. Standardization**

In [4]:
data_scaler = DataScaler(ScalingType.Standard)
X_before_scaled = data_scaler.fit_transform(X_old)
X_after_scaled = data_scaler.transform(X_new)

print("Before scaling:")
print(X_old.head())

print("\nAfter scaling:")
print(X_before_scaled.head())

Before scaling:
         X1        X2        X3
0  6.394268  0.250108  2.750293
1  2.232107  7.364712  6.766995
2  8.921796  0.869388  4.219218
3  0.297972  2.186380  5.053553
4  0.265360  1.988377  6.498844

After scaling:
         X1        X2        X3
0  0.421596 -1.648987 -0.760505
1 -1.031445  0.827434  0.625005
2  1.303975 -1.433430 -0.253818
3 -1.706666 -0.975017  0.033975
4 -1.718051 -1.043937  0.532509


## **3. Data Dimensions Reduction**

In [5]:
from src.common import DataDimensionsReducer, ReducerType

print("Possible options to choose from:")
for reducer in ReducerType:
    print(f"    {reducer}")

Possible options to choose from:
    ReducerType.PCA
    ReducerType.ICA
    ReducerType.FA
    ReducerType.LDA
    ReducerType.TSNE
    ReducerType.UMAP
    ReducerType.LLE
    ReducerType.MDS


**Notes**: 
* TSNE, MDS, and LLE are embedding-only methods and do NOT support transform(). To generate reduced data it is necessary to fit_transform once again
* LDA requires class labels `y` for fitting and most importantly, number of components must be less than number of classes (2 classes -> max 1 component).

In [6]:
size_of_box = 1000
generator = get_dataset("hyperplane_drift")
X_10d, y_10d = generator.generate(size_of_box, size_of_box, n_features=10, n_drift_features=6, random_state=42)
half = len(X_10d)//2

X_10d_old, X_10d_new = X_10d.loc[:half-1], X_10d.loc[half:]
y_10d_old, y_10d_new = y[:half], y[half:]

### **3.1. Example of PCA**

In [7]:
reducer = DataDimensionsReducer(ReducerType.PCA, n_components=2)
X_2d_old = reducer.fit_transform(X_10d_old, return_df=True)

print("Before reduction:")
print(X_10d_old.head())

print("\nAfter reduction:")
print(X_2d_old.head())

Before reduction:
         X1        X2        X3        X4        X5        X6        X7  \
0  0.218638  0.505355  0.026536  0.198838  0.649884  0.544941  0.220441   
1  0.096716  0.847494  0.603726  0.807128  0.729732  0.536228  0.973116   
2  0.079792  0.232791  0.101001  0.277974  0.635684  0.364832  0.370181   
3  0.640000  0.556950  0.684614  0.842852  0.776000  0.229048  0.032100   
4  0.264880  0.246628  0.561368  0.262742  0.584586  0.897823  0.399401   

         X8        X9       X10  
0  0.589266  0.809430  0.006499  
1  0.378534  0.552041  0.829405  
2  0.209507  0.266978  0.936655  
3  0.315453  0.267741  0.210983  
4  0.219321  0.997538  0.509526  

After reduction:
   component_1  component_2
0    -0.307683    -0.139746
1     0.333288     0.710986
2    -0.143964     0.214250
3    -0.239656    -0.243530
4    -0.112130    -0.045972


### **3.2. Example of LDA**

In [8]:
reducer = DataDimensionsReducer(ReducerType.LDA, n_components=1)
X_2d_old = reducer.fit_transform(X_10d_old, y_10d_old, return_df=True)

print("Before reduction:")
print(X_10d_old.head())

print("\nAfter reduction:")
print(X_2d_old.head())

Before reduction:
         X1        X2        X3        X4        X5        X6        X7  \
0  0.218638  0.505355  0.026536  0.198838  0.649884  0.544941  0.220441   
1  0.096716  0.847494  0.603726  0.807128  0.729732  0.536228  0.973116   
2  0.079792  0.232791  0.101001  0.277974  0.635684  0.364832  0.370181   
3  0.640000  0.556950  0.684614  0.842852  0.776000  0.229048  0.032100   
4  0.264880  0.246628  0.561368  0.262742  0.584586  0.897823  0.399401   

         X8        X9       X10  
0  0.589266  0.809430  0.006499  
1  0.378534  0.552041  0.829405  
2  0.209507  0.266978  0.936655  
3  0.315453  0.267741  0.210983  
4  0.219321  0.997538  0.509526  

After reduction:
   component_1
0     0.655746
1    -1.105207
2    -0.206837
3    -0.911417
4     1.925987


### **3.3. Example of TSNE**

In [9]:
reducer = DataDimensionsReducer(ReducerType.TSNE, n_components=2)
X_2d_old = reducer.fit_transform(X_10d_old, return_df=True)

print("Before reduction:")
print(X_10d_old.head())

print("\nAfter reduction:")
print(X_2d_old.head())

Before reduction:
         X1        X2        X3        X4        X5        X6        X7  \
0  0.218638  0.505355  0.026536  0.198838  0.649884  0.544941  0.220441   
1  0.096716  0.847494  0.603726  0.807128  0.729732  0.536228  0.973116   
2  0.079792  0.232791  0.101001  0.277974  0.635684  0.364832  0.370181   
3  0.640000  0.556950  0.684614  0.842852  0.776000  0.229048  0.032100   
4  0.264880  0.246628  0.561368  0.262742  0.584586  0.897823  0.399401   

         X8        X9       X10  
0  0.589266  0.809430  0.006499  
1  0.378534  0.552041  0.829405  
2  0.209507  0.266978  0.936655  
3  0.315453  0.267741  0.210983  
4  0.219321  0.997538  0.509526  

After reduction:
   component_1  component_2
0   -12.798947   -22.673468
1    24.791330    -5.325072
2    -1.419383    -3.022733
3   -21.629795    -4.438896
4   -33.907104   -12.707023
