## Imports

In [5]:
# General
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ML
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

# Custom
import os
import sys

# Directory that contains the project's `Components` package. It can be overridden
# with the AML_SOLUTION_DIR environment variable; the default is the original path.
SOLUTION_DIR = os.environ.get('AML_SOLUTION_DIR', '/home/jovyan/work/AML/Task_2/Solution')
# This cell is re-run whenever a custom module changes, so only register the
# directory once instead of appending a duplicate sys.path entry on every run.
if SOLUTION_DIR not in sys.path:
    sys.path.append(SOLUTION_DIR)
import Components.Outlier_Detection as Outlier_Detection
import Components.Feature_Selection as Feature_Selection
import Components.Normalisation as Normalisation
import Components.data_fetching as data_fetching
import Components.Data_Augmentation as Data_Augmentation
import Components.wrapper as wrapper


# NOTE:
# Edits to a custom module are only picked up after the module has been reloaded,
# i.e. re-run this cell whenever one of the modules below changes.
import importlib

# Reload in the same order as before; wrapper is handled separately below.
for _module in (
    Data_Augmentation,
    Outlier_Detection,
    Feature_Selection,
    Normalisation,
    data_fetching,
):
    importlib.reload(_module)

# Kept as the trailing expression so the cell still displays the reloaded wrapper module.
importlib.reload(wrapper)

<module 'Components.wrapper' from '/home/jovyan/work/AML/Task_2/Solution/Components/wrapper.py'>

## Data Preprocessing

### Data Import

In [6]:
# Fetch the training data through the project's data_fetching helper; the result is
# unpacked into X and y (presumably features and labels — confirm in data_fetching).
X, y = data_fetching.get_train_data()
# Fetch the test data the same way (presumably features only — confirm in data_fetching).
x_test = data_fetching.get_test_data()

In [11]:
# Preview the first rows. Leaving the frame as the cell's last expression lets Jupyter
# render it as a table instead of the wrapped plain text that print() produces.
X.head()

         x0        x1        x2        x3        x4        x5        x6  \
0 -1.498973  1.448201  2.784979  1.905992  1.285007 -0.571679  1.253798   
1 -0.521470 -0.493049  0.891382 -0.080855  0.227825 -0.167394 -0.426608   
2 -0.417724 -0.019106  0.938377 -0.670472  0.298922  0.917788  0.189585   
3 -0.471972  0.000398  0.784836  1.088817 -0.436292  0.023086  0.611958   
4  0.201026 -0.579901  0.638809 -0.614121  0.468388  0.535726  0.271890   

         x7        x8        x9  ...      x990      x991      x992      x993  \
0 -2.590709  1.379211 -1.553323  ...  2.638401 -1.365574  2.856497 -1.916006   
1  0.371071 -0.065361 -0.271039  ...  0.662354 -0.443464 -0.540985 -0.164082   
2 -0.259406  0.591056 -1.391407  ...  0.617464 -0.543036 -0.321695 -1.778676   
3 -0.720903  0.310497 -0.703081  ...  0.672421 -1.942345  0.366181 -1.226904   
4  0.054270  0.297078 -0.677568  ...  0.144922  0.203202 -0.150227 -0.026890   

       x994      x995      x996      x997      x998      x999  
0  1

    id        x0        x1        x2        x3        x4        x5        x6  \
0  0.0 -1.498973  1.448201  2.784979  1.905992  1.285007 -0.571679  1.253798   
1  1.0 -0.521470 -0.493049  0.891382 -0.080855  0.227825 -0.167394 -0.426608   
2  2.0 -0.417724 -0.019106  0.938377 -0.670472  0.298922  0.917788  0.189585   
3  3.0 -0.471972  0.000398  0.784836  1.088817 -0.436292  0.023086  0.611958   
4  4.0  0.201026 -0.579901  0.638809 -0.614121  0.468388  0.535726  0.271890   

         x7        x8  ...      x990      x991      x992      x993      x994  \
0 -2.590709  1.379211  ...  2.638401 -1.365574  2.856497 -1.916006  1.406900   
1  0.371071 -0.065361  ...  0.662354 -0.443464 -0.540985 -0.164082  0.223598   
2 -0.259406  0.591056  ...  0.617464 -0.543036 -0.321695 -1.778676  1.118608   
3 -0.720903  0.310497  ...  0.672421 -1.942345  0.366181 -1.226904  1.152494   
4  0.054270  0.297078  ...  0.144922  0.203202 -0.150227 -0.026890  0.106822   

       x995      x996      x997      x