# XGBoost Classifier

In [10]:
# For browser notification
# !pip install jupyternotify
# %load_ext jupyternotify
# !pip install xgboost
# !pip install imblearn

# General
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ML
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score
from sklearn.feature_selection import mutual_info_classif
import xgboost as xgb

# Custom
import os
import sys

# Put the current and the parent directory on the module search path so that
# the `Components` package imported below can be resolved.
sys.path.extend(['.', '..'])
import Components.Outlier_Detection as Outlier_Detection
import Components.Feature_Selection as Feature_Selection
import Components.Normalisation as Normalisation
import Components.data_fetching as data_fetching
import Components.Data_Augmentation as Data_Augmentation
import Components.wrapper as wrapper

# CAREFUL:
# Python caches imported modules, so edits made to a custom `Components` module
# are not picked up by a plain re-import. After changing one of those modules,
# re-run this cell: each importlib.reload() call below re-executes the module.
# NOTE(review): `wrapper` is reloaded last - presumably because it builds on the
# other components; confirm before reordering these calls.
import importlib
importlib.reload(Data_Augmentation)
importlib.reload(Outlier_Detection)
importlib.reload(Feature_Selection)
importlib.reload(Normalisation)
importlib.reload(data_fetching)
# Last expression of the cell: its return value (the reloaded module) is what
# the notebook echoes as the cell output.
importlib.reload(wrapper)


<module 'Components.wrapper' from '../Components/wrapper.py'>

# Data Preprocessing

In [21]:
# Fetch the training features with their labels, and the test features.
x_train, y_train = data_fetching.get_train_data()
x_test = data_fetching.get_test_data()

# Flatten the labels into a 1-D array, the target shape scikit-learn expects.
y_train = np.ravel(y_train)

In [24]:
# Preview the first rows of the training features. Ending the cell with a bare
# expression (instead of print) lets Jupyter render the DataFrame with its rich
# table display rather than a wrapped plain-text dump.
x_train.head()

        0         1         2         3         4         5         6    \
0 -1.498973  1.448201  2.784979  1.905992  1.285007 -0.571679  1.253798   
1 -0.521470 -0.493049  0.891382 -0.080855  0.227825 -0.167394 -0.426608   
2 -0.417724 -0.019106  0.938377 -0.670472  0.298922  0.917788  0.189585   
3 -0.471972  0.000398  0.784836  1.088817 -0.436292  0.023086  0.611958   
4  0.201026 -0.579901  0.638809 -0.614121  0.468388  0.535726  0.271890   

        7         8         9    ...       990       991       992       993  \
0 -2.590709  1.379211 -1.553323  ...  2.638401 -1.365574  2.856497 -1.916006   
1  0.371071 -0.065361 -0.271039  ...  0.662354 -0.443464 -0.540985 -0.164082   
2 -0.259406  0.591056 -1.391407  ...  0.617464 -0.543036 -0.321695 -1.778676   
3 -0.720903  0.310497 -0.703081  ...  0.672421 -1.942345  0.366181 -1.226904   
4  0.054270  0.297078 -0.677568  ...  0.144922  0.203202 -0.150227 -0.026890   

        994       995       996       997       998       999  
0  1

In [23]:
# Estimate the mutual information between every feature and the class label
# (one score per feature column).
# mutual_info_classif adds a small amount of random noise to continuous
# features, so a fixed random_state is required for the scores to be
# reproducible when the notebook is re-run from a fresh kernel.
mutual_info_classif(x_train, y_train, random_state=0)

array([6.03844263e-02, 8.60610559e-02, 5.13496500e-02, 1.29712093e-01,
       1.99019289e-02, 1.27995882e-03, 3.84374391e-02, 9.68442770e-02,
       1.78532206e-02, 2.45817574e-02, 3.30027579e-02, 4.24420825e-02,
       3.14169868e-02, 1.41408912e-02, 9.71728519e-03, 4.71686316e-02,
       7.79479335e-02, 9.02380553e-02, 1.84262478e-02, 1.66257500e-02,
       1.21174244e-01, 4.47365287e-02, 1.98588332e-02, 7.00113721e-02,
       3.91665308e-02, 7.54686437e-02, 1.10558345e-01, 1.14054607e-02,
       7.20932120e-02, 2.54616946e-02, 1.35831242e-01, 8.56129932e-02,
       2.95621796e-02, 4.17712634e-02, 7.41315643e-02, 1.46675938e-01,
       4.73176089e-02, 2.57732300e-02, 8.11059928e-02, 9.86418215e-02,
       1.19383519e-02, 7.28333427e-03, 2.74922184e-02, 3.74053521e-02,
       1.99651900e-02, 1.24244155e-01, 4.18628855e-02, 3.16910180e-02,
       1.14880744e-01, 1.30894462e-01, 1.13501111e-01, 2.11494857e-02,
       3.57316117e-02, 3.23873027e-02, 1.63193449e-01, 1.14040581e-01,
      