## Imports

In [None]:
# General
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ML

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

# Custom
# The project's own `Components` package is not installed, so the current and
# the parent directory are added to the module search path before importing it.
# (The reload output of this cell shows the package resolving to '../Components'.)
import sys,os
sys.path.append( '.' )
sys.path.append( '..' )
import Components.Outlier_Detection as Outlier_Detection
import Components.Feature_Selection as Feature_Selection
import Components.Normalisation as Normalisation
import Components.data_fetching as data_fetching
import Components.Data_Augmentation as Data_Augmentation
import Components.wrapper as wrapper


# CAREFUL:
# If you make changes to a custom module, you have to reload it for the change
# to be picked up by the running session, i.e. rerun this cell.
import importlib
importlib.reload(Data_Augmentation)
importlib.reload(Outlier_Detection)
importlib.reload(Feature_Selection)
importlib.reload(Normalisation)
importlib.reload(data_fetching)
# Last expression of the cell: the notebook echoes the reloaded wrapper module.
importlib.reload(wrapper)

<module 'Components.wrapper' from '../Components/wrapper.py'>

## Data Preprocessing

### Data Import

In [None]:
# Fetch the data through the project's data_fetching helper:
# X and y are later passed to cross-validation as features and targets.
X, y = data_fetching.get_train_data()
# presumably the unlabeled test features — verify against data_fetching
x_test = data_fetching.get_test_data()

In [None]:
# Quick sanity check: show the first rows of the training features.
print(X.head())

         x0        x1        x2        x3        x4        x5        x6  \
0 -1.498973  1.448201  2.784979  1.905992  1.285007 -0.571679  1.253798   
1 -0.521470 -0.493049  0.891382 -0.080855  0.227825 -0.167394 -0.426608   
2 -0.417724 -0.019106  0.938377 -0.670472  0.298922  0.917788  0.189585   
3 -0.471972  0.000398  0.784836  1.088817 -0.436292  0.023086  0.611958   
4  0.201026 -0.579901  0.638809 -0.614121  0.468388  0.535726  0.271890   

         x7        x8        x9  ...      x990      x991      x992      x993  \
0 -2.590709  1.379211 -1.553323  ...  2.638401 -1.365574  2.856497 -1.916006   
1  0.371071 -0.065361 -0.271039  ...  0.662354 -0.443464 -0.540985 -0.164082   
2 -0.259406  0.591056 -1.391407  ...  0.617464 -0.543036 -0.321695 -1.778676   
3 -0.720903  0.310497 -0.703081  ...  0.672421 -1.942345  0.366181 -1.226904   
4  0.054270  0.297078 -0.677568  ...  0.144922  0.203202 -0.150227 -0.026890   

       x994      x995      x996      x997      x998      x999  
0  1

### Normalisation

In [None]:
# Normalise the training and the test features with the project's gaussian helper.
# NOTE(review): the two sets are normalised in separate calls. If gaussian()
# derives its statistics from the data it is given, the test set is scaled with
# its own statistics instead of the training ones — confirm this is intended.
X = Normalisation.gaussian(X)
x_test = Normalisation.gaussian(x_test)

### Model training

In [None]:
# One-vs-rest ensemble of class-weighted SVCs, evaluated with 3-fold
# cross-validation on the balanced-accuracy metric.
clf = OneVsRestClassifier(estimator=SVC(class_weight='balanced'))
scores = cross_val_score(
    estimator=clf,
    X=X,
    y=y,
    scoring='balanced_accuracy',
    cv=3,
)

# Report the per-fold scores followed by their average.
print(scores)
print(scores.mean())

[0.69       0.69805556 0.69027778]
0.6927777777777777


### To get a classification report, do a train/test split