## Imports

In [None]:
!pip install jupyternotify
%load_ext jupyternotify



<IPython.core.display.Javascript object>

In [None]:
# General
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ML
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score

# Custom
import os
import sys

# Put the current and the parent directory on the module search path so the
# `Components` package imported below can be resolved.
sys.path.extend(['.', '..'])
import Components.Outlier_Detection as Outlier_Detection
import Components.Feature_Selection as Feature_Selection
import Components.Normalisation as Normalisation
import Components.data_fetching as data_fetching
import Components.Data_Augmentation as Data_Augmentation
import Components.wrapper as wrapper


# NOTE:
# Edits to any of the custom modules only take effect after a reload,
# so re-run this cell whenever one of them has been changed.
import importlib

# Reload in a fixed order; `wrapper` goes last as a bare expression so the
# cell still echoes the reloaded module object.
for _custom_module in (
    Data_Augmentation,
    Outlier_Detection,
    Feature_Selection,
    Normalisation,
    data_fetching,
):
    importlib.reload(_custom_module)
importlib.reload(wrapper)

<module 'Components.wrapper' from '../Components/wrapper.py'>

## Data Preprocessing

### Data Import

In [None]:
# Fetch the training pair (x_train, y_train) and the test inputs through the
# project's data_fetching module.
# NOTE(review): the returned types/shapes are not visible from this notebook;
# y_train is flattened with np.ravel before fitting, so it is presumably 2-D — confirm.
x_train, y_train = data_fetching.get_train_data()
x_test = data_fetching.get_test_data()

## Model

### Pipeline Setup

In [None]:
# Two-step pipeline: standardise the features, then classify with an SVM whose
# class weights are balanced to compensate for uneven class frequencies.
# The C / kernel / gamma values given here are only starting points; the grid
# search later in the notebook varies them.
pipe = Pipeline(
    steps=[
        ('scaling', StandardScaler()),
        (
            'classification',
            SVC(
                C=1.0,
                kernel='rbf',
                gamma='scale',
                shrinking=True,
                cache_size=1000,
                class_weight='balanced',
            ),
        ),
    ]
)


### Grid Search

In [None]:
%%notify
C_range = np.logspace(-3, 3, 7)
gamma_range = np.logspace(-3, 3, 7)
parameters = [{'classification__kernel': ['rbf'], 'classification__gamma': gamma_range,'classification__C': C_range},
              {'classification__kernel': ['linear'], 'classification__C': C_range},
              {'classification__kernel': ['poly'], 'classification__C':C_range, 'classification__degree':[2,3,4,5]},
              {'classification__kernel':['sigmoid'],'classification__C':C_range}
             ]

clf = GridSearchCV(pipe, parameters,cv=10,n_jobs=16,scoring='balanced_accuracy')
clf.fit(x_train, np.ravel(y_train))

# View The Best Parameters
print(clf.best_params_)

{'classification__C': 0.001, 'classification__kernel': 'linear'}


<IPython.core.display.Javascript object>

In [None]:
# Cross-validated balanced-accuracy score of the best parameter combination
# (the search above was configured with scoring='balanced_accuracy').
print(clf.best_score_)

0.6962037037037037


In [None]:
# Turn the grid-search result dictionary into a DataFrame for tabular inspection.
results = pd.DataFrame(clf.cv_results_)

In [None]:
# Lift pandas' row/column truncation and widen cells to 200 characters so every
# parameter dictionary is shown in full. These options are set globally and
# remain in effect for the rest of the session.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 200)

# Show each parameter combination next to its mean cross-validation score.
results[["params", "mean_test_score"]]

Unnamed: 0,params,mean_test_score
0,"{'classification__C': 0.001, 'classification__gamma': 0.001, 'classification__kernel': 'rbf'}",0.498241
1,"{'classification__C': 0.001, 'classification__gamma': 0.01, 'classification__kernel': 'rbf'}",0.341944
2,"{'classification__C': 0.001, 'classification__gamma': 0.1, 'classification__kernel': 'rbf'}",0.333333
3,"{'classification__C': 0.001, 'classification__gamma': 1.0, 'classification__kernel': 'rbf'}",0.333333
4,"{'classification__C': 0.001, 'classification__gamma': 10.0, 'classification__kernel': 'rbf'}",0.333333
5,"{'classification__C': 0.001, 'classification__gamma': 100.0, 'classification__kernel': 'rbf'}",0.333333
6,"{'classification__C': 0.001, 'classification__gamma': 1000.0, 'classification__kernel': 'rbf'}",0.333333
7,"{'classification__C': 0.01, 'classification__gamma': 0.001, 'classification__kernel': 'rbf'}",0.626481
8,"{'classification__C': 0.01, 'classification__gamma': 0.01, 'classification__kernel': 'rbf'}",0.333333
9,"{'classification__C': 0.01, 'classification__gamma': 0.1, 'classification__kernel': 'rbf'}",0.333333
