In [None]:
!pip install imblearn


In [None]:
# General
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ML

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
from imblearn.combine import SMOTEENN
from sklearn.pipeline import Pipeline



# Custom
# Append the current working directory and its parent to the module search
# path so that the project-local `Components` package can be imported below.
import os
import sys

sys.path.extend(['.', '..'])
import Components.Outlier_Detection as Outlier_Detection
import Components.Feature_Selection as Feature_Selection
import Components.Normalisation as Normalisation
import Components.data_fetching as data_fetching
import Components.Data_Augmentation as Data_Augmentation
import Components.wrapper as wrapper
import Components.two_step_utils as two_step_utils




# CAREFUL:
# A running kernel keeps using the already-imported version of a module.
# If you change one of the custom modules under Components/, you have to
# reload it, i.e. rerun this cell, before the change takes effect.
# The last reload call is the cell's final expression, so its return value
# (the reloaded module) is what the cell displays as output.
import importlib
importlib.reload(Data_Augmentation)
importlib.reload(Outlier_Detection)
importlib.reload(Feature_Selection)
importlib.reload(Normalisation)
importlib.reload(data_fetching)
importlib.reload(wrapper)
importlib.reload(two_step_utils)

<module 'Components.two_step_utils' from '../Components/two_step_utils.py'>

### Data Imports

In [None]:
# Fetch the training data and the test data through the project's
# data_fetching helpers.
# NOTE(review): presumably X holds the training features and y the matching
# labels, while x_test holds unlabelled test features — confirm in
# Components/data_fetching.
X, y = data_fetching.get_train_data()
x_test = data_fetching.get_test_data()



### Normalisation

In [None]:
# Apply the project's gaussian normalisation to the training and test data.
# CAREFUL: both names are overwritten with their normalised version, so
# re-running this cell passes already-normalised data through gaussian()
# again. Rerun the data import cell first if this cell has to be repeated.
# NOTE(review): train and test are normalised by two independent calls. If
# gaussian() estimates its statistics from the data it receives, the two sets
# end up scaled with different parameters — confirm in Components/Normalisation.
X = Normalisation.gaussian(X)
x_test = Normalisation.gaussian(x_test)

# TASK 1

In [None]:
# Derive the data sets for the two-step approach from the normalised training
# data. (X_1, y_1) is what the TASK 1 grid search is fitted on.
# NOTE(review): (X_2, y_2) is presumably the data for the second step / TASK 2;
# how the split is made is defined in Components/two_step_utils.transform and
# is not visible in this notebook — confirm there.
X_1, y_1, X_2, y_2 = two_step_utils.transform(X,y)

### Pipeline setup

In [None]:
# Ingredients we are after: a scaling transformer, a weighted loss function
# and a multi-class model.
# The pipeline itself holds a single step, the SVC classifier named
# 'classification'; class_weight='balanced' provides the class weighting.
# The step name matters: the grid-search parameter keys are prefixed with it.
pipe = Pipeline(
    steps=[
        (
            'classification',
            SVC(
                C=1.0,
                kernel='rbf',
                gamma='scale',
                shrinking=True,
                cache_size=1000,
                class_weight='balanced',
            ),
        ),
    ]
)

In [None]:
# Hyper-parameter candidates for the 'classification' step of `pipe`.
# Keys follow the '<step name>__<parameter>' convention of sklearn pipelines.
gamma_range = np.logspace(-5, -3, 3)  # 1e-5, 1e-4, 1e-3
parameters = [
    # RBF kernel: every gamma is combined with every C (3 x 4 candidates).
    dict(
        classification__kernel=['rbf'],
        classification__gamma=gamma_range,
        classification__C=[0.1, 0.5, 1.0, 5.0],
    ),
    # Linear kernel: only C is searched (3 candidates).
    dict(
        classification__kernel=['linear'],
        classification__C=np.logspace(-5, -3, 3),
    ),
]

# Exhaustive search over both grids, scored by balanced accuracy with
# 10-fold cross-validation, using all available cores and verbose logging.
clf = GridSearchCV(
    estimator=pipe,
    param_grid=parameters,
    cv=10,
    n_jobs=-1,
    scoring='balanced_accuracy',
    verbose=10,
)




In [None]:
# Run the grid search on the TASK 1 data.
# The parameter grid holds 12 rbf + 3 linear candidates, each evaluated with
# cv=10, i.e. 150 model fits — expect this cell to run for a while.
clf.fit(X_1, y_1)

KeyboardInterrupt: 

In [None]:
# Report the best parameter combination found by the search and its
# cross-validated score, one per line.
print(clf.best_params_, clf.best_score_, sep='\n')

# TASK 2

In [None]:
# TASK 2 is trained on the second data set returned by
# two_step_utils.transform. Fitting on (X_1, y_1) here would only repeat the
# TASK 1 search and leave (X_2, y_2) unused.
# NOTE: this reuses the same `clf` object, so the TASK 1 results stored on it
# (best_params_, best_score_) are replaced by the outcome of this fit.
clf.fit(X_2, y_2)

In [None]:
# Report the best parameter combination of the most recent fit and its
# cross-validated score, one per line.
print(clf.best_params_, clf.best_score_, sep='\n')