# Tune the hyper-parameters of the SVC model

In [1]:
%matplotlib inline

# Reading files
import h5py
import toml

# Scientific computing
import numpy as np
import pandas as pd
from scipy import interp

# Plot
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
#sns.set_context('paper')

# Machine Learning
## Model
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
## Splitter Classes
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
# Splitter Functions
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
# Hyper-parameter optimizers
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
# Model validation
from sklearn.model_selection import learning_curve
# Training metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

# Other
import os
import time
import requests

Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html


# NO MULTI GLITCH

## Preparation

Read the samples from `classification/ris/OUT-classified-merged.h5` and build the `data` and `target` numpy arrays.

**Label `0` = NO GLITCH**

**Label `1` = GLITCH**

In [2]:
# Build the feature matrix `data` and the label vector `target` from the
# merged classification file.
#
# Each HDF5 group holds the samples of one class; the rows of every group are
# stacked in the order listed below and labelled with the value paired with it:
#   1 = GLITCH, 0 = NO GLITCH.
# Labels are floats, matching what np.ones / np.zeros would produce.
GROUP_LABELS = (('GLITCH', 1.0), ('NO_GLITCH', 0.0))

data_parts = []
target_parts = []
with pd.HDFStore('../../classification/ris/OUT-classified-merged.h5', mode='r') as in_data:
    for group, label in GROUP_LABELS:
        # Read each group from disk exactly once and reuse it for both the
        # samples and the number of labels to generate.
        samples = in_data[group].to_numpy()
        data_parts.append(samples)
        target_parts.append(np.full(len(samples), label))

# A single concatenation at the end avoids re-copying the growing arrays.
data = np.concatenate(data_parts)
target = np.concatenate(target_parts)

# Every sample must have exactly one label.
assert len(data) == len(target), "data/target length mismatch"

## Tuning the hyper-parameters

### Kernel: `rbf`

#### Grid Search

### Kernel: `sigmoid`

Best parameter set found on the development set: `{'C': 0.38000000000000006, 'coef0': -1.55, 'gamma': 0.0029500000000000012}`

Score: $0.7365370516420923$

#### Randomized Search

#### Grid Search

### Bagging Classifier

# YES MULTI GLITCH

## Preparation

Read the samples from `classification/ris/OUT-classified-merged.h5` into numpy arrays and build the matching target labels.

**Label `0` = NO GLITCH**

**Label `1` = GLITCH and MULTI GLITCH**

## Tuning the hyper-parameters

### Kernel: `rbf`

#### Randomized Search

#### Grid Search