# Libraries

In [8]:
import os
from pathlib import Path
import numpy as np
import pickle

from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import scipy

In [None]:
# Anchor every path on the directory the notebook is started from:
# features_path is that directory, notebooks_path its parent and
# repo_path its grandparent.
features_path = Path.cwd()
notebooks_path = features_path.parent
repo_path = notebooks_path.parent

# Make the working directory explicit and echo it for the record.
os.chdir(features_path)
print(os.getcwd())

# Functions

In [7]:
def save_pickle(file, filename):
    """Serialize an object to disk with pickle.

    The destination is opened in binary write mode (created or
    overwritten) and the object is dumped with the highest pickle
    protocol available to the running interpreter.

    Args:
        file (obj): object to serialize
        filename (str): path of the pickle file to write
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(file, sink, protocol=pickle.HIGHEST_PROTOCOL)

def open_pickle(filename):
    """Load an object back from a pickle file.

    Args:
        filename (str): path of the pickle file to read

    Returns:
        obj: object extracted from the pickle
    """
    with open(filename, mode='rb') as source:
        loaded = pickle.load(source)
    return loaded

In [9]:
def classifier(method, scaler=StandardScaler(), verbose=4, grid_type='normal'):
    """Build a scaler + classifier pipeline and its hyper-parameter search.

    Args:
        method (str): classifier name; one of 'KNN', 'RF' or 'SVM'.
        scaler (scikit object, optional): transformer used as the first
            pipeline step. Defaults to StandardScaler().
        verbose (int, optional): verbosity level forwarded to the search
            object. Defaults to 4.
        grid_type (str, optional): search strategy, used by 'SVM' only.
            'normal' builds an exhaustive GridSearchCV, 'random' builds a
            RandomizedSearchCV drawing 100 candidates. Ignored for 'KNN'
            and 'RF'. Defaults to 'normal'.

    Returns:
        tuple: ``(search, pipe)`` where ``pipe`` is the unfitted Pipeline
        and ``search`` is the unfitted GridSearchCV / RandomizedSearchCV
        wrapping that pipeline.

    Raises:
        ValueError: if ``method`` is not supported, or if ``method`` is
            'SVM' and ``grid_type`` is neither 'normal' nor 'random'.
    """
    # NOTE: the default StandardScaler() is created once, when the function
    # is defined, so every pipeline built without an explicit scaler holds
    # the same scaler instance. Pass a fresh scaler to avoid sharing it.
    if method == 'KNN':
        param_grid = {'classifier__n_neighbors': list(range(1, 32))}
        pipe = Pipeline([('scaler', scaler), ('classifier', KNeighborsClassifier())])
        search = GridSearchCV(pipe, param_grid, verbose=verbose)
        return search, pipe

    if method == 'RF':
        param_grid = {'classifier__n_estimators': [100, 200, 400]}
        pipe = Pipeline([('scaler', scaler), ('classifier', RandomForestClassifier())])
        search = GridSearchCV(pipe, param_grid, verbose=verbose)
        return search, pipe

    if method == 'SVM':
        # The search strategy used to be read from the not-yet-assigned local
        # `grid`, which raised UnboundLocalError; it is now an explicit argument.
        if grid_type == 'normal':
            param_grid = {
                'classifier__C': [0.1, 1, 10, 100],
                'classifier__gamma': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001],
                'classifier__kernel': ['rbf'],
                'classifier__class_weight': ['balanced'],
            }
            pipe = Pipeline([('scaler', scaler), ('classifier', SVC())])
            search = GridSearchCV(pipe, param_grid, scoring='accuracy', verbose=verbose)
            return search, pipe

        if grid_type == 'random':
            # Import the submodule explicitly: a bare `import scipy` does not
            # guarantee that `scipy.stats` is loaded on every scipy version.
            from scipy.stats import expon

            # C and gamma are sampled from exponential distributions.
            param_distributions = {
                'classifier__C': expon(scale=10),
                'classifier__gamma': expon(scale=.001),
                'classifier__kernel': ['rbf'],
                'classifier__class_weight': ['balanced'],
            }
            pipe = Pipeline([('scaler', scaler), ('classifier', SVC())])
            search = RandomizedSearchCV(
                pipe,
                param_distributions,
                n_iter=100,
                scoring='accuracy',
                verbose=verbose,
                return_train_score=False,
            )
            return search, pipe

        raise ValueError(
            f"Unsupported grid_type {grid_type!r} for 'SVM'; expected 'normal' or 'random'."
        )

    raise ValueError(
        f"Unsupported method {method!r}; expected 'KNN', 'RF' or 'SVM'."
    )

# IMP

In [None]:
# Read train and validation data
meta = pd.read_csv('data/meta_info.csv', sep='\t')