### Importing libraries and helper methods

In [None]:
import sys
import os
sys.path.append(os.path.abspath('../../models/'))

In [1]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning

# We're ignoring some warning from sklearn.metrics.classification_report
warnings.simplefilter(action='ignore', category=UndefinedMetricWarning)

In [3]:
import numpy as np
from helpers.utils import print_metrics
from sklearn.model_selection import RandomizedSearchCV, train_test_split, GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from helpers.database_helpers import get_stratified_kfold, get_iris
from models.icq_estimators import IcqClassifier
from helpers.icq_methods import create_and_execute_classifier_new_approach

### Setting up database, k-fold and random seed

In [4]:
seed = 42

In [5]:
X, y = get_iris()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
cv = get_stratified_kfold(random_seed=seed)
icq = OneVsRestClassifier(IcqClassifier(classifier_function=create_and_execute_classifier_new_approach, max_iter=2000, random_seed=seed, plot_graphs_and_metrics=False))

# Search for Sigma Q Params
First research on the new approach (having weights on U operator and inputs on rho env) - varying SigmaQ params

## Integer params
On this phase, we're dealing only with integer params, i.e. [1, 1, 1, 1], [2, 2, 2, 2], etc.

In [6]:
params = dict()
params["estimator__sigma_q_weights"] = []
params["estimator__classifier_function"] = [create_and_execute_classifier_new_approach]
params["estimator__max_iter"] = [2000]
params["estimator__random_seed"] = [seed]
params["estimator__accuracy_succ"] = [1.00]
for i in range(0, 15, 1):
    for j in range(0, 15, 1):
        for k in range(0, 15, 1):
            for l in range(0, 15, 1):
                if not(i == j and j == k and i == 1 and l == 0):
                    params["estimator__sigma_q_weights"].append([i, j, k, l])

In [7]:
%%time
busca = RandomizedSearchCV(icq, params, n_iter=50, scoring='accuracy', n_jobs=-1, cv=cv, random_state=seed, verbose=100)

# Executa busca
resultado = busca.fit(X_train, y_train)

# Resume resultados
print('Resultados busca - ICQ Alterado')
print('Melhor acurácia: %s' % resultado.best_score_)
print('Melhor hiperparâmetro: %s' % resultado.best_params_)

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Resultados busca - ICQ Alterado
Melhor acurácia: 0.8916666666666666
Melhor hiperparâmetro: {'estimator__sigma_q_weights': [1, 0, 12, 1], 'estimator__random_seed': 42, 'estimator__max_iter': 2000, 'estimator__classifier_function': <function create_and_execute_classifier_new_approach at 0x0000016BE1EC08B0>, 'estimator__accuracy_succ': 1.0}
Wall time: 9h 19min 28s


## Float params
On this phase, we'll deal with Sigma Q params as floats [0,1].

In [8]:
params = dict()
params["estimator__sigma_q_weights"] = []
params["estimator__classifier_function"] = [create_and_execute_classifier_new_approach]
params["estimator__max_iter"] = [2000]
params["estimator__random_seed"] = [seed]
params["estimator__accuracy_succ"] = [1.00]
for i in np.arange(0, 1, 0.1):
    for j in np.arange(0, 1, 0.1):
        for k in np.arange(0, 1, 0.1):
            for l in np.arange(0, 1, 0.1):
                if not(i == j and j == k and i == 1 and l == 0):
                    params["estimator__sigma_q_weights"].append([i, j, k, l])

In [9]:
%%time
busca = RandomizedSearchCV(icq, params, n_iter=50, scoring='accuracy', n_jobs=-1, cv=cv, random_state=seed, verbose=1)

# Executa busca
resultado = busca.fit(X_train, y_train)

# Resume resultados
print('Resultados busca - ICQ Alterado')
print('Melhor acurácia: %s' % resultado.best_score_)
print('Melhor hiperparâmetro: %s' % resultado.best_params_)

Fitting 10 folds for each of 50 candidates, totalling 500 fits
Resultados busca - ICQ Alterado
Melhor acurácia: 0.9166666666666666
Melhor hiperparâmetro: {'estimator__sigma_q_weights': [0.7000000000000001, 0.0, 0.4, 0.1], 'estimator__random_seed': 42, 'estimator__max_iter': 2000, 'estimator__classifier_function': <function create_and_execute_classifier_new_approach at 0x0000016BE1EC08B0>, 'estimator__accuracy_succ': 1.0}
Wall time: 7h 20min 35s


# Search for Learning Rates
Since we didn't have any result varying the Sigma Q, let's now vary the learning rate param and see what happens.

## Small subset of learning rates

In [10]:
params = dict()
params["estimator__sigma_q_weights"] = [[1,1,1,0]]
params["estimator__classifier_function"] = [create_and_execute_classifier_new_approach]
params["estimator__max_iter"] = [2000]
params["estimator__random_seed"] = [seed]
params["estimator__accuracy_succ"] = [1.0]
params["estimator__learning_rate"] = [0.1, 0.01, 0.001, 0.2, 0.02, 0.002, 0.0001]

In [11]:
%%time
busca = RandomizedSearchCV(icq, params, n_iter=10, scoring='accuracy', n_jobs=-1, cv=cv, random_state=seed)

# Executa busca
resultado = busca.fit(X_train, y_train)

# Resume resultados
print('Resultados busca - ICQ Alterado')
print('Melhor acurácia: %s' % resultado.best_score_)
print('Melhor hiperparâmetro: %s' % resultado.best_params_)



Resultados busca - ICQ Alterado
Melhor acurácia: 0.9583333333333333
Melhor hiperparâmetro: {'estimator__sigma_q_weights': [1, 1, 1, 0], 'estimator__random_seed': 42, 'estimator__max_iter': 2000, 'estimator__learning_rate': 0.001, 'estimator__classifier_function': <function create_and_execute_classifier_new_approach at 0x00000280A6918DC0>, 'estimator__accuracy_succ': 1.0}
Wall time: 46min 24s


## Biggest subset of learning rates
Let's now try with a biggest subset of learning rates, which will take longer, but hopefully will earn better results. We'll try with:
- [0.1, 0.2, ..., 0.9]; +
- [0.01, 0.2, ..., 0.9]; +
- [0.001, 0.002, ..., 0.009]; +
- [0.0001, 0.0002, ..., 0.0009]; +
- [0.00001, 0.00002, ..., 0.00009]; +
- [0.000001, 0.000002, ..., 0.000009];

In [7]:
params = dict()
params["estimator__sigma_q_weights"] = [[1,1,1,0]]
params["estimator__classifier_function"] = [create_and_execute_classifier_new_approach]
params["estimator__max_iter"] = [2000]
params["estimator__random_seed"] = [seed]
params["estimator__accuracy_succ"] = [1.0]
params["estimator__learning_rate"] = []
for i in range (1, 10):
    params["estimator__learning_rate"].append(0.1 * i)
    params["estimator__learning_rate"].append(0.01 * i)
    params["estimator__learning_rate"].append(0.001 * i)
    params["estimator__learning_rate"].append(0.0001 * i)
    params["estimator__learning_rate"].append(0.00001 * i)
    params["estimator__learning_rate"].append(0.000001 * i)

In [8]:
%%time
busca = GridSearchCV(icq, params, scoring='accuracy', n_jobs=-1, cv=cv, verbose=10)

# Executa busca
resultado = busca.fit(X_train, y_train)

# Resume resultados
print('Resultados busca - ICQ Alterado')
print('Melhor acurácia: %s' % resultado.best_score_)
print('Melhor hiperparâmetro: %s' % resultado.best_params_)

Fitting 10 folds for each of 54 candidates, totalling 540 fits
Resultados busca - ICQ Alterado
Melhor acurácia: 0.9666666666666666
Melhor hiperparâmetro: {'estimator__accuracy_succ': 1.0, 'estimator__classifier_function': <function create_and_execute_classifier_new_approach at 0x000001F85E6CE8B0>, 'estimator__learning_rate': 0.0008, 'estimator__max_iter': 2000, 'estimator__random_seed': 42, 'estimator__sigma_q_weights': [1, 1, 1, 0]}
Wall time: 8h 24min 59s
