In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt                
from sklearn.preprocessing import LabelEncoder
from src.wrapped import Wrapped
from src.analysesV02 import Analytics 
from src.trainV02 import TrainModels
from skopt.space import Real, Integer, Categorical
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier

# Importando dados para treinamento

In [2]:
ac = Analytics()
tm = TrainModels()
wp = Wrapped(
    '../data/row/',
    '../data/processed/',
    '../data/files/'
)

# dataframe
df = wp.load_data('df_instrumentos_features_selecionadas').drop(columns=['file_name'])
df = df.query("instrumento != 'voice' & instrumento != 'synthesizer'")

# dataframes por instrumentos
inst_corda    = ["cello", "guitar", "violin", "bass", "banjo", "mandolin", "ukulele"]
inst_percusao = ["mallet_percussion", "drums", "cymbals"]
inst_sopro    = ["clarinet", "trombone", "flute", "trumpet", "saxophone"]
inst_aerofone = ["accordion", "organ", "piano"] 

df_inst_aerofone = df[df['instrumento'].isin(inst_aerofone)]
df_inst_sopro    = df[df['instrumento'].isin(inst_sopro)]
df_inst_corda    = df[df['instrumento'].isin(inst_corda)]
df_inst_percusao = df[df['instrumento'].isin(inst_percusao)]

# Pre-processamento

- Label Encoder

In [3]:
le = LabelEncoder()

df_inst_aerofone['labels'] = le.fit_transform(df_inst_aerofone.instrumento)
df_inst_sopro['labels']    = le.fit_transform(df_inst_sopro.instrumento)
df_inst_corda['labels']    = le.fit_transform(df_inst_corda.instrumento)
df_inst_percusao['labels'] = le.fit_transform(df_inst_percusao.instrumento)

# Analise de Anomalias

#### Tunning de Hiperparametros

- XGBoost

In [5]:
parametros = {
    "eta": Real(0.01, 0.2),
    "max_depth": Integer(6, 20),
    "gamma":  Integer(0, 10),
    "learning_rate": Real(0,1),
    "subsample": Real(0.5, 1)
}

tm.train_tunning_hyperparameters(
    dataframe=df_inst_corda, 
    model=XGBClassifier(),
    parameters=parametros, 
    filename="resultados_parametros_corda_XGBoost"
)

interação 0 - Metric: euclidean, Algoritmo: auto, neighbor: 3
interação 1 - Metric: manhattan, Algoritmo: auto, neighbor: 3
interação 2 - Metric: minkowski, Algoritmo: auto, neighbor: 3
interação 3 - Metric: euclidean, Algoritmo: ball_tree, neighbor: 3
interação 4 - Metric: manhattan, Algoritmo: ball_tree, neighbor: 3
interação 5 - Metric: minkowski, Algoritmo: ball_tree, neighbor: 3
interação 6 - Metric: euclidean, Algoritmo: kd_tree, neighbor: 3
interação 7 - Metric: manhattan, Algoritmo: kd_tree, neighbor: 3
interação 8 - Metric: minkowski, Algoritmo: kd_tree, neighbor: 3
interação 9 - Metric: euclidean, Algoritmo: brute, neighbor: 3
interação 10 - Metric: manhattan, Algoritmo: brute, neighbor: 3
interação 11 - Metric: minkowski, Algoritmo: brute, neighbor: 3
interação 12 - Metric: euclidean, Algoritmo: auto, neighbor: 5
interação 13 - Metric: manhattan, Algoritmo: auto, neighbor: 5
interação 14 - Metric: minkowski, Algoritmo: auto, neighbor: 5
interação 15 - Metric: euclidean, Algo

Unnamed: 0,metric_detected_outiler,algorithm_detected_outiler,neighbors_detected_outiler,parametos_models,accuracy_models
0,euclidean,auto,3,"{'eta': 0.04678835269469437, 'gamma': 0, 'lear...",52.525253
1,manhattan,auto,3,"{'eta': 0.15858960267655547, 'gamma': 4, 'lear...",52.161616
2,minkowski,auto,3,"{'eta': 0.16578653877946736, 'gamma': 0, 'lear...",50.707071
3,euclidean,ball_tree,3,"{'eta': 0.01, 'gamma': 0, 'learning_rate': 0.1...",53.259259
4,manhattan,ball_tree,3,"{'eta': 0.0877428847157884, 'gamma': 3, 'learn...",52.16835
5,minkowski,ball_tree,3,"{'eta': 0.2, 'gamma': 0, 'learning_rate': 0.26...",52.915825
6,euclidean,kd_tree,3,"{'eta': 0.18691183596853658, 'gamma': 2, 'lear...",52.16835
7,manhattan,kd_tree,3,"{'eta': 0.18310315230961735, 'gamma': 0, 'lear...",53.62963
8,minkowski,kd_tree,3,"{'eta': 0.1182346811524497, 'gamma': 4, 'learn...",51.070707
9,euclidean,brute,3,"{'eta': 0.01, 'gamma': 4, 'learning_rate': 0.1...",52.545455


- HistGBM

In [None]:
parametros = {
    "min_samples_leaf": Integer(5, 20),
    "max_depth": Integer(6, 20),
    "loss": Categorical(['log_loss','auto','categorical_crossentropy']), 
    "max_bins": Integer(100, 250)
}

tm.train_tunning_hyperparameters(
    dataframe=df_inst_sopro, 
    model=HistGradientBoostingClassifier(), 
    parameters=parametros, 
    filename="resultados_parametros_sopro_histGB"
)

- Random Forest

In [None]:
parametros = {
    "criterion": Categorical(['gini','entropy']),
    "max_depth": Integer(6, 20),
    "min_samples_split": Integer(2, 10),
    "min_samples_leaf": Integer(2, 10),
    "max_features": Categorical(['auto', 'sqrt','log2']), 
    "bootstrap": Categorical([True, False]),
    "n_estimators": Integer(100, 500)
}

tm.train_tunning_hyperparameters(
    dataframe=df_inst_aerofone, 
    model=RandomForestClassifier(), 
    parameters=parametros, 
    filename="resultados_parametros_aerofone_random_forest"
)