In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from pyod.models.knn import KNN 

from sklearn.feature_selection import( 
    RFECV,
    SequentialFeatureSelector,
    SelectFromModel
)

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import(
    RandomForestClassifier,
    HistGradientBoostingClassifier
)

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from src.wrapped import Wrapped
from src.train import cross_validate_balancead, train_feature_combination

In [2]:
wp = Wrapped(
    '../data/row/',
    '../data/processed/',
    '../data/files/'
)

# Importando dados para treinamento

In [3]:
df = wp.load_data('df_instrumentos_features_selecionadas')

In [4]:
# instrumentos aerofone
inst_aerofone = ["acordion", "organ", "piano"] 

df = df[df['instrumento'].isin(inst_aerofone)]

- Definindo as variaveis de treino e o target

In [5]:
le = LabelEncoder()
df['labels'] = le.fit_transform(df.instrumento)

In [6]:
X = df.drop(columns=['labels', 'instrumento','file_name'])
y = df['labels'].to_frame()

# TESTE 1: Balanceamento

- 1° Opção: Balanceamento das classes com SMOTH

In [7]:
t1_models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    MLPClassifier(),
    SVC(),
])

t1_acuracy_models = [cross_validate_balancead(k=5, model=model, X=X, y=y, oversampling=True) for model in t1_models]

dict_results_t1 = {
    'Naive Bayes': t1_acuracy_models[0],
    'KNN': t1_acuracy_models[1],
    'Arvore de Decisão': t1_acuracy_models[2],
    'Floresta Aleatoria': t1_acuracy_models[3],
    'HistGradientBoosting': t1_acuracy_models[4],
    'LIGHTGBM': t1_acuracy_models[5],
    'MLP': t1_acuracy_models[6],
    'SVC': t1_acuracy_models[7],
}

dict_results_t1

Acuracia do modelo GaussianNB() do Fold 0: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 1: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 2: 0.8024193548387096
Acuracia do modelo GaussianNB() do Fold 3: 0.7258064516129032
Acuracia do modelo GaussianNB() do Fold 4: 0.7732793522267206
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6491935483870968
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6895161290322581
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6975806451612904
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.625
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7217741935483871
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6612903225806451
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6653225806451613
Acuracia do modelo DecisionTreeClassifier() do F

{'Naive Bayes': 0.7570752252840538,
 'KNN': 0.6739584693744287,
 'Arvore de Decisão': 0.6771646859083191,
 'Floresta Aleatoria': 0.7974108658743633,
 'HistGradientBoosting': 0.7998367506856472,
 'LIGHTGBM': 0.8143659396630534,
 'MLP': 0.6794893561447042,
 'SVC': 0.6941230246832963}

- 2° Opção: Class Weight

In [8]:
models_class_weight = np.array([
    DecisionTreeClassifier(class_weight='balanced'), 
    RandomForestClassifier(class_weight='balanced', random_state = 0, n_jobs = -1), 
    LGBMClassifier(class_weight='balanced'),
])

t1_acuracy_models_class_weight = [cross_validate_balancead(k=5, model=model, X=X, y=y, weight=True) for model in models_class_weight]

dict_results_t1_class_weight = {
    'Arvore de Decisão': t1_acuracy_models_class_weight[0],
    'Floresta Aleatoria': t1_acuracy_models_class_weight[1],
    'LIGHTGBM': t1_acuracy_models_class_weight[2],
}

dict_results_t1_class_weight

Acuracia do modelo DecisionTreeClassifier(class_weight='balanced') do Fold 0: 0.6653225806451613
Acuracia do modelo DecisionTreeClassifier(class_weight='balanced') do Fold 1: 0.6935483870967742
Acuracia do modelo DecisionTreeClassifier(class_weight='balanced') do Fold 2: 0.6774193548387096
Acuracia do modelo DecisionTreeClassifier(class_weight='balanced') do Fold 3: 0.6733870967741935
Acuracia do modelo DecisionTreeClassifier(class_weight='balanced') do Fold 4: 0.6720647773279352
Acuracia do modelo RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=0) do Fold 0: 0.7943548387096774
Acuracia do modelo RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=0) do Fold 1: 0.7862903225806451
Acuracia do modelo RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=0) do Fold 2: 0.8185483870967742
Acuracia do modelo RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=0) do Fold 3: 0.7943548387096774
Acuracia do modelo 

{'Arvore de Decisão': 0.6763484393365548,
 'Floresta Aleatoria': 0.8014627138566018,
 'LIGHTGBM': 0.8079012668146793}

# TESTE 2: Remover Outlier

- Treinar modelo de classificação de outilier

In [9]:
detector_outilier = KNN()

new_df = df.drop(columns=['instrumento','file_name'])

detector_outilier.fit(new_df)

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0)

- Checar numeros de outilers e não outilers<br/>
**0 (False) Não é outiler**<br/>
**1 (True) É outiler**

In [10]:
previsions = detector_outilier.labels_
np.unique(previsions, return_counts=True)

(array([0, 1]), array([1115,  124]))

- Distancia euclidiana dos registros

In [11]:
confiance_previsions = detector_outilier.decision_scores_
confiance_previsions

array([423.62695697, 769.93888098, 606.54722631, ..., 393.4113039 ,
       708.25321334, 312.90606476])

- Checar os outilers

In [12]:
# pegando indice dos registros que são outilers
outilers_id = [previsions[i] == 1 for i in range(previsions.shape[0])]
outilers = df.iloc[outilers_id,:]
outilers

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,file_name,labels
4753,-0.338461,0.166383,0.054567,0.177930,0.132602,0.279368,0.332723,0.581587,0.183638,-229.021957,...,1116.046282,0.710854,1804.692258,-71.231659,-80.000000,0.101791,22509,organ,001225_92160.ogg,0
4759,0.113469,0.061886,0.244864,0.338876,0.389752,0.293442,0.463162,0.364057,0.630125,-67.292572,...,1902.642238,1.126650,3426.202407,-56.919830,-77.357018,0.121871,26933,organ,004472_203520.ogg,0
4770,-0.018969,-0.209658,0.061133,0.108866,0.055079,0.041505,0.122339,0.209809,1.000000,-66.966331,...,2855.082325,0.920277,5307.809668,-47.889267,-55.215321,0.268291,59343,organ,009797_933120.ogg,0
4777,-0.087780,-0.248317,0.106441,0.498834,0.211081,0.170587,0.026591,0.098398,0.626627,-243.035324,...,1669.312345,0.998628,2339.125376,-47.927460,-74.754349,0.141556,31274,organ,012428_215040.ogg,0
4782,0.088565,0.044894,0.509603,0.293923,0.341588,0.189993,0.230675,0.171667,0.289855,-78.201912,...,2170.705481,1.331679,5474.579580,-40.272797,-69.009399,0.054556,12064,organ,014711_103680.ogg,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5785,0.011829,0.056785,0.594120,0.377562,0.342001,0.340806,0.540833,0.372891,0.459916,-55.257523,...,2225.204095,0.985631,5055.481495,-41.028175,-38.847610,0.105147,23260,piano,113685_15360.ogg,1
5810,0.181668,-0.022978,0.230894,0.246150,0.419428,0.244691,0.388288,0.281466,0.235830,-183.096222,...,1619.894011,1.040480,3768.910079,-32.043541,-36.987198,0.054055,11956,piano,118795_241920.ogg,1
5819,0.027706,0.089406,0.434948,0.125371,0.231317,0.110108,0.047079,0.052817,0.032457,-330.949127,...,1205.187669,1.246012,1720.058277,-59.069580,-72.654686,0.129964,28728,piano,119297_195840.ogg,1
5836,-0.042766,-0.251569,0.054530,0.159495,0.063132,0.040510,0.032541,0.214021,0.976086,-318.622833,...,1434.749693,1.063645,2117.723080,-59.973610,-65.730003,0.081683,18146,piano,122656_0.ogg,1


In [13]:
outilers.instrumento.value_counts()

organ    102
piano     22
Name: instrumento, dtype: int64

- Agora vamos remover os outilers da nossa base

In [14]:
# pegando indice dos registros que não são outilers
outilers_id =[previsions[i] == 0 for i in range(previsions.shape[0])]
df_train = df.iloc[outilers_id,:]
df_train

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,file_name,labels
4749,0.093194,-0.044222,0.255362,0.456872,0.204396,0.233627,0.503311,0.279743,0.301376,-132.641800,...,1373.688478,1.184139,2556.480829,-49.299706,-67.469734,0.066916,14792,organ,000247_76800.ogg,0
4750,-0.013780,-0.080225,0.323944,0.574369,0.257362,0.199327,0.415919,0.175389,0.205804,-274.705200,...,1659.804275,0.810513,3302.748706,-46.304691,-75.902222,0.076960,17026,organ,000258_3840.ogg,0
4751,0.175323,-0.037352,0.397090,0.881431,0.322897,0.124738,0.104840,0.077247,0.133319,-192.369568,...,1176.728816,1.201949,2307.949695,-72.486847,-79.975418,0.077022,17024,organ,001089_80640.ogg,0
4752,-0.082696,-0.164400,0.192645,0.382144,0.240159,0.104088,0.142035,0.073915,0.045623,-215.710052,...,907.139528,1.057319,1788.030255,-55.473831,-79.442757,0.040779,9017,organ,001145_218880.ogg,0
4754,0.227057,0.097982,0.187913,0.301687,0.335240,0.291053,0.720657,0.320461,0.349228,-115.656960,...,1483.546718,1.006624,3098.632926,-54.068634,-54.208893,0.078745,17404,organ,001331_157440.ogg,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5983,0.306987,0.197302,0.181270,0.482163,0.806090,0.410171,0.182062,0.128068,0.223802,-208.045074,...,821.680671,0.769584,1623.308746,-52.423000,-75.093781,0.038042,8414,piano,152057_38400.ogg,1
5984,0.339596,0.193580,0.309043,0.115449,0.066983,0.121548,0.543438,0.246675,0.126754,-359.998535,...,704.896231,0.654192,1007.089425,-73.745857,-80.000000,0.042257,9343,piano,153905_72960.ogg,1
5985,0.022141,0.249860,0.019285,0.019074,0.026441,0.062063,0.285675,0.762290,0.256367,-342.691559,...,647.666659,0.788650,1045.659338,-9.425838,-39.180683,0.026027,5747,piano,154231_199680.ogg,1
5986,-0.038987,0.185848,0.182197,0.123432,0.274012,0.223736,0.119671,0.259500,0.083546,-192.736328,...,1094.419047,1.140904,1788.779671,-25.643265,-72.219749,0.081838,18093,piano,154374_92160.ogg,1


- Treinar modelos sem outlier e balanceameto

In [15]:
X_rm_outilers = df_train.drop(columns=['labels', 'instrumento','file_name'])
y_rm_outilers = df_train['labels'].to_frame()

models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    # XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t2_acuracy_models = [cross_validate_balancead(k=5, model=model, X=X_rm_outilers,  y=y_rm_outilers) for model in models]

dict_results_t2 = {
    'Naive Bayes': t2_acuracy_models[0],
    'KNN': t2_acuracy_models[1],
    'Arvore de Decisão': t2_acuracy_models[2],
    'Floresta Aleatoria': t2_acuracy_models[3],
    'HistGradientBoosting': t2_acuracy_models[4],
    'LIGHTGBM': t2_acuracy_models[5],
    # 'XGB': t2_acuracy_models[6],
    'MLP': t2_acuracy_models[6],
    'SVC': t2_acuracy_models[7],
}

dict_results_t2

Acuracia do modelo GaussianNB() do Fold 0: 0.7802690582959642
Acuracia do modelo GaussianNB() do Fold 1: 0.7533632286995515
Acuracia do modelo GaussianNB() do Fold 2: 0.7713004484304933
Acuracia do modelo GaussianNB() do Fold 3: 0.7443946188340808
Acuracia do modelo GaussianNB() do Fold 4: 0.7802690582959642
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6367713004484304
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6905829596412556
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6860986547085202
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.600896860986547
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.6860986547085202
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6367713004484304
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7488789237668162
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6816143497757847
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6816143497757847
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.7659192825112108,
 'KNN': 0.6600896860986547,
 'Arvore de Decisão': 0.6914798206278027,
 'Floresta Aleatoria': 0.7901345291479821,
 'HistGradientBoosting': 0.8026905829596412,
 'LIGHTGBM': 0.8026905829596412,
 'MLP': 0.7085201793721974,
 'SVC': 0.6789237668161435}

# TESTE 3: Features por Correlação

In [51]:
df_corr = df.drop(columns=['instrumento', 'file_name', 'labels']).corr().unstack().reset_index().dropna()
df_corr.rename(columns = {'level_0': 'features_a', 'level_1': 'features_b', 0:'correlacao'}, inplace = True)
df_corr

Unnamed: 0,features_a,features_b,correlacao
0,tonnetz0,tonnetz0,1.000000
1,tonnetz0,tonnetz1,0.075220
2,tonnetz0,chroma1,0.026262
3,tonnetz0,chroma2,-0.111607
4,tonnetz0,chroma3,0.300317
...,...,...,...
1931,zero_crossing,spectral_rolloff,0.743868
1932,zero_crossing,spectogram,0.001365
1933,zero_crossing,mel_spectogram,0.025331
1934,zero_crossing,zero_crossing_rate,0.999998


- Correlação Positia

In [52]:
corr_posit = df_corr.query('correlacao > 0.5 and correlacao < 1.0')
corr_posit

Unnamed: 0,features_a,features_b,correlacao
430,mfcc0,spectral_band2,0.598072
431,mfcc0,spectral_band3,0.533049
433,mfcc0,spectral_centroid,0.618657
434,mfcc0,spectral_onset,0.554714
435,mfcc0,spectral_rolloff,0.616262
...,...,...,...
1891,zero_crossing_rate,zero_crossing,0.999998
1926,zero_crossing,spectral_band2,0.540448
1929,zero_crossing,spectral_centroid,0.826604
1931,zero_crossing,spectral_rolloff,0.743868


In [53]:
len(corr_posit.features_a.unique()), corr_posit.features_a.unique()

(24,
 array(['mfcc0', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10',
        'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16',
        'mfcc17', 'spectral_band2', 'spectral_band3', 'spectral_band4',
        'spectral_centroid', 'spectral_onset', 'spectral_rolloff',
        'spectogram', 'mel_spectogram', 'zero_crossing_rate',
        'zero_crossing'], dtype=object))

In [54]:
X =  df[corr_posit.features_a.unique()]
y = df['labels'].to_frame()

models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t3_acuracy_models_corr_posit = [cross_validate_balancead(k=5, model=model, X=X, y=y) for model in models]

dict_results_t3_corr_posit = {
    'Naive Bayes': t3_acuracy_models_corr_posit[0],
    'KNN': t3_acuracy_models_corr_posit[1],
    'Arvore de Decisão': t3_acuracy_models_corr_posit[2],
    'Floresta Aleatoria': t3_acuracy_models_corr_posit[3],
    'HistGradientBoosting': t3_acuracy_models_corr_posit[4],
    'LIGHTGBM': t3_acuracy_models_corr_posit[5],
    'XGB': t3_acuracy_models_corr_posit[6],
    'MLP': t3_acuracy_models_corr_posit[7],
    'SVC': t3_acuracy_models_corr_posit[8],
}

dict_results_t3_corr_posit

Acuracia do modelo GaussianNB() do Fold 0: 0.75
Acuracia do modelo GaussianNB() do Fold 1: 0.7338709677419355
Acuracia do modelo GaussianNB() do Fold 2: 0.7741935483870968
Acuracia do modelo GaussianNB() do Fold 3: 0.7217741935483871
Acuracia do modelo GaussianNB() do Fold 4: 0.7530364372469636
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.657258064516129
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6209677419354839
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.6935483870967742
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6935483870967742
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6774193548387096
Acuracia do modelo DecisionTreeClassifier() do Fol

{'Naive Bayes': 0.7465750293848765,
 'KNN': 0.6747649209873319,
 'Arvore de Decisão': 0.6795807757607417,
 'Floresta Aleatoria': 0.7796656654042053,
 'HistGradientBoosting': 0.7917493796526054,
 'LIGHTGBM': 0.7949719211179312,
 'XGB': 0.7715652344260155,
 'MLP': 0.6868486352357321,
 'SVC': 0.6933035131252449}

- Correlação Negativa

In [55]:
corr_negat = df_corr.query('correlacao > -1.0 and correlacao < -0.5')
corr_negat

Unnamed: 0,features_a,features_b,correlacao
474,mfcc1,spectral_band2,-0.841483
475,mfcc1,spectral_band3,-0.805492
476,mfcc1,spectral_band4,-0.753147
477,mfcc1,spectral_centroid,-0.800156
479,mfcc1,spectral_rolloff,-0.781028
482,mfcc1,zero_crossing_rate,-0.544639
483,mfcc1,zero_crossing,-0.544811
1506,spectral_band2,mfcc1,-0.841483
1550,spectral_band3,mfcc1,-0.805492
1594,spectral_band4,mfcc1,-0.753147


In [56]:
len(corr_negat.features_b.unique()), corr_negat.features_b.unique()

(8,
 array(['spectral_band2', 'spectral_band3', 'spectral_band4',
        'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate',
        'zero_crossing', 'mfcc1'], dtype=object))

In [57]:
X =  df[corr_negat.features_b.unique()]
y = df['labels'].to_frame()

models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t3_acuracy_models_corr_neg = [cross_validate_balancead(k=5, model=model, X=X, y=y) for model in models]

dict_results_t3_corr_neg = {
    'Naive Bayes': t3_acuracy_models_corr_neg[0],
    'KNN': t3_acuracy_models_corr_neg[1],
    'Arvore de Decisão': t3_acuracy_models_corr_neg[2],
    'Floresta Aleatoria': t3_acuracy_models_corr_neg[3],
    'HistGradientBoosting': t3_acuracy_models_corr_neg[4],
    'LIGHTGBM': t3_acuracy_models_corr_neg[5],
    'XGB': t3_acuracy_models_corr_neg[6],
    'MLP': t3_acuracy_models_corr_neg[7],
    'SVC': t3_acuracy_models_corr_neg[8],
}

dict_results_t3_corr_neg

Acuracia do modelo GaussianNB() do Fold 0: 0.6451612903225806
Acuracia do modelo GaussianNB() do Fold 1: 0.6451612903225806
Acuracia do modelo GaussianNB() do Fold 2: 0.7137096774193549
Acuracia do modelo GaussianNB() do Fold 3: 0.6048387096774194
Acuracia do modelo GaussianNB() do Fold 4: 0.6882591093117408
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6693548387096774
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6854838709677419
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6814516129032258
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6008064516129032
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6693548387096774
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.6370967741935484
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6330645161290323
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6290322580645161
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.6594260154107353,
 'KNN': 0.6691197596970092,
 'Arvore de Decisão': 0.6618878150711767,
 'Floresta Aleatoria': 0.7045971006921771,
 'HistGradientBoosting': 0.7134582734752514,
 'LIGHTGBM': 0.7078261721300769,
 'XGB': 0.7053937573462192,
 'MLP': 0.5996865613164424,
 'SVC': 0.6916873449131513}

- Se combinarmos as duas abordagens ?

In [58]:
features = np.append(corr_posit.features_b.unique(), corr_negat.features_b.unique())
features

array(['spectral_band2', 'spectral_band3', 'spectral_centroid',
       'spectral_onset', 'spectral_rolloff', 'mfcc6', 'mfcc5', 'mfcc7',
       'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc13', 'mfcc12', 'mfcc14',
       'mfcc15', 'mfcc16', 'mfcc17', 'mfcc0', 'spectral_band4',
       'zero_crossing_rate', 'zero_crossing', 'mel_spectogram',
       'spectogram', 'spectral_band2', 'spectral_band3', 'spectral_band4',
       'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate',
       'zero_crossing', 'mfcc1'], dtype=object)

In [60]:
features = np.append(corr_posit.features_b.unique(), corr_negat.features_b.unique())
features = np.unique(features)

X = df[features]
y = df['labels'].to_frame()

models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t3_combine_corr = [cross_validate_balancead(k=5, model=model, X=X, y=y) for model in models]

dict_results_t3_cb = {
    'Naive Bayes': t3_combine_corr[0],
    'KNN': t3_combine_corr[1],
    'Arvore de Decisão': t3_combine_corr[2],
    'Floresta Aleatoria': t3_combine_corr[3],
    'HistGradientBoosting': t3_combine_corr[4],
    'LIGHTGBM': t3_combine_corr[5],
    'XGB': t3_combine_corr[6],
    'MLP': t3_combine_corr[7],
    'SVC': t3_combine_corr[8],
}

dict_results_t3_cb

Acuracia do modelo GaussianNB() do Fold 0: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 1: 0.7379032258064516
Acuracia do modelo GaussianNB() do Fold 2: 0.7701612903225806
Acuracia do modelo GaussianNB() do Fold 3: 0.717741935483871
Acuracia do modelo GaussianNB() do Fold 4: 0.7530364372469636
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.657258064516129
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6209677419354839
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6854838709677419
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7016129032258065
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.7016129032258065
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6653225806451613
Acuracia do modelo DecisionTreeClassi

{'Naive Bayes': 0.7441556745461669,
 'KNN': 0.6747649209873319,
 'Arvore de Decisão': 0.6900777066736319,
 'Floresta Aleatoria': 0.7917559096251796,
 'HistGradientBoosting': 0.7933492229332637,
 'LIGHTGBM': 0.7941687344913151,
 'XGB': 0.7885072482695572,
 'MLP': 0.6981389578163772,
 'SVC': 0.6941099647381481}

- Correlação de -0.5 a 0.5

In [61]:
corr_prox_zero = df_corr.query('correlacao > -0.5 and correlacao < 0.5')
corr_prox_zero

Unnamed: 0,features_a,features_b,correlacao
1,tonnetz0,tonnetz1,0.075220
2,tonnetz0,chroma1,0.026262
3,tonnetz0,chroma2,-0.111607
4,tonnetz0,chroma3,0.300317
5,tonnetz0,chroma4,-0.261591
...,...,...,...
1927,zero_crossing,spectral_band3,0.421096
1928,zero_crossing,spectral_band4,0.331765
1930,zero_crossing,spectral_onset,0.371884
1932,zero_crossing,spectogram,0.001365


In [62]:
len(corr_prox_zero.features_b.unique())

44

In [63]:
X =  df[corr_prox_zero.features_b.unique()]
y = df['labels'].to_frame()

models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t3_acuracy_models_corr_prox_zero = [cross_validate_balancead(k=5, model=model, X=X, y=y) for model in models]

dict_results_t3_corr = {
    'Naive Bayes': t3_acuracy_models_corr_prox_zero[0],
    'KNN': t3_acuracy_models_corr_prox_zero[1],
    'Arvore de Decisão': t3_acuracy_models_corr_prox_zero[2],
    'Floresta Aleatoria': t3_acuracy_models_corr_prox_zero[3],
    'HistGradientBoosting': t3_acuracy_models_corr_prox_zero[4],
    'LIGHTGBM': t3_acuracy_models_corr_prox_zero[5],
    'XGB': t3_acuracy_models_corr_prox_zero[6],
    'MLP': t3_acuracy_models_corr_prox_zero[7],
    'SVC': t3_acuracy_models_corr_prox_zero[8],
}

dict_results_t3_corr

Acuracia do modelo GaussianNB() do Fold 0: 0.7379032258064516
Acuracia do modelo GaussianNB() do Fold 1: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 2: 0.7983870967741935
Acuracia do modelo GaussianNB() do Fold 3: 0.7258064516129032
Acuracia do modelo GaussianNB() do Fold 4: 0.7732793522267206
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6975806451612904
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6209677419354839
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7096774193548387
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6895161290322581
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6532258064516129
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.7554623220582474,
 'KNN': 0.6747649209873319,
 'Arvore de Decisão': 0.6795905707196029,
 'Floresta Aleatoria': 0.8054982369074051,
 'HistGradientBoosting': 0.8151658613033825,
 'LIGHTGBM': 0.8038755387227374,
 'XGB': 0.7990335640590309,
 'MLP': 0.7473978059292151,
 'SVC': 0.6908841582865352}

# TESTE4: Recursive Feature Elimination - (RFE)

- RFE

In [24]:
X = df.drop(columns=['instrumento','file_name','labels'])
y = df['labels'].to_frame()

clf = DecisionTreeClassifier(max_depth=17,
                             max_features='auto', 
                             min_samples_leaf=2,
                             min_samples_split=3)
                               
                               
estimators = RFECV(estimator=clf, scoring='accuracy', cv=5)

# fit rfecv
rfecv_data = estimators.fit(X, y)

# gorresponde à posição do ranking do i-ésimo recurso
ranking_features = rfecv_data.ranking_

# A máscara dos recursos selecionados.
support_features = rfecv_data.support_

# filtro
features_selects = X.columns[support_features]

In [25]:
features_selects

Index(['mfcc0', 'mfcc3', 'mfcc4', 'mfcc7', 'mfcc13', 'spectral_band4',
       'mel_spectogram'],
      dtype='object')

In [26]:
ranking_features.shape

(44,)

- Count Ranking Features

In [27]:
# Couts da features import 
unique, frequency = np.unique(ranking_features, return_counts=True)
summary = np.asarray((unique, frequency)).T
summary

array([[ 1,  7],
       [ 2,  1],
       [ 3,  1],
       [ 4,  1],
       [ 5,  1],
       [ 6,  1],
       [ 7,  1],
       [ 8,  1],
       [ 9,  1],
       [10,  1],
       [11,  1],
       [12,  1],
       [13,  1],
       [14,  1],
       [15,  1],
       [16,  1],
       [17,  1],
       [18,  1],
       [19,  1],
       [20,  1],
       [21,  1],
       [22,  1],
       [23,  1],
       [24,  1],
       [25,  1],
       [26,  1],
       [27,  1],
       [28,  1],
       [29,  1],
       [30,  1],
       [31,  1],
       [32,  1],
       [33,  1],
       [34,  1],
       [35,  1],
       [36,  1],
       [37,  1],
       [38,  1]])

- Treinamento de Modelo

In [28]:
models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t4_acuracy_models = [cross_validate_balancead(k=5, 
                                              model=model,
                                              X=X, 
                                              y=y, 
                                              oversampling=True) for model in models]

dict_results_t4 = {
    'Naive Bayes': t4_acuracy_models[0],
    'KNN': t4_acuracy_models[1],
    'Arvore de Decisão': t4_acuracy_models[2],
    'Floresta Aleatoria': t4_acuracy_models[3],
    'HistGradientBoosting': t4_acuracy_models[4],
    'LIGHTGBM': t4_acuracy_models[5],
    'XGB': t4_acuracy_models[6],
    'MLP': t4_acuracy_models[7],
    'SVC': t4_acuracy_models[8],
}

dict_results_t4

Acuracia do modelo GaussianNB() do Fold 0: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 1: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 2: 0.8024193548387096
Acuracia do modelo GaussianNB() do Fold 3: 0.7258064516129032
Acuracia do modelo GaussianNB() do Fold 4: 0.7732793522267206
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6491935483870968
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6895161290322581
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6975806451612904
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.625
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6411290322580645
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7258064516129032
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6612903225806451
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6935483870967742
Acuracia do modelo DecisionTreeClassifier() do F

{'Naive Bayes': 0.7570752252840538,
 'KNN': 0.6739584693744287,
 'Arvore de Decisão': 0.6820066605720256,
 'Floresta Aleatoria': 0.8030560271646859,
 'HistGradientBoosting': 0.7998367506856472,
 'LIGHTGBM': 0.8143659396630534,
 'XGB': 0.7974239258195116,
 'MLP': 0.6191066997518611,
 'SVC': 0.6941230246832963}

# Teste 5: SequentialFeatureSelector

### Forward

In [29]:
def selectorSequential(k, model_estimator, n_features, X, y):
    sfs = SequentialFeatureSelector(
        cv=k, 
        # direction = 'forward',
        n_features_to_select = n_features,
        estimator=model_estimator
    ).fit(X, y)
    
    mask_feature = sfs.get_support()
    
    return X[X.columns[mask_feature]]

In [30]:
_X = selectorSequential(5, DecisionTreeClassifier(), 30, X, y)

In [31]:
_X

Unnamed: 0,chroma1,chroma2,chroma3,chroma6,chroma7,mfcc0,mfcc1,mfcc2,mfcc3,mfcc5,...,mfcc19,chroma8,chroma9,chroma10,chroma12,spectral_band3,spectral_band4,spectral_centroid,zero_crossing_rate,zero_crossing
4749,0.255362,0.456872,0.204396,0.279743,0.301376,-132.641800,150.560562,-27.407114,23.959936,5.121245,...,-0.543738,0.225076,0.512915,0.591787,0.239542,2271.494439,2881.275834,1373.688478,0.066916,14792
4750,0.323944,0.574369,0.257362,0.175389,0.205804,-274.705200,129.994186,-6.221944,17.190561,5.065033,...,3.570923,0.428047,0.373425,0.695905,0.267779,2670.194362,3316.965188,1659.804275,0.076960,17026
4751,0.397090,0.881431,0.322897,0.077247,0.133319,-192.369568,163.741394,-28.770174,45.025253,-7.566900,...,-1.958624,0.192063,0.214159,0.524587,0.208911,1849.038419,2427.920908,1176.728816,0.077022,17024
4752,0.192645,0.382144,0.240159,0.073915,0.045623,-215.710052,176.578156,-22.725855,25.334278,3.577324,...,-2.461581,0.133746,0.737449,0.559517,0.211628,1954.477439,2657.412445,907.139528,0.040779,9017
4753,0.054567,0.177930,0.132602,0.581587,0.183638,-229.021957,180.878830,-45.049942,-9.942696,11.785124,...,-2.376422,0.116503,0.297538,0.281471,0.113429,1348.504100,1843.822364,1116.046282,0.101791,22509
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5983,0.181270,0.482163,0.806090,0.128068,0.223802,-208.045074,218.784363,-53.445499,23.011065,5.633037,...,0.061071,0.209973,0.192073,0.359051,0.155280,1277.163672,1710.803257,821.680671,0.038042,8414
5984,0.309043,0.115449,0.066983,0.246675,0.126754,-359.998535,182.519424,24.680046,-24.597450,-12.057526,...,3.444711,0.375308,0.116710,0.176107,0.169490,1681.556710,2431.230598,704.896231,0.042257,9343
5985,0.019285,0.019074,0.026441,0.762290,0.256367,-342.691559,155.418442,-8.510861,-32.044605,-18.061348,...,18.423162,0.474692,0.117034,0.131735,0.016938,1672.357525,2406.899185,647.666659,0.026027,5747
5986,0.182197,0.123432,0.274012,0.259500,0.083546,-192.736328,203.023346,-75.077393,-23.789988,-20.502050,...,-9.698064,0.123066,0.094510,0.268173,0.147167,1112.070824,1570.408078,1094.419047,0.081838,18093


In [32]:
models = np.array([
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    LGBMClassifier(),
    XGBClassifier(),
    MLPClassifier(),
    SVC(),
])

t5_acuracy_models = [cross_validate_balancead(k=5, 
                                              model=model,
                                              X=_X, 
                                              y=y, 
                                              oversampling=True) for model in models]

dict_results_t5 = {
    'Naive Bayes':t5_acuracy_models[0],
    'KNN':t5_acuracy_models[1],
    'Arvore de Decisão': t5_acuracy_models[2],
    'Floresta Aleatoria':t5_acuracy_models[3],
    'HistGradientBoosting':t5_acuracy_models[4],
    'LIGHTGBM': t5_acuracy_models[5],
    'XGB': t5_acuracy_models[6],
    'MLP': t5_acuracy_models[7],
    'SVC': t5_acuracy_models[8],
}

dict_results_t5

Acuracia do modelo GaussianNB() do Fold 0: 0.7620967741935484
Acuracia do modelo GaussianNB() do Fold 1: 0.7620967741935484
Acuracia do modelo GaussianNB() do Fold 2: 0.7862903225806451
Acuracia do modelo GaussianNB() do Fold 3: 0.7540322580645161
Acuracia do modelo GaussianNB() do Fold 4: 0.7732793522267206
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6491935483870968
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6733870967741935
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.7258064516129032
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6088709677419355
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.6923076923076923
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.7137096774193549
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7056451612903226
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.7943548387096774
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6975806451612904
Acuracia do modelo DecisionTreeCla

{'Naive Bayes': 0.7675590962517957,
 'KNN': 0.6699131513647643,
 'Arvore de Decisão': 0.7271973357711898,
 'Floresta Aleatoria': 0.7958077576074182,
 'HistGradientBoosting': 0.7974206608332246,
 'LIGHTGBM': 0.8079012668146793,
 'XGB': 0.795801227634844,
 'MLP': 0.6682186234817814,
 'SVC': 0.6916938748857254}

# Teste 6: Combinação

- Variaveis de  0 a 15 - combinações de 10

In [6]:
train_feature_combination(
    model = GaussianNB(),
    df = df,
    list_features = np.arange(0, 15),
    size_comb = 10
)

Teste 1 -> features Selecionada para o treino: Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
       'chroma5', 'chroma6', 'chroma7', 'mfcc0'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.7338709677419355
Acuracia do modelo GaussianNB() do Fold 1: 0.6451612903225806
Acuracia do modelo GaussianNB() do Fold 2: 0.7338709677419355
Acuracia do modelo GaussianNB() do Fold 3: 0.6935483870967742
Acuracia do modelo GaussianNB() do Fold 4: 0.6963562753036437
Accuracy 0.700561577641374 do teste -> 1
Teste 2 -> features Selecionada para o treino: Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
       'chroma5', 'chroma6', 'chroma7', 'mfcc1'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.7056451612903226
Acuracia do modelo GaussianNB() do Fold 1: 0.6129032258064516
Acuracia do modelo GaussianNB() do Fold 2: 0.7137096774193549
Acuracia do modelo GaussianNB() do Fold 3: 0.6814516129032258
Acuracia do m

defaultdict(list,
            {'features': [Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
                     'chroma5', 'chroma6', 'chroma7', 'mfcc0'],
                    dtype='object'),
              Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
                     'chroma5', 'chroma6', 'mfcc0', 'mfcc1'],
                    dtype='object'),
              Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
                     'chroma5', 'chroma6', 'mfcc0', 'mfcc3'],
                    dtype='object'),
              Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
                     'chroma5', 'chroma6', 'mfcc0', 'mfcc4'],
                    dtype='object'),
              Index(['tonnetz0', 'tonnetz1', 'chroma1', 'chroma2', 'chroma3', 'chroma4',
                     'chroma5', 'chroma6', 'mfcc0', 'mfcc5'],
                    dtype='object'),
              Index(['tonnet

- Variaveis de  15 a 30 - combinações de 10

In [7]:
train_feature_combination(
    model = GaussianNB(),
    df = df,
    list_features = np.arange(15, 30),
    size_comb = 10
)

Teste 1 -> features Selecionada para o treino: Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12',
       'mfcc13', 'mfcc14', 'mfcc15'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.6814516129032258
Acuracia do modelo GaussianNB() do Fold 1: 0.6653225806451613
Acuracia do modelo GaussianNB() do Fold 2: 0.7459677419354839
Acuracia do modelo GaussianNB() do Fold 3: 0.6733870967741935
Acuracia do modelo GaussianNB() do Fold 4: 0.680161943319838
Accuracy 0.6892581951155805 do teste -> 1
Teste 2 -> features Selecionada para o treino: Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12',
       'mfcc13', 'mfcc14', 'mfcc16'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.6733870967741935
Acuracia do modelo GaussianNB() do Fold 1: 0.6733870967741935
Acuracia do modelo GaussianNB() do Fold 2: 0.7217741935483871
Acuracia do modelo GaussianNB() do Fold 3: 0.6532258064516129
Acuracia do modelo GaussianNB() do Fold 4

defaultdict(list,
            {'features': [Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12',
                     'mfcc15', 'mfcc16', 'mfcc17'],
                    dtype='object'),
              Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12',
                     'mfcc15', 'mfcc16', 'mfcc18'],
                    dtype='object'),
              Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc13',
                     'mfcc15', 'mfcc17', 'mfcc19'],
                    dtype='object'),
              Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc13',
                     'mfcc15', 'mfcc18', 'mfcc19'],
                    dtype='object'),
              Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc13',
                     'mfcc15', 'mfcc19', 'chroma8'],
                    dtype='object'),
              Index(['mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc13',
  

- Variaveis de 30 a 44 - combinações de 10

In [8]:
train_feature_combination(
    model = GaussianNB(),
    df = df,
    list_features = np.arange(30, 44),
    size_comb = 10
)

Teste 1 -> features Selecionada para o treino: Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
       'spectral_band3', 'spectral_band4', 'spectral_centroid',
       'spectral_onset', 'spectral_rolloff'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.6693548387096774
Acuracia do modelo GaussianNB() do Fold 1: 0.6451612903225806
Acuracia do modelo GaussianNB() do Fold 2: 0.7096774193548387
Acuracia do modelo GaussianNB() do Fold 3: 0.6653225806451613
Acuracia do modelo GaussianNB() do Fold 4: 0.6882591093117408
Accuracy 0.6755550476687998 do teste -> 1
Teste 2 -> features Selecionada para o treino: Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
       'spectral_band3', 'spectral_band4', 'spectral_centroid',
       'spectral_onset', 'spectogram'],
      dtype='object')
Acuracia do modelo GaussianNB() do Fold 0: 0.6653225806451613
Acuracia do modelo GaussianNB() do Fold 1: 0.6935483870967742
Acuracia do modelo GaussianN

defaultdict(list,
            {'features': [Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
                     'spectral_band3', 'spectral_band4', 'spectral_centroid',
                     'spectral_onset', 'mel_spectogram'],
                    dtype='object'),
              Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
                     'spectral_band3', 'spectral_band4', 'spectral_centroid',
                     'spectral_rolloff', 'mel_spectogram'],
                    dtype='object'),
              Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
                     'spectral_band3', 'spectral_band4', 'spectral_centroid', 'spectogram',
                     'mel_spectogram'],
                    dtype='object'),
              Index(['chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_band2',
                     'spectral_band3', 'spectral_band4', 'spectral_centroid',
                     'mel_spect