In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import RFECV, SelectFromModel

# models
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    HistGradientBoostingClassifier
)

# scripts 
from src.wrapped import Wrapped
from src.analysesV02 import Analytics
from src.trainV02 import TrainModels

In [2]:
ac = Analytics()
tm = TrainModels()
wp = Wrapped(
    '../data/row/',
    '../data/processed/',
    '../data/files/'
)

# Importando dados para treinamento

In [3]:
# dataframe
df = wp.load_data('df_instrumentos_features_selecionadas').drop(columns=['file_name'])

# dataframes por instrumentos
inst_corda = ["cello", "guitar", "violin", "bass", "banjo", "mandolin", "ukulele"]
inst_percusao = ["mallet_percussion", "drums", "cymbals"]
inst_sopro = ["clarinet", "trombone", "flute", "trumpet", "saxophone"]
inst_aerofone = ["accordion", "organ", "piano"] 

df_inst_aerofone = df[df['instrumento'].isin(inst_aerofone)]
df_inst_sopro = df[df['instrumento'].isin(inst_sopro)]
df_inst_corda = df[df['instrumento'].isin(inst_corda)]
df_inst_percusao = df[df['instrumento'].isin(inst_percusao)]

# Pre-processamento

- Label Encoder

In [5]:
# le = LabelEncoder()

# df_inst_aerofone['labels'] = le.fit_transform(df_inst_aerofone.instrumento)
# df_inst_sopro['labels'] = le.fit_transform(df_inst_sopro.instrumento)
# df_inst_corda['labels'] = le.fit_transform(df_inst_corda.instrumento)
# df_inst_percusao['labels'] = le.fit_transform(df_inst_percusao.instrumento)

# Baseline

In [4]:
list_models = np.array([
    LGBMClassifier(),
    XGBClassifier(),
    GaussianNB(),
    KNeighborsClassifier(), 
    DecisionTreeClassifier(), 
    RandomForestClassifier(), 
    HistGradientBoostingClassifier(),
    MLPClassifier()
])

- corda

In [10]:
tm.train_models(
    X = df_inst_corda.drop(columns=['labels', 'instrumento']),  
    y = df_inst_corda['labels'],
    models=list_models
)

Acuracia do modelo LGBMClassifier() do Fold 0: 0.5118829981718465
Acuracia do modelo LGBMClassifier() do Fold 1: 0.5118829981718465
Acuracia do modelo LGBMClassifier() do Fold 2: 0.5439560439560439
Acuracia do modelo LGBMClassifier() do Fold 3: 0.5421245421245421
Acuracia do modelo LGBMClassifier() do Fold 4: 0.5311355311355311
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estima

{'LGBMClassifier': 0.528196422711962,
 'XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              predictor=None, random_state=None, ..': 0.5292929800242414,
 'GaussianNB': 0.49586489074606077,
 'KNeighborsClassifier': 0.44766927161808334,
 'DecisionTreeClassifier': 0.42973287529046217,
 'RandomForestClassifier': 0.4449220188708306,
 

-  percusão

In [13]:
tm.train_models(
    X=df_inst_percusao.drop(columns=['labels', 'instrumento']), 
    y=df_inst_percusao['labels'],
    models=list_models
)

Acuracia do modelo LGBMClassifier() do Fold 0: 0.7435897435897436
Acuracia do modelo LGBMClassifier() do Fold 1: 0.7275641025641025
Acuracia do modelo LGBMClassifier() do Fold 2: 0.7564102564102564
Acuracia do modelo LGBMClassifier() do Fold 3: 0.8038585209003215
Acuracia do modelo LGBMClassifier() do Fold 4: 0.7717041800643086
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estima

{'LGBMClassifier': 0.47662697537651244,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5046399312315564,
 'GaussianNB': 0.5202079708218131,
 'KNeighborsClassifier': 0.5222158824516687,
 'DecisionTreeClassifier': 0.528506232735754,
 'RandomForestClassifier': 0.543206083760

- Sopro

In [16]:
tm.train_models(
    X=df_inst_sopro.drop(columns=['labels', 'instrumento']), 
    y=df_inst_sopro['labels'],
    models=list_models
)

Acuracia do modelo LGBMClassifier() do Fold 0: 0.7111801242236024
Acuracia do modelo LGBMClassifier() do Fold 1: 0.7142857142857143
Acuracia do modelo LGBMClassifier() do Fold 2: 0.7204968944099379
Acuracia do modelo LGBMClassifier() do Fold 3: 0.6770186335403726
Acuracia do modelo LGBMClassifier() do Fold 4: 0.7476635514018691
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estima

{'LGBMClassifier': 0.5663007362074556,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5736501077281118,
 'GaussianNB': 0.5752534545577045,
 'KNeighborsClassifier': 0.5712590719170855,
 'DecisionTreeClassifier': 0.5693893232522017,
 'RandomForestClassifier': 0.575799002899

- Aerofone

In [17]:
tm.train_models(
    X=df_inst_aerofone.drop(columns=['labels', 'instrumento']), 
    y=df_inst_aerofone['labels'],
    models=list_models
)

Acuracia do modelo LGBMClassifier() do Fold 0: 0.7028571428571428
Acuracia do modelo LGBMClassifier() do Fold 1: 0.74
Acuracia do modelo LGBMClassifier() do Fold 2: 0.7257142857142858
Acuracia do modelo LGBMClassifier() do Fold 3: 0.7142857142857143
Acuracia do modelo LGBMClassifier() do Fold 4: 0.7742857142857142
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jo

{'LGBMClassifier': 0.5845367248427371,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5903182793817529,
 'GaussianNB': 0.5933012002512118,
 'KNeighborsClassifier': 0.5931322798340767,
 'DecisionTreeClassifier': 0.5938617086082711,
 'RandomForestClassifier': 0.597875842130

# Remover Outilers 

- Instrumento de corda

In [18]:
outilers_inst_corda  = ac.detected_outilers_knn(
    dataframe=df_inst_corda,
    algorithm='auto', 
    neighbors=5, 
    metric='euclidean'
)

df_inst_corda = ac.show_outilers(
    dataframe=df_inst_corda,
    pred=outilers_inst_corda['predictions']
)

df_inst_corda

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_band4,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,labels
524,0.044009,0.049910,0.422762,0.372759,0.418751,0.342388,0.345684,0.425205,0.419690,-113.403656,...,3451.164410,2589.236162,1.418300,5736.500270,-55.941154,-70.428162,0.124409,27503,banjo,0
546,-0.019221,0.017872,0.412917,0.454047,0.530891,0.557669,0.349409,0.344261,0.382070,-202.772339,...,3570.616525,3004.965818,1.600475,5830.002334,-78.944046,-80.000000,0.175861,38878,banjo,0
548,0.104703,-0.251793,0.163021,0.812954,0.344378,0.115020,0.292584,0.153532,0.169621,-140.371948,...,2368.569054,1704.019101,1.065461,2712.683983,-69.137093,-79.591011,0.129967,28745,banjo,0
555,-0.035994,0.023556,0.502060,0.572897,0.544631,0.463271,0.181727,0.342575,0.116119,-176.971466,...,2538.017416,2065.352164,1.527929,3548.731828,-65.745941,-79.547966,0.154887,34292,banjo,0
570,0.128059,0.082795,0.264848,0.264230,0.504677,0.353699,0.363608,0.434325,0.461813,-43.627769,...,3736.268165,3178.869223,1.445494,6845.210425,-47.192692,-59.145737,0.158472,35001,banjo,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7999,0.188614,-0.123051,0.435805,0.078954,0.048469,0.095815,0.166382,0.088318,0.250419,-390.158630,...,2069.443752,1772.864514,1.268380,2945.977004,-76.595161,-80.000000,0.126178,27872,violin,6
8016,0.066705,-0.239727,0.177657,0.249958,0.202065,0.227559,0.358576,0.191313,0.535777,-163.197540,...,3258.026601,2049.088272,1.239214,3472.241494,-45.379463,-80.000000,0.128462,28414,violin,6
8017,0.096697,-0.279816,0.159033,0.078111,0.079078,0.180735,0.292732,0.125269,0.363708,-115.140617,...,3348.078759,2277.244488,1.186836,3779.127109,-50.537724,-80.000000,0.143759,31806,violin,6
8020,0.015595,-0.058762,0.120028,0.238263,0.234643,0.267743,0.228591,0.260930,0.267262,-324.705627,...,3383.563150,3573.542978,1.354204,6552.813508,-43.596504,-73.498703,0.230540,50962,violin,6


- Instrumento percusão

In [19]:
outilers_inst_percusao  = ac.detected_outilers_knn(
    dataframe=df_inst_percusao,
    algorithm='auto', 
    neighbors=5, 
    metric='euclidean'
)

df_inst_percusao = ac.show_outilers(
    dataframe=df_inst_percusao,
    pred=outilers_inst_percusao['predictions']
)

df_inst_percusao

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_band4,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,labels
2028,0.064332,-0.023445,0.221986,0.318158,0.420399,0.800723,0.699277,0.292424,0.310495,-92.665298,...,2783.919958,2549.517812,1.755609,4183.087002,-47.144733,-65.961197,0.182486,40358,cymbals,0
2066,0.067263,-0.079658,0.400289,0.414243,0.519233,0.403937,0.590723,0.326989,0.475230,-97.677353,...,3505.349304,3813.025950,1.273033,7134.060110,-47.323631,-62.697948,0.257473,56977,cymbals,0
2080,-0.028268,0.020594,0.308341,0.539134,0.486185,0.370537,0.365475,0.277279,0.237507,-201.765625,...,3470.966019,5631.779972,1.150056,8727.017531,-66.101967,-79.368797,0.486816,107670,cymbals,0
2087,0.000423,0.008728,0.315313,0.329236,0.318093,0.291647,0.358462,0.355331,0.340839,-375.171051,...,3459.666432,4034.598255,2.311115,7370.300831,-48.193481,-79.993744,0.275738,60975,cymbals,0
2090,-0.027502,-0.040906,0.428343,0.575150,0.432707,0.349952,0.269307,0.282274,0.353393,-208.308487,...,3599.667922,4320.618296,1.748420,7560.852192,-42.228710,-69.745705,0.315267,69716,cymbals,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4548,-0.336018,-0.463490,0.187934,0.705260,0.531249,0.322856,0.199759,0.178082,0.259735,-298.939850,...,3395.363828,1251.895828,0.913958,2349.167543,-39.887836,-76.786873,0.104538,23109,mallet_percussion,2
4549,0.002682,-0.031066,0.318778,0.249719,0.236765,0.188140,0.255103,0.378614,0.406869,-389.299408,...,3314.920715,3257.742889,1.764614,5858.005490,-69.019249,-74.674164,0.244661,54124,mallet_percussion,2
4573,0.424326,-0.240209,0.127338,0.211656,0.438093,0.604280,0.843284,0.976121,0.586622,-423.657928,...,1068.042998,283.995303,0.419410,510.951435,-62.118206,-73.764893,0.012129,2688,mallet_percussion,2
4577,0.348110,0.159918,0.278293,0.092337,0.303201,0.065521,0.185706,0.234855,0.103411,-222.538467,...,2891.545646,2264.087534,1.359796,3551.354782,-21.233885,-59.256969,0.166638,36868,mallet_percussion,2


- Instrumento de Sopro

In [21]:
outilers_inst_sopro  = ac.detected_outilers_knn(
    dataframe=df_inst_sopro,
    algorithm='auto', 
    neighbors=5, 
    metric='euclidean'
)

df_inst_sopro = ac.show_outilers(
    dataframe=df_inst_sopro,
    pred=outilers_inst_sopro['predictions']
)

df_inst_sopro

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_band4,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,labels
1876,-0.044667,0.260524,0.692127,0.238955,0.029413,0.424056,0.732275,0.106237,0.264656,-178.568253,...,2269.854161,1872.058375,0.862944,2738.513835,-70.011116,-80.000000,0.139929,30932,clarinet,0
1878,0.089747,0.253008,0.299878,0.225017,0.544357,0.164941,0.079692,0.119684,0.194826,-211.757339,...,2431.808142,1999.856294,1.274141,3282.789275,-58.643265,-71.021973,0.176906,39102,clarinet,0
1886,0.002779,0.015584,0.311722,0.319367,0.339069,0.368589,0.345589,0.443560,0.344975,-85.485229,...,2937.446277,3154.871456,1.945904,5838.245903,-44.260166,-62.325306,0.232276,51374,clarinet,0
1888,0.108679,0.066236,0.484411,0.352953,0.433525,0.316416,0.265250,0.316138,0.292531,-121.484428,...,3654.260611,1992.119222,1.766695,4726.313261,-42.178364,-41.680836,0.041709,9216,clarinet,0
1897,0.040330,0.013987,0.226904,0.302980,0.674932,0.560282,0.423103,0.323710,0.402272,-124.647331,...,2093.215040,1949.869060,1.131823,3117.917883,-63.385639,-78.626289,0.151718,33537,clarinet,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7151,-0.215709,0.074658,0.095596,0.119745,0.361354,0.896803,0.408373,0.369947,0.177898,-387.084808,...,3460.288056,3394.058894,1.074963,6651.736344,-53.667458,-76.665230,0.194558,43089,trumpet,4
7153,0.095131,0.165130,0.409808,0.178644,0.131329,0.086917,0.195207,0.238547,0.105908,-196.057907,...,1670.854795,1466.013189,1.140587,2337.326779,-75.481300,-79.999992,0.101459,22454,trumpet,4
7156,-0.148693,0.003055,0.543909,0.820975,0.261659,0.262984,0.201507,0.296120,0.160154,-182.133682,...,2812.377753,2612.295812,0.958099,4009.572352,-64.094101,-80.000000,0.170468,37702,trumpet,4
7159,-0.040215,0.031151,0.587902,0.642199,0.661011,0.675489,0.697853,0.718583,0.482416,-276.234711,...,1626.501203,285.508118,0.854189,403.660127,-16.440603,-14.334520,0.014532,3213,trumpet,4


- Instrumento Aerofones

In [22]:
outilers_inst_aerofone  = ac.detected_outilers_knn(
    dataframe=df_inst_aerofone,
    algorithm='auto', 
    neighbors=5, 
    metric='euclidean'
)

df_inst_aerofone = ac.show_outilers(
    dataframe=df_inst_aerofone,
    pred=outilers_inst_aerofone['predictions']
)

df_inst_aerofone

Unnamed: 0,tonnetz0,tonnetz1,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,mfcc0,...,spectral_band4,spectral_centroid,spectral_onset,spectral_rolloff,spectogram,mel_spectogram,zero_crossing_rate,zero_crossing,instrumento,labels
3,0.157274,-0.066099,0.209729,0.338755,0.660264,0.256625,0.275209,0.180417,0.399448,-66.319801,...,3855.791675,2903.979876,1.218963,6751.133809,-42.723160,-44.649315,0.144010,31836,accordion,0
11,0.067479,-0.072520,0.319265,0.259853,0.236549,0.416537,0.552991,0.282335,0.311797,-94.087822,...,3774.787224,2669.619821,1.465200,6189.322048,-46.716263,-67.817337,0.096043,21237,accordion,0
28,0.177880,0.099179,0.446227,0.186154,0.310970,0.289374,0.480033,0.271526,0.262448,-113.129501,...,3006.696138,3178.625677,1.504104,5658.636007,-64.840324,-79.991013,0.225914,49936,accordion,0
35,-0.231209,-0.164767,0.777508,0.578240,0.235910,0.271922,0.123425,0.241857,0.367082,-22.823570,...,3031.853586,2150.842674,0.733548,4609.279553,-52.478855,-73.244209,0.110248,24359,accordion,0
46,0.001389,0.058207,0.307783,0.359920,0.321503,0.340189,0.230577,0.230887,0.267941,-335.472443,...,3364.332940,3317.306522,1.480623,6072.413242,-37.332355,-79.453758,0.249911,55267,accordion,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5762,0.320744,0.333888,0.157782,0.363536,0.682014,0.422248,0.271323,0.336627,0.666637,-345.862030,...,796.619605,357.755105,0.542614,697.306070,-28.129459,-64.974831,0.018301,4058,piano,2
5763,0.015400,0.123637,0.372522,0.159783,0.299456,0.384501,0.292673,0.341428,0.091393,-226.478699,...,1073.110842,942.045483,1.311127,1374.577767,-37.704166,-79.981483,0.079346,17538,piano,2
5771,-0.272394,-0.220975,0.386687,0.572144,0.377248,0.291276,0.279419,0.360151,0.437108,-318.811493,...,1387.380208,499.685219,0.679895,1074.961481,-52.980217,-61.614857,0.017695,3918,piano,2
5819,0.027706,0.089406,0.434948,0.125371,0.231317,0.110108,0.047079,0.052817,0.032457,-330.949127,...,1459.945022,1205.187669,1.246012,1720.058277,-59.069580,-72.654686,0.129964,28728,piano,2


# Correlação de -0.5 a 0.5

- Instrumento de corda

In [23]:
df_aux = ac.features_corr(df_inst_corda, -0.5, 0.5).drop(columns='instrumento')
print(f'Total de Features selecionadas {df_aux[df_aux.columns[:-1]].shape[1]} - base original tinha {df[df.columns[:-1]].shape[1]}')
tm.train_models(
    X=df_aux.drop(columns=['labels']), 
    y=df_aux['labels'],
    models=list_models
)

Total de Features selecionadas 44 - base original tinha 44
Acuracia do modelo LGBMClassifier() do Fold 0: 0.4727272727272727
Acuracia do modelo LGBMClassifier() do Fold 1: 0.509090909090909
Acuracia do modelo LGBMClassifier() do Fold 2: 0.41818181818181815
Acuracia do modelo LGBMClassifier() do Fold 3: 0.6
Acuracia do modelo LGBMClassifier() do Fold 4: 0.5
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints

{'LGBMClassifier': 0.5994542287769655,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5954556245041631,
 'GaussianNB': 0.5909659351845156,
 'KNeighborsClassifier': 0.5846886360122558,
 'DecisionTreeClassifier': 0.5784503500084667,
 'RandomForestClassifier': 0.577057495679

- Instrumento de Percusão

In [24]:
df_aux = ac.features_corr(df_inst_percusao, -0.5, 0.5).drop(columns='instrumento')
print(f'Total de Features selecionadas {df_aux[df_aux.columns[:-1]].shape[1]} - base original tinha {df[df.columns[:-1]].shape[1]}')
tm.train_models(
    X=df_aux.drop(columns=['labels']), 
    y=df_aux['labels'],
    models=list_models
)

Total de Features selecionadas 44 - base original tinha 44
Acuracia do modelo LGBMClassifier() do Fold 0: 0.6875
Acuracia do modelo LGBMClassifier() do Fold 1: 0.6129032258064516
Acuracia do modelo LGBMClassifier() do Fold 2: 0.6774193548387096
Acuracia do modelo LGBMClassifier() do Fold 3: 0.7419354838709677
Acuracia do modelo LGBMClassifier() do Fold 4: 0.6129032258064516
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, mo

{'LGBMClassifier': 0.5688996449772686,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5715170497435549,
 'GaussianNB': 0.5726717545132096,
 'KNeighborsClassifier': 0.5726192806203141,
 'DecisionTreeClassifier': 0.5734248736674613,
 'RandomForestClassifier': 0.576426331533

- Instrumento de Sopro

In [25]:
df_aux = ac.features_corr(df_inst_sopro, -0.5, 0.5).drop(columns='instrumento')
print(f'Total de Features selecionadas {df_aux[df_aux.columns[:-1]].shape[1]} - base original tinha {df[df.columns[:-1]].shape[1]}')
tm.train_models(
    X=df_aux.drop(columns=['labels']), 
    y=df_aux['labels'],
    models=list_models
)

Total de Features selecionadas 44 - base original tinha 44
Acuracia do modelo LGBMClassifier() do Fold 0: 0.7272727272727273
Acuracia do modelo LGBMClassifier() do Fold 1: 0.53125
Acuracia do modelo LGBMClassifier() do Fold 2: 0.59375
Acuracia do modelo LGBMClassifier() do Fold 3: 0.6875
Acuracia do modelo LGBMClassifier() do Fold 4: 0.75
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
           

{'LGBMClassifier': 0.5740531860929555,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5754850011589752,
 'GaussianNB': 0.5743094188427266,
 'KNeighborsClassifier': 0.576165647967546,
 'DecisionTreeClassifier': 0.5750180113849679,
 'RandomForestClassifier': 0.5767888619036

- Instrumento de Aerofones

In [26]:
df_aux = ac.features_corr(df_inst_aerofone, -0.5, 0.5).drop(columns='instrumento')
print(f'Total de Features selecionadas {df_aux[df_aux.columns[:-1]].shape[1]} - base original tinha {df[df.columns[:-1]].shape[1]}')
tm.train_models(
    X=df_aux.drop(columns=['labels']), 
    y=df_aux['labels'],
    models=list_models
)

Total de Features selecionadas 44 - base original tinha 44
Acuracia do modelo LGBMClassifier() do Fold 0: 0.6285714285714286
Acuracia do modelo LGBMClassifier() do Fold 1: 0.6571428571428571
Acuracia do modelo LGBMClassifier() do Fold 2: 0.7428571428571429
Acuracia do modelo LGBMClassifier() do Fold 3: 0.7142857142857143
Acuracia do modelo LGBMClassifier() do Fold 4: 0.5882352941176471
Acuracia do modelo XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, mis

{'LGBMClassifier': 0.5770623136610008,
 "XGBClassifier(base_score=None, booster=None, callbacks=None,\n              colsample_bylevel=None, colsample_bynode=None,\n              colsample_bytree=None, early_stopping_rounds=None,\n              enable_categorical=False, eval_metric=None, feature_types=None,\n              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n              interaction_constraints=None, learning_rate=None, max_bin=None,\n              max_cat_threshold=None, max_cat_to_onehot=None,\n              max_delta_step=None, max_depth=None, max_leaves=None,\n              min_child_weight=None, missing=nan, monotone_constraints=None,\n              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n              objective='multi:softprob', predictor=None, ..": 0.5789066464159041,
 'GaussianNB': 0.5799079438203346,
 'KNeighborsClassifier': 0.5796237638042813,
 'DecisionTreeClassifier': 0.5800239252738075,
 'RandomForestClassifier': 0.581511747568