In [2]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from pyod.models.knn import KNN 

from sklearn.feature_selection import( 
    RFECV,
    SequentialFeatureSelector,
    SelectFromModel
)

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import(
    RandomForestClassifier,
    HistGradientBoostingClassifier
)

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from src.wrapped import Wrapped
from src.train import cross_validate_balancead, train_feature_combination

In [3]:
# Project helper from src.wrapped; later cells use it through wp.load_data(...).
# NOTE(review): the three positional arguments are presumably the raw, processed
# and auxiliary-file directories, in that order — confirm against Wrapped.__init__.
wp = Wrapped(
    '../data/row/',
    '../data/processed/',
    '../data/files/'
)

# Importando dados para treinamento

In [64]:
# Full feature table; the 'file_name' column is dropped right after loading.
df = wp.load_data('df_instrumentos_features_selecionadas').drop(columns=['file_name'])

# Instrument families used to split the dataset.
inst_corda = ["cello", "guitar", "violin", "bass", "banjo", "mandolin", "ukulele"]
inst_percusao = ["mallet_percussion", "drums", "cymbals"]
inst_sopro = ["clarinet", "trombone", "flute", "trumpet", "saxophone"]
# NOTE(review): "acordion" must match the spelling stored in the 'instrumento'
# column exactly; if the data uses "accordion" those rows are silently left out — confirm.
inst_aerofone = ["acordion", "organ", "piano"]

# One frame per family. .copy() makes each one an independent frame, so the
# 'labels' column added in a later cell is written to a real frame instead of a
# slice of `df` (no SettingWithCopyWarning, no reliance on view/copy rules).
df_inst_aerofone = df[df['instrumento'].isin(inst_aerofone)].copy()
df_inst_sopro = df[df['instrumento'].isin(inst_sopro)].copy()
df_inst_corda = df[df['instrumento'].isin(inst_corda)].copy()
df_inst_percusao = df[df['instrumento'].isin(inst_percusao)].copy()

# Every family except the string instruments.
inst_selecionados = np.concatenate((inst_percusao, inst_sopro, inst_aerofone))

new_df = df[df['instrumento'].isin(inst_selecionados)].copy()

# Label Encoder

In [65]:
# Add an integer 'labels' column to every frame. The same encoder object is
# refit for each frame, so the codes are local to that frame and `le` is left
# fitted on the last one (new_df).
le = LabelEncoder()

for frame in (df_inst_aerofone, df_inst_sopro, df_inst_corda, df_inst_percusao, new_df):
    frame['labels'] = le.fit_transform(frame.instrumento)

# Função de treinamento

In [66]:
def train_models(df_train):
    """Cross-validate a fixed set of classifiers on one labelled feature frame.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Must contain the columns 'labels' (used as the target) and
        'instrumento' (dropped); every other column is used as a feature.

    Returns
    -------
    dict
        Display name -> value returned by ``cross_validate_balancead`` (called
        with ``k=5``) for that model, in the order the models are evaluated.
    """
    X = df_train.drop(columns=['labels', 'instrumento'])
    # Passed as a one-column DataFrame, exactly as this function always has.
    y = df_train['labels'].to_frame()

    # Name and estimator are declared side by side so a score can never be
    # attributed to the wrong model when the list is edited. A plain dict also
    # avoids wrapping estimator objects in a NumPy array.
    models = {
        'Naive Bayes': GaussianNB(),
        'KNN': KNeighborsClassifier(),
        'Arvore de Decisão': DecisionTreeClassifier(),
        'Floresta Aleatoria': RandomForestClassifier(),
        'HistGradientBoosting': HistGradientBoostingClassifier(),
        'LIGHTGBM': LGBMClassifier(),
        'MLP': MLPClassifier(),
        'XGB': XGBClassifier(),
        'SVC': SVC(),
    }

    return {
        name: cross_validate_balancead(k=5, model=model, X=X, y=y)
        for name, model in models.items()
    }

# Remover Outliers

In [75]:
def remove_outilers(df):
    """Return the rows of ``df`` that pyod's KNN detector does not flag as outliers.

    The detector is created with its default settings and fitted on every
    column of ``df`` except 'instrumento'.

    NOTE(review): when ``df`` already carries the integer 'labels' column, that
    column is fed to the detector like any other feature — confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'instrumento' column; the remaining columns must be
        usable as numeric input by the detector.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose detector label is 0, with all columns kept.
    """
    detector = KNN()
    detector.fit(df.drop(columns=['instrumento']))

    # pyod convention: labels_ is 0 for inliers and 1 for outliers.
    inlier_mask = detector.labels_ == 0
    return df.iloc[inlier_mask, :]

- Toda a base menos os instrumentos de corda

In [23]:
# Outlier removal only, on every family except the string instruments.
train_models(remove_outilers(new_df))

Acuracia do modelo GaussianNB() do Fold 0: 0.37831021437578816
Acuracia do modelo GaussianNB() do Fold 1: 0.41740226986128626
Acuracia do modelo GaussianNB() do Fold 2: 0.41740226986128626
Acuracia do modelo GaussianNB() do Fold 3: 0.39092055485498106
Acuracia do modelo GaussianNB() do Fold 4: 0.4110970996216898
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.2572509457755359
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.27994955863808324
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.2812105926860025
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.2408575031525851
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.2837326607818411
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.33417402269861285
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.31778058007566207
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.33039092055485497
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.31904161412358134
Acuracia do modelo Decisi

{'Naive Bayes': 0.4030264817150063,
 'KNN': 0.26860025220680955,
 'Arvore de Decisão': 0.3238335435056746,
 'Floresta Aleatoria': 0.47137452711223204,
 'HistGradientBoosting': 0.4948297604035309,
 'LIGHTGBM': 0.5016393442622951,
 'MLP': 0.30466582597730135,
 'XGB': 0.4948297604035309,
 'SVC': 0.3059268600252206}

- Instrumento de corda

In [19]:
# Outlier removal only, on the string-instrument frame.
train_models(remove_outilers(df_inst_corda))

Acuracia do modelo GaussianNB() do Fold 0: 0.4105691056910569
Acuracia do modelo GaussianNB() do Fold 1: 0.39227642276422764
Acuracia do modelo GaussianNB() do Fold 2: 0.4715447154471545
Acuracia do modelo GaussianNB() do Fold 3: 0.4623217922606925
Acuracia do modelo GaussianNB() do Fold 4: 0.45213849287169044
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.2926829268292683
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.2784552845528455
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.3130081300813008
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.3014256619144603
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.3095723014256619
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.3333333333333333
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.3475609756097561
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.3861788617886179
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.34623217922606925
Acuracia do modelo DecisionTree

{'Naive Bayes': 0.43777010580696435,
 'KNN': 0.29902886096070735,
 'Arvore de Decisão': 0.35272216978788934,
 'Floresta Aleatoria': 0.52034341728346,
 'HistGradientBoosting': 0.5329566340469921,
 'LIGHTGBM': 0.5402737072177238,
 'MLP': 0.28722534068517874,
 'SVC': 0.3592295464706175}

- Instrumento de percussão

In [18]:
# Outlier removal only, on the percussion frame.
train_models(remove_outilers(df_inst_percusao))

Acuracia do modelo GaussianNB() do Fold 0: 0.6832740213523132
Acuracia do modelo GaussianNB() do Fold 1: 0.6654804270462633
Acuracia do modelo GaussianNB() do Fold 2: 0.6571428571428571
Acuracia do modelo GaussianNB() do Fold 3: 0.7071428571428572
Acuracia do modelo GaussianNB() do Fold 4: 0.6857142857142857
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.5480427046263345
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.5302491103202847
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5071428571428571
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.5607142857142857
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5571428571428572
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.597864768683274
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5658362989323843
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6392857142857142
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.65
Acuracia do modelo DecisionTreeClassifier() do Fo

{'Naive Bayes': 0.6797508896797153,
 'KNN': 0.5406583629893238,
 'Arvore de Decisão': 0.6184544992374174,
 'Floresta Aleatoria': 0.7346873411286222,
 'HistGradientBoosting': 0.7468327402135231,
 'LIGHTGBM': 0.7439883070665989,
 'MLP': 0.5250152516522623,
 'SVC': 0.5706354855109304}

- Instrumento de Sopro

In [17]:
# Outlier removal only, on the wind-instrument frame.
train_models(remove_outilers(df_inst_sopro))

Acuracia do modelo GaussianNB() do Fold 0: 0.5379310344827586
Acuracia do modelo GaussianNB() do Fold 1: 0.6448275862068965
Acuracia do modelo GaussianNB() do Fold 2: 0.596551724137931
Acuracia do modelo GaussianNB() do Fold 3: 0.5847750865051903
Acuracia do modelo GaussianNB() do Fold 4: 0.6332179930795848
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.4517241379310345
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.4862068965517241
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.47750865051903113
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5363321799307958
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.5448275862068965
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5413793103448276
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.5586206896551724
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.5397923875432526
Acuracia do modelo DecisionTreeClassifier() do Fo

{'Naive Bayes': 0.5994606848824723,
 'KNN': 0.4903543729865172,
 'Arvore de Decisão': 0.5504188044386111,
 'Floresta Aleatoria': 0.7106383486457463,
 'HistGradientBoosting': 0.7023553275265482,
 'LIGHTGBM': 0.7161651354253669,
 'MLP': 0.5545090084715427,
 'SVC': 0.5145090084715427}

- Instrumento Aerofones

In [16]:
# Outlier removal only, on the df_inst_aerofone frame.
train_models(remove_outilers(df_inst_aerofone))

Acuracia do modelo GaussianNB() do Fold 0: 0.7802690582959642
Acuracia do modelo GaussianNB() do Fold 1: 0.7533632286995515
Acuracia do modelo GaussianNB() do Fold 2: 0.7713004484304933
Acuracia do modelo GaussianNB() do Fold 3: 0.7443946188340808
Acuracia do modelo GaussianNB() do Fold 4: 0.7802690582959642
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6367713004484304
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6905829596412556
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6860986547085202
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.600896860986547
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.6860986547085202
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6457399103139013
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7443946188340808
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6816143497757847
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.672645739910314
Acuracia do modelo DecisionTreeClass

{'Naive Bayes': 0.7659192825112108,
 'KNN': 0.6600896860986547,
 'Arvore de Decisão': 0.6869955156950673,
 'Floresta Aleatoria': 0.790134529147982,
 'HistGradientBoosting': 0.8026905829596412,
 'LIGHTGBM': 0.8026905829596412,
 'MLP': 0.6896860986547085,
 'SVC': 0.6789237668161435}

# Correlação de -0.5 a 0.5

In [70]:
def df_features_corr(df, limit_inf, limit_sup):
    """Keep the features that take part in at least one in-range correlation pair.

    The pairwise correlation matrix of every column except 'instrumento' and
    'labels' is flattened into (features_a, features_b, correlacao) rows. A
    feature is kept when it appears as ``features_b`` in any row whose
    correlation lies strictly between ``limit_inf`` and ``limit_sup``.

    NOTE(review): one in-range pair is enough to keep a feature, so a feature
    that is strongly correlated with some columns still survives if it is
    weakly correlated with any other — confirm this is the intended rule.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the feature columns plus 'instrumento' and 'labels'.
    limit_inf, limit_sup : float
        Exclusive lower and upper bounds for the correlation value.

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to the kept features (in order of first appearance),
        followed by 'labels' and 'instrumento'.
    """
    feature_frame = df.drop(columns=['instrumento', 'labels'])

    # Long-format table with one row per ordered pair of features.
    pairs = (
        feature_frame.corr()
        .unstack()
        .reset_index()
        .dropna()
        .rename(columns={'level_0': 'features_a', 'level_1': 'features_b', 0: 'correlacao'})
    )

    within_limits = (pairs['correlacao'] > limit_inf) & (pairs['correlacao'] < limit_sup)
    kept_features = pairs.loc[within_limits, 'features_b'].unique()

    return df[list(kept_features) + ['labels', 'instrumento']]


- Toda a base menos os instrumentos de corda

In [80]:
# Correlation filter only (bounds -0.5 / 0.5), on every family except the string instruments.
train_models(df_features_corr(new_df, -0.5, 0.5))

Acuracia do modelo GaussianNB() do Fold 0: 0.3820861678004535
Acuracia do modelo GaussianNB() do Fold 1: 0.39727582292849034
Acuracia do modelo GaussianNB() do Fold 2: 0.4108967082860386
Acuracia do modelo GaussianNB() do Fold 3: 0.38024971623155507
Acuracia do modelo GaussianNB() do Fold 4: 0.41430192962542567
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.2585034013605442
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.2758229284903519
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.2758229284903519
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.23723041997729852
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.28603859250851305
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.31405895691609975
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.34846765039727584
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.3132803632236095
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.2996594778660613
Acuracia do modelo Decision

{'Naive Bayes': 0.39696206897439257,
 'KNN': 0.2666836541654119,
 'Arvore de Decisão': 0.3261489082958193,
 'Floresta Aleatoria': 0.46709032458991917,
 'HistGradientBoosting': 0.51089567874066,
 'LIGHTGBM': 0.5079434573678128,
 'MLP': 0.2546498130088206,
 'XGB': 0.497961500150571,
 'SVC': 0.29278237212402936}

- Percussão

In [73]:
# Correlation filter only (bounds -0.5 / 0.5), on the percussion frame.
train_models(df_features_corr(df_inst_percusao, -0.5, 0.5))

Acuracia do modelo GaussianNB() do Fold 0: 0.6730769230769231
Acuracia do modelo GaussianNB() do Fold 1: 0.657051282051282
Acuracia do modelo GaussianNB() do Fold 2: 0.6634615384615384
Acuracia do modelo GaussianNB() do Fold 3: 0.6977491961414791
Acuracia do modelo GaussianNB() do Fold 4: 0.6881028938906752
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.5673076923076923
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.5352564102564102
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5064102564102564
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.5562700964630225
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5562700964630225
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.5865384615384616
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5993589743589743
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6153846153846154
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6205787781350482
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.6758883667243796,
 'KNN': 0.5443029103800808,
 'Arvore de Decisão': 0.6123464424107511,
 'Floresta Aleatoria': 0.7407185258471433,
 'HistGradientBoosting': 0.7593350647209168,
 'LIGHTGBM': 0.7631874020941545,
 'MLP': 0.539242724049798,
 'XGB': 0.7561299365157886,
 'SVC': 0.5622639953829665}

- Sopro

In [72]:
# Correlation filter only (bounds -0.5 / 0.5), on the wind-instrument frame.
train_models(df_features_corr(df_inst_sopro, -0.5, 0.5))

Acuracia do modelo GaussianNB() do Fold 0: 0.5590062111801242
Acuracia do modelo GaussianNB() do Fold 1: 0.6428571428571429
Acuracia do modelo GaussianNB() do Fold 2: 0.5993788819875776
Acuracia do modelo GaussianNB() do Fold 3: 0.5962732919254659
Acuracia do modelo GaussianNB() do Fold 4: 0.6230529595015576
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.468944099378882
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.4782608695652174
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5093167701863354
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.4813664596273292
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5389408099688473
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.5652173913043478
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5279503105590062
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.5652173913043478
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.531055900621118
Acuracia do modelo DecisionTreeClass

{'Naive Bayes': 0.6041136974903736,
 'KNN': 0.4953658017453222,
 'Arvore de Decisão': 0.5425610959540256,
 'Floresta Aleatoria': 0.7104003405506859,
 'HistGradientBoosting': 0.7072889456473366,
 'LIGHTGBM': 0.709773417697026,
 'MLP': 0.49169133724192643,
 'XGB': 0.7128790077591378,
 'SVC': 0.5102803738317757}

- Aerofones

In [71]:
# Correlation filter only (bounds -0.5 / 0.5), on the df_inst_aerofone frame.
train_models(df_features_corr(df_inst_aerofone, -0.5, 0.5))

Acuracia do modelo GaussianNB() do Fold 0: 0.7379032258064516
Acuracia do modelo GaussianNB() do Fold 1: 0.7419354838709677
Acuracia do modelo GaussianNB() do Fold 2: 0.7983870967741935
Acuracia do modelo GaussianNB() do Fold 3: 0.7258064516129032
Acuracia do modelo GaussianNB() do Fold 4: 0.7732793522267206
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6935483870967742
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6975806451612904
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.6209677419354839
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.708502024291498
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6532258064516129
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7096774193548387
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6895161290322581
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6774193548387096
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.7554623220582474,
 'KNN': 0.6747649209873319,
 'Arvore de Decisão': 0.6795709808018806,
 'Floresta Aleatoria': 0.799830220713073,
 'HistGradientBoosting': 0.8151658613033825,
 'LIGHTGBM': 0.8038755387227374,
 'MLP': 0.7175525662792216,
 'XGB': 0.7990335640590309,
 'SVC': 0.6908841582865352}

# Combinar as duas técnicas

In [81]:
# Correlation filter followed by outlier removal, on every family except the
# string instruments. Stored under its own name so the full dataset held in
# `df` is not overwritten and earlier cells stay re-runnable.
new_df_filtrado = remove_outilers(df_features_corr(new_df, -0.5, 0.5))
train_models(new_df_filtrado)

Acuracia do modelo GaussianNB() do Fold 0: 0.37831021437578816
Acuracia do modelo GaussianNB() do Fold 1: 0.41740226986128626
Acuracia do modelo GaussianNB() do Fold 2: 0.41740226986128626
Acuracia do modelo GaussianNB() do Fold 3: 0.39092055485498106
Acuracia do modelo GaussianNB() do Fold 4: 0.4110970996216898
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.2572509457755359
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.27994955863808324
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.2812105926860025
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.2408575031525851
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.2837326607818411
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.3455233291298865
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.3240857503152585
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.3203026481715006
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.3114754098360656
Acuracia do modelo DecisionTr

{'Naive Bayes': 0.4030264817150063,
 'KNN': 0.26860025220680955,
 'Arvore de Decisão': 0.32585119798234546,
 'Floresta Aleatoria': 0.46986128625472895,
 'HistGradientBoosting': 0.4978562421185372,
 'LIGHTGBM': 0.4991172761664565,
 'MLP': 0.2728877679697352,
 'XGB': 0.4867591424968475,
 'SVC': 0.3059268600252206}

- Percussão

In [78]:
# Correlation filter followed by outlier removal, on the percussion frame.
# Stored under its own name so the full dataset held in `df` is not overwritten.
df_inst_percusao_filtrado = remove_outilers(df_features_corr(df_inst_percusao, -0.5, 0.5))
train_models(df_inst_percusao_filtrado)

Acuracia do modelo GaussianNB() do Fold 0: 0.6832740213523132
Acuracia do modelo GaussianNB() do Fold 1: 0.6654804270462633
Acuracia do modelo GaussianNB() do Fold 2: 0.6571428571428571
Acuracia do modelo GaussianNB() do Fold 3: 0.7071428571428572
Acuracia do modelo GaussianNB() do Fold 4: 0.6857142857142857
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.5480427046263345
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.5302491103202847
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5071428571428571
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.5607142857142857
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5571428571428572
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.5800711743772242
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5907473309608541
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6357142857142857
Acuracia do modelo DecisionTreeClassifier() do Fo

{'Naive Bayes': 0.6797508896797153,
 'KNN': 0.5406583629893238,
 'Arvore de Decisão': 0.6091637010676156,
 'Floresta Aleatoria': 0.7375495678698526,
 'HistGradientBoosting': 0.7504067107269954,
 'LIGHTGBM': 0.7468352821555668,
 'MLP': 0.5578215556685306,
 'XGB': 0.744707676664972,
 'SVC': 0.5706354855109304}

- Aerofone

In [79]:
# Correlation filter followed by outlier removal, on the df_inst_aerofone frame.
# Stored under its own name so the full dataset held in `df` is not overwritten.
df_inst_aerofone_filtrado = remove_outilers(df_features_corr(df_inst_aerofone, -0.5, 0.5))
train_models(df_inst_aerofone_filtrado)

Acuracia do modelo GaussianNB() do Fold 0: 0.7802690582959642
Acuracia do modelo GaussianNB() do Fold 1: 0.7533632286995515
Acuracia do modelo GaussianNB() do Fold 2: 0.7713004484304933
Acuracia do modelo GaussianNB() do Fold 3: 0.7443946188340808
Acuracia do modelo GaussianNB() do Fold 4: 0.7802690582959642
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.6367713004484304
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.6905829596412556
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.6860986547085202
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.600896860986547
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.6860986547085202
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.6636771300448431
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.7623318385650224
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.6860986547085202
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.6771300448430493
Acuracia do modelo DecisionTreeClas

{'Naive Bayes': 0.7659192825112108,
 'KNN': 0.6600896860986547,
 'Arvore de Decisão': 0.6941704035874439,
 'Floresta Aleatoria': 0.7973094170403587,
 'HistGradientBoosting': 0.7973094170403587,
 'LIGHTGBM': 0.8044843049327353,
 'MLP': 0.7076233183856502,
 'XGB': 0.7838565022421525,
 'SVC': 0.6789237668161435}

- Sopro

In [77]:
# Correlation filter followed by outlier removal, on the wind-instrument frame.
# Stored under its own name so the full dataset held in `df` is not overwritten.
df_inst_sopro_filtrado = remove_outilers(df_features_corr(df_inst_sopro, -0.5, 0.5))
train_models(df_inst_sopro_filtrado)

Acuracia do modelo GaussianNB() do Fold 0: 0.5379310344827586
Acuracia do modelo GaussianNB() do Fold 1: 0.6448275862068965
Acuracia do modelo GaussianNB() do Fold 2: 0.596551724137931
Acuracia do modelo GaussianNB() do Fold 3: 0.5847750865051903
Acuracia do modelo GaussianNB() do Fold 4: 0.6332179930795848
Acuracia do modelo KNeighborsClassifier() do Fold 0: 0.4517241379310345
Acuracia do modelo KNeighborsClassifier() do Fold 1: 0.4862068965517241
Acuracia do modelo KNeighborsClassifier() do Fold 2: 0.5
Acuracia do modelo KNeighborsClassifier() do Fold 3: 0.47750865051903113
Acuracia do modelo KNeighborsClassifier() do Fold 4: 0.5363321799307958
Acuracia do modelo DecisionTreeClassifier() do Fold 0: 0.5793103448275863
Acuracia do modelo DecisionTreeClassifier() do Fold 1: 0.5379310344827586
Acuracia do modelo DecisionTreeClassifier() do Fold 2: 0.5620689655172414
Acuracia do modelo DecisionTreeClassifier() do Fold 3: 0.5363321799307958
Acuracia do modelo DecisionTreeClassifier() do Fo

{'Naive Bayes': 0.5994606848824723,
 'KNN': 0.4903543729865172,
 'Arvore de Decisão': 0.5607755637752059,
 'Floresta Aleatoria': 0.7078654098556257,
 'HistGradientBoosting': 0.7078773416060136,
 'LIGHTGBM': 0.7092494929006085,
 'MLP': 0.4440424770313805,
 'XGB': 0.7064837131607208,
 'SVC': 0.5145090084715427}