In [None]:
import pandas as pd
import numpy as np
import statistics as st
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from google.colab import drive

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn import preprocessing
from sklearn import neighbors
from sklearn import tree
from sklearn.multioutput import MultiOutputClassifier
from sklearn.multioutput import ClassifierChain

import tensorflow as tf
from tensorflow import keras
import cv2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model

drive.mount('/content/drive')
folderPath = "/content/drive/My Drive/TCC/"

df = pd.read_csv(folderPath + "substructure.csv")
df2 = pd.read_csv(folderPath + "pubchem.csv")
df3 = pd.read_csv(folderPath + "KR.csv")
df4 = pd.read_csv(folderPath + "atompair.csv")

labels = pd.read_csv(folderPath + "labels.csv")

df.drop(['Name'],axis=1, inplace=True)
df2.drop(['Name'],axis=1, inplace=True)
df3.drop(['Name'],axis=1, inplace=True)
df4.drop(['Name'],axis=1, inplace=True)
labels.drop(['row ID'],axis=1, inplace=True)

x = df.iloc[:,:]
x2 = df2.iloc[:,:]
x3 = df3.iloc[:,:]
x4 = df4.iloc[:,:]
y = labels.iloc[:,:]

#Iterar o algoritmo 50 vezes para análise estatística armazenando os resultados de acurácia e de F-Measure em duas matrizes

num_iteracoes = 50
df_acuracia = []
df_fmeasure = []

for i in range(num_iteracoes):
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
  x_train2, x_test2, y_train, y_test = train_test_split(x2, y, test_size=0.2)
  x_train3, x_test3, y_train, y_test = train_test_split(x3, y, test_size=0.2)
  x_train4, x_test4, y_train, y_test = train_test_split(x4, y, test_size=0.2)

  def run_pca(x,y):
    pca = PCA(n_components=4, random_state=42,whiten=True)
    cols = list(x.columns)
    x_pca = pca.fit_transform(x[cols].values)
    print(pca.explained_variance_ratio_)
    print(pca.singular_values_)

    x_pca = pd.DataFrame(x_pca, columns=['comp1', 'comp2','comp3','comp4'])
    x_train_pca, x_test_pca,y_train,y_test = train_test_split(x_pca, y, test_size=0.2)
    return x_train_pca, x_test_pca

  def run_multioutput(x_train, x_test,y_train,y_test):
    model = MultiOutputClassifier(neighbors.KNeighborsClassifier(n_neighbors=3))
    model2 = MultiOutputClassifier(tree.DecisionTreeClassifier(max_depth=8,random_state=0))
    model.fit(x_train, y_train)
    model2.fit(x_train,y_train)

    pred = model.predict(x_train)
    accuracy = accuracy_score(y_train, pred)
    f_measure = f1_score(y_train, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Treino neighbors: ", f_measure)
    pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, pred)
    f_measure = f1_score(y_test, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Teste neighbors: ", f_measure)

    pred = model.predict(x_train)
    accuracy = accuracy_score(y_train, pred)
    f_measure = f1_score(y_train, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Treino tree: ", f_measure)
    pred = model2.predict(x_test)
    accuracy = accuracy_score(y_test, pred)
    f_measure = f1_score(y_test, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Teste tree: ", f_measure,"\n")

  def run_chain(x_train,x_test,y_train,y_test):
    model3 = ClassifierChain(neighbors.KNeighborsClassifier(n_neighbors=5))
    model4 = ClassifierChain(tree.DecisionTreeClassifier(max_depth=8,random_state=0))
    model3.fit(x_train,y_train)
    model4.fit(x_train,y_train)

    pred = model3.predict(x_train)
    accuracy = accuracy_score(y_train, pred)
    f_measure = f1_score(y_train, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Treino neighbors: ", f_measure)
    pred = model3.predict(x_test)
    accuracy = accuracy_score(y_test, pred)
    f_measure = f1_score(y_test, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Teste neighbors: ", f_measure)

    pred = model4.predict(x_train)
    accuracy = accuracy_score(y_train, pred)
    f_measure = f1_score(y_train, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Treino tree: ", f_measure)
    pred = model4.predict(x_test)
    accuracy = accuracy_score(y_test, pred)
    f_measure = f1_score(y_test, pred, average='micro')
    df_acuracia.append(accuracy)
    df_fmeasure.append(f_measure)
    print("Acurácia Teste tree: ", f_measure,"\n")


  def run_autoencoder(x_train,x_test,y_train,y_test):
    tamanho = x_train.shape[1]

    # Construção das camadas

    input = keras.layers.Input(shape=(tamanho, ))

    encoder = keras.layers.Dense(int(tamanho/2), activation='sigmoid')(input)
    encoder = keras.layers.Dense(int(tamanho/5), activation='sigmoid')(encoder)
    encoder = keras.layers.Dense(int(tamanho/7), activation='sigmoid')(encoder)

    latent = keras.layers.Dense(int(30), activation='sigmoid')(encoder)

    decoder = keras.layers.Dense(int(tamanho/7), activation='sigmoid')(latent)
    decoder = keras.layers.Dense(int(tamanho/5), activation='sigmoid')(decoder)
    decoder = keras.layers.Dense(int(tamanho/2), activation='sigmoid')(decoder)

    output = keras.layers.Dense(int(tamanho), activation='sigmoid')(decoder)



    # Criação do modelo

    autoencoder = Model(input, output)
    autoencoder.compile(optimizer='adam', loss='mse', metrics='accuracy')

    plot_model(autoencoder, 'autoencoder.png', show_shapes=True)

    # Treinamento do modelo
    treinamento = autoencoder.fit(x_train, x_train, epochs=50, batch_size=16, verbose=False, validation_data=(x_test,x_test))

    # Reconstrução da base

    reconstrucao = autoencoder.predict(x_train)
    train_loss = tf.keras.losses.mse(reconstrucao, x_train)
    plt.plot(treinamento.history['loss'], label='train')
    plt.plot(treinamento.history['val_loss'], label='test')
    plt.legend()
    plt.show()
    # define an encoder model (without the decoder)
    encoder = Model(inputs=input, outputs=latent)
    plot_model(encoder, 'encoder_no_compress.png', show_shapes=True)
    # save the encoder to file
    encoder.save('encoder.h5')
    # encode the train data
    x_train_encode = encoder.predict(x_train)
    # encode the test data
    x_test_encode = encoder.predict(x_test)
    return x_train_encode, x_test_encode

  run_multioutput(x_train, x_test, y_train, y_test)
  run_multioutput(x_train2, x_test2, y_train, y_test)
  run_multioutput(x_train3, x_test3, y_train, y_test)
  run_multioutput(x_train4, x_test4, y_train, y_test)

  run_chain(x_train, x_test, y_train, y_test)
  run_chain(x_train2, x_test2, y_train, y_test)
  run_chain(x_train3, x_test3, y_train, y_test)
  run_chain(x_train4, x_test4, y_train, y_test)

  x_train_pca, x_test_pca = run_pca(x,y)
  x_train_pca2, x_test_pca2 = run_pca(x2,y)
  x_train_pca3, x_test_pca3 = run_pca(x3,y)
  x_train_pca4, x_test_pca4 = run_pca(x4,y)

  run_multioutput(x_train_pca, x_test_pca, y_train, y_test)
  run_multioutput(x_train_pca2, x_test_pca2, y_train, y_test)
  run_multioutput(x_train_pca3, x_test_pca3, y_train, y_test)
  run_multioutput(x_train_pca4, x_test_pca4, y_train, y_test)

  run_chain(x_train_pca, x_test_pca, y_train, y_test)
  run_chain(x_train_pca2, x_test_pca2, y_train, y_test)
  run_chain(x_train_pca3, x_test_pca3, y_train, y_test)
  run_chain(x_train_pca4, x_test_pca4, y_train, y_test)

  x_train_encode, x_test_encode = run_autoencoder(x_train, x_test, y_train, y_test)
  x_train_encode2, x_test_encode2 = run_autoencoder(x_train2, x_test2, y_train, y_test)
  x_train_encode3, x_test_encode3 = run_autoencoder(x_train3, x_test3, y_train, y_test)
  x_train_encode4, x_test_encode4 = run_autoencoder(x_train4, x_test4, y_train, y_test)

  run_multioutput(x_train_encode, x_test_encode, y_train, y_test)
  run_multioutput(x_train_encode2, x_test_encode2, y_train, y_test)
  run_multioutput(x_train_encode3, x_test_encode3, y_train, y_test)
  run_multioutput(x_train_encode4, x_test_encode4, y_train, y_test)

  run_chain(x_train_encode, x_test_encode, y_train, y_test)
  run_chain(x_train_encode2, x_test_encode2, y_train, y_test)
  run_chain(x_train_encode3, x_test_encode3, y_train, y_test)
  run_chain(x_train_encode4, x_test_encode4, y_train, y_test)

In [None]:
planilha_acuracia = pd.DataFrame({'Acuracia': df_acuracia})
planilha_acuracia.to_csv('acuracia.csv', index=False)
planilha_fmeasure = pd.DataFrame({'FMeasure': df_fmeasure})
planilha_fmeasure.to_csv('fmeasure.csv', index=False)

In [None]:
dados1 = planilha_acuracia['Acuracia'].to_numpy().reshape((96, num_iteracoes))
dados2 = planilha_fmeasure['FMeasure'].to_numpy().reshape((96, num_iteracoes))
# Crie um novo DataFrame com as novas colunas
novas_acuracia = [f'Acuracia{i+1}' for i in range(num_iteracoes)]
df_acuracia = pd.DataFrame(dados1, columns=novas_acuracia)
novas_measure = [f'Measure{i+1}' for i in range(num_iteracoes)]
df_measure = pd.DataFrame(dados2, columns=novas_measure)

# Visualize o novo DataFrame
df_acuracia.to_csv('df_acuracia.csv', index=False)
df_measure.to_csv('df_measure.csv', index=False)
df_acuracia

Unnamed: 0,Acuracia1,Acuracia2,Acuracia3,Acuracia4,Acuracia5,Acuracia6,Acuracia7,Acuracia8,Acuracia9,Acuracia10,...,Acuracia41,Acuracia42,Acuracia43,Acuracia44,Acuracia45,Acuracia46,Acuracia47,Acuracia48,Acuracia49,Acuracia50
0,0.466667,0.625,0.466667,0.500,0.633333,0.500,0.633333,0.500,0.433333,0.375,...,0.566667,0.250,0.566667,0.125,0.533333,0.375,0.533333,0.375,0.566667,0.125
1,0.566667,0.125,0.533333,0.000,0.866667,0.125,0.333333,0.250,0.966667,0.250,...,1.000000,0.125,0.366667,0.375,0.766667,0.625,0.566667,0.125,0.566667,0.250
2,0.466667,0.000,0.466667,0.125,0.600000,0.375,0.600000,0.500,0.700000,0.250,...,0.500000,0.250,0.500000,0.500,0.566667,0.125,0.600000,0.000,0.466667,0.125
3,0.800000,0.250,0.500000,0.375,1.000000,0.250,0.400000,0.250,0.733333,0.375,...,0.800000,0.000,0.500000,0.125,0.500000,0.000,0.700000,0.250,0.700000,0.250
4,0.533333,0.000,0.533333,0.250,0.533333,0.375,0.533333,0.500,0.500000,0.000,...,0.466667,0.375,0.533333,0.125,0.500000,0.000,0.900000,0.250,0.466667,0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,0.500000,0.000,0.633333,0.000,0.633333,0.250,0.533333,0.125,0.533333,0.250,...,0.700000,0.125,0.433333,0.125,0.600000,0.125,0.333333,0.125,0.900000,0.125
92,0.400000,0.500,1.000000,0.375,0.600000,0.375,0.766667,0.250,0.300000,0.000,...,0.533333,0.000,0.533333,0.125,0.566667,0.625,0.566667,0.500,0.533333,0.250
93,0.533333,0.250,0.433333,0.125,0.433333,0.125,0.366667,0.000,0.466667,0.125,...,0.566667,0.125,0.400000,0.125,0.900000,0.375,0.433333,0.250,1.000000,0.125
94,0.533333,0.625,0.733333,0.500,0.333333,0.375,0.333333,0.375,0.566667,0.125,...,0.600000,0.000,0.600000,0.125,0.633333,0.500,0.633333,0.500,0.466667,0.125


In [None]:
df_measure

Unnamed: 0,Measure1,Measure2,Measure3,Measure4,Measure5,Measure6,Measure7,Measure8,Measure9,Measure10,...,Measure41,Measure42,Measure43,Measure44,Measure45,Measure46,Measure47,Measure48,Measure49,Measure50
0,0.677419,0.600000,0.677419,0.444444,0.764706,0.533333,0.764706,0.500000,0.721311,0.500000,...,0.777778,0.166667,0.777778,0.000000,0.756757,0.533333,0.756757,0.500000,0.760563,0.000000
1,0.753623,0.000000,0.736842,0.400000,0.923077,0.181818,0.600000,0.333333,0.985507,0.333333,...,1.000000,0.375000,0.675676,0.500000,0.861538,0.666667,0.564103,0.285714,0.564103,0.588235
2,0.590909,0.470588,0.590909,0.526316,0.727273,0.666667,0.727273,0.700000,0.769231,0.666667,...,0.622222,0.555556,0.622222,0.782609,0.680851,0.470588,0.681818,0.375000,0.595745,0.526316
3,0.844444,0.600000,0.653061,0.727273,1.000000,0.608696,0.553191,0.588235,0.827586,0.750000,...,0.867925,0.526316,0.746988,0.600000,0.746988,0.266667,0.843750,0.375000,0.843750,0.307692
4,0.781250,0.181818,0.781250,0.166667,0.800000,0.705882,0.800000,0.714286,0.727273,0.526316,...,0.707692,0.428571,0.700000,0.166667,0.722222,0.444444,0.935484,0.428571,0.746667,0.307692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,0.680000,0.526316,0.740741,0.266667,0.740741,0.555556,0.622222,0.555556,0.622222,0.700000,...,0.821429,0.470588,0.488889,0.571429,0.666667,0.545455,0.470588,0.285714,0.920000,0.400000
92,0.444444,0.727273,1.000000,0.666667,0.680000,0.727273,0.851852,0.666667,0.566667,0.375000,...,0.774194,0.444444,0.774194,0.533333,0.727273,0.777778,0.727273,0.700000,0.700000,0.600000
93,0.700000,0.571429,0.666667,0.375000,0.666667,0.400000,0.580645,0.470588,0.571429,0.533333,...,0.640000,0.470588,0.676471,0.555556,0.933333,0.666667,0.701299,0.571429,1.000000,0.153846
94,0.773333,0.777778,0.827586,0.750000,0.644068,0.461538,0.644068,0.461538,0.816901,0.500000,...,0.782609,0.421053,0.782609,0.470588,0.788732,0.625000,0.788732,0.615385,0.773333,0.444444


In [None]:
media_acc = df_acuracia.mean(axis=1)
media_acc = media_acc.to_numpy().reshape(48,2)
media_acc = pd.DataFrame(media_acc)

In [None]:
std_acc = df_acuracia.std(axis=1)
std_acc = std_acc.to_numpy().reshape(48,2)
std_acc = pd.DataFrame(std_acc)

In [None]:
media_measure = df_measure.mean(axis=1)
media_measure = media_measure.to_numpy().reshape(48,2)
media_measure = pd.DataFrame(media_measure)

In [None]:
std_measure = df_measure.std(axis=1)
std_measure = std_measure.to_numpy().reshape(48,2)
std_measure = pd.DataFrame(std_measure)

In [None]:
df_res = pd.concat([media_acc, std_acc,media_measure,std_measure], axis=1)
df_res.columns = [f'{col}_{idx}' if df_res.columns[:idx].tolist().count(col) > 0 else col for idx, col in enumerate(df_res.columns)]
df_res = df_res.rename(columns={0: 'mean_train_acc', 1: 'mean_test_acc', '0_2':'std_train_acc', '1_3': 'std_test_acc', '0_4': 'mean_train_measure', '1_5':'mean_test_measure','0_6': 'std_train_measure','1_7':'std_test_measure' })
df_res = df_res.rename(index={
    0: 'df1 + MO + KNN',
    1: 'df1 + MO + DT',
    2: 'df2 + MO + KNN',
    3: 'df2 + MO + DT',
    4: 'df3 + MO + KNN',
    5: 'df3 + MO + DT',
    6: 'df4 + MO + KNN',
    7: 'df4 + MO + DT',
    8: 'df1 + RC + KNN',
    9: 'df1 + RC + DT',
    10: 'df2 + RC + KNN',
    11: 'df2 + RC + DT',
    12: 'df3 + RC + KNN',
    13: 'df3 + RC + DT',
    14: 'df4 + RC + KNN',
    15: 'df4 + RC + DT',
    16: 'df1 + PCA + MO + KNN',
    17: 'df1 + PCA + MO + DT',
    18: 'df2 + PCA + MO + KNN',
    19: 'df2 + PCA + MO + DT',
    20: 'df3 + PCA + MO + KNN',
    21: 'df3 + PCA + MO + DT',
    22: 'df4 + PCA + MO + KNN',
    23: 'df4 + PCA + MO + DT',
    24: 'df1 + PCA + RC + KNN',
    25: 'df1 + PCA + RC + DT',
    26: 'df2 + PCA + RC + KNN',
    27: 'df2 + PCA + RC + DT',
    28: 'df3 + PCA + RC + KNN',
    29: 'df3 + PCA + RC + DT',
    30: 'df4 + PCA + RC + KNN',
    31: 'df4 + PCA + RC + DT',
    32: 'df1 + AE + MO + KNN',
    33: 'df1 + AE + MO + DT',
    34: 'df2 + AE + MO + KNN',
    35: 'df2 + AE + MO + DT',
    36: 'df3 + AE + MO + KNN',
    37: 'df3 + AE + MO + DT',
    38: 'df4 + AE + MO + KNN',
    39: 'df4 + AE + MO + DT',
    40: 'df1 + AE + RC + KNN',
    41: 'df1 + AE + RC + DT',
    42: 'df2 + AE + RC + KNN',
    43: 'df2 + AE + RC + DT',
    44: 'df3 + AE + RC + KNN',
    45: 'df3 + AE + RC + DT',
    46: 'df4 + AE + RC + KNN',
    47: 'df4 + AE + RC + DT'})

In [None]:
df_res

Unnamed: 0,mean_train_acc,mean_test_acc,std_train_acc,std_test_acc,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
df1 + MO + KNN,0.475,0.454,0.180725,0.231499,0.579416,0.598644,0.249981,0.211955
df1 + MO + DT,0.399833,0.4055,0.228388,0.281344,0.642753,0.613856,0.121362,0.199976
df2 + MO + KNN,0.402833,0.411333,0.227076,0.261129,0.599854,0.616674,0.226822,0.217981
df2 + MO + DT,0.406167,0.407167,0.212884,0.23371,0.631259,0.588218,0.162742,0.185206
df3 + MO + KNN,0.3955,0.386833,0.23984,0.266747,0.563922,0.589448,0.234944,0.215758
df3 + MO + DT,0.359667,0.363167,0.285324,0.261759,0.534361,0.600005,0.235897,0.202132
df4 + MO + KNN,0.408833,0.462333,0.222708,0.231248,0.59853,0.659347,0.196617,0.176441
df4 + MO + DT,0.402333,0.412333,0.226272,0.252876,0.589996,0.630229,0.180262,0.178487
df1 + RC + KNN,0.374333,0.452,0.237612,0.220932,0.583211,0.589914,0.196467,0.210981
df1 + RC + DT,0.4335,0.449667,0.179249,0.232701,0.601396,0.664516,0.186042,0.173706


In [None]:
res_df1 = pd.DataFrame()
filtro = lambda x: 'df1' in x
mascara = df_res.index.map(filtro)
res_df1 = df_res[mascara]

In [None]:
df_rounded = res_df1.iloc[:,4:]
df_rounded = df_rounded.round(3)
df_rounded

Unnamed: 0,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
df1 + MO + KNN,0.579,0.599,0.25,0.212
df1 + MO + DT,0.643,0.614,0.121,0.2
df1 + RC + KNN,0.583,0.59,0.196,0.211
df1 + RC + DT,0.601,0.665,0.186,0.174
df1 + PCA + MO + KNN,0.611,0.605,0.159,0.219
df1 + PCA + MO + DT,0.621,0.591,0.18,0.214
df1 + PCA + RC + KNN,0.603,0.634,0.175,0.155
df1 + PCA + RC + DT,0.584,0.63,0.254,0.192
df1 + AE + MO + KNN,0.606,0.628,0.19,0.16
df1 + AE + MO + DT,0.577,0.589,0.179,0.232


In [None]:
res_df1.describe()

Unnamed: 0,mean_train_acc,mean_test_acc,std_train_acc,std_test_acc,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
count,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0
mean,0.412764,0.428083,0.216703,0.238362,0.600327,0.611173,0.190959,0.196396
std,0.02488,0.02493,0.025443,0.016709,0.018821,0.025518,0.035903,0.02702
min,0.374333,0.376,0.175537,0.220884,0.577217,0.571725,0.121362,0.154983
25%,0.399833,0.414125,0.198888,0.229642,0.583871,0.590652,0.17819,0.171198
50%,0.40425,0.435667,0.223709,0.234147,0.599968,0.609663,0.187899,0.205478
75%,0.42375,0.45025,0.236769,0.242865,0.606923,0.628493,0.198841,0.214832
max,0.475,0.454,0.248779,0.281344,0.642753,0.664516,0.25442,0.232065


In [None]:
res_df2 = pd.DataFrame()
filtro = lambda x: 'df2' in x
mascara = df_res.index.map(filtro)
res_df2 = df_res[mascara]

In [None]:
df_rounded = res_df2.iloc[:,4:]
df_rounded = df_rounded.round(3)
df_rounded

Unnamed: 0,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
df2 + MO + KNN,0.6,0.617,0.227,0.218
df2 + MO + DT,0.631,0.588,0.163,0.185
df2 + RC + KNN,0.601,0.573,0.212,0.205
df2 + RC + DT,0.603,0.632,0.183,0.171
df2 + PCA + MO + KNN,0.628,0.619,0.155,0.183
df2 + PCA + MO + DT,0.628,0.634,0.196,0.155
df2 + PCA + RC + KNN,0.597,0.616,0.201,0.219
df2 + PCA + RC + DT,0.546,0.6,0.236,0.199
df2 + AE + MO + KNN,0.563,0.643,0.204,0.169
df2 + AE + MO + DT,0.61,0.664,0.196,0.153


In [None]:
res_df2.describe()

Unnamed: 0,mean_train_acc,mean_test_acc,std_train_acc,std_test_acc,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
count,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0
mean,0.418917,0.415778,0.227925,0.224934,0.602534,0.614769,0.196588,0.189339
std,0.019349,0.016577,0.019106,0.02198,0.028493,0.026463,0.025435,0.023781
min,0.3775,0.381167,0.193054,0.184084,0.545525,0.572706,0.154821,0.152653
25%,0.407542,0.406125,0.214448,0.208485,0.593498,0.596963,0.179326,0.17019
50%,0.422667,0.41825,0.224852,0.224037,0.601719,0.616505,0.198772,0.191765
75%,0.426458,0.424375,0.242726,0.240932,0.628055,0.632693,0.213355,0.207969
max,0.452167,0.443333,0.256462,0.261129,0.641162,0.663728,0.235738,0.218909


In [None]:
res_df3 = pd.DataFrame()
filtro = lambda x: 'df3' in x
mascara = df_res.index.map(filtro)
res_df3 = df_res[mascara]

In [None]:
df_rounded = res_df3.iloc[:,4:]
df_rounded = df_rounded.round(3)
df_rounded

Unnamed: 0,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
df3 + MO + KNN,0.564,0.589,0.235,0.216
df3 + MO + DT,0.534,0.6,0.236,0.202
df3 + RC + KNN,0.612,0.623,0.207,0.178
df3 + RC + DT,0.552,0.672,0.265,0.181
df3 + PCA + MO + KNN,0.606,0.553,0.224,0.237
df3 + PCA + MO + DT,0.594,0.584,0.193,0.172
df3 + PCA + RC + KNN,0.588,0.606,0.224,0.21
df3 + PCA + RC + DT,0.546,0.611,0.258,0.208
df3 + AE + MO + KNN,0.654,0.61,0.158,0.177
df3 + AE + MO + DT,0.594,0.545,0.223,0.28


In [None]:
res_df4 = pd.DataFrame()
filtro = lambda x: 'df4' in x
mascara = df_res.index.map(filtro)
res_df4 = df_res[mascara]

In [None]:
df_rounded = res_df4.iloc[:,4:]
df_rounded = df_rounded.round(3)
df_rounded

Unnamed: 0,mean_train_measure,mean_test_measure,std_train_measure,std_test_measure
df4 + MO + KNN,0.599,0.659,0.197,0.176
df4 + MO + DT,0.59,0.63,0.18,0.178
df4 + RC + KNN,0.654,0.646,0.189,0.181
df4 + RC + DT,0.624,0.608,0.167,0.184
df4 + PCA + MO + KNN,0.557,0.529,0.251,0.274
df4 + PCA + MO + DT,0.636,0.64,0.143,0.164
df4 + PCA + RC + KNN,0.618,0.581,0.183,0.212
df4 + PCA + RC + DT,0.573,0.62,0.203,0.19
df4 + AE + MO + KNN,0.584,0.65,0.239,0.16
df4 + AE + MO + DT,0.616,0.587,0.19,0.213
