# **Practica 2: Metricas de evaluacion**
Huitron Martinez Fernando

Configuracion 1: epochs=4, batch size=2000, validation split =0.9

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.metrics import Precision, Recall
from keras.datasets import mnist

# Definir parámetros de entrenamiento
EPOCHS = 4
BATCH_SIZE = 2000
VALIDATION_SPLIT = 0.9

# Definir función para realizar entrenamiento y evaluación
def train_and_evaluate(x_train, y_train, x_test, y_test):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(1)
    ])

    model.compile(loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT, verbose=0)
    y_pred = (model.predict(x_test) >= 0.5).astype(int)[:,0]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    precision_1 = precision_score(y_test, y_pred)
    print("Precision_1:", precision_1)
    recall_1 = recall_score(y_test, y_pred)
    print("Recall_1:", recall_1)
    f1_s_1 = f1_score(y_test, y_pred)
    print("F1-score_1:", f1_s_1)

    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    print("Precision_0:", precision_0)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    print("Recall_0:", recall_0)
    f1_s_0 = f1_score(y_test, y_pred, pos_label=0)
    print("F1-score_0:", f1_s_0)

    return accuracy, precision, recall, f1, conf_matrix

# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Se obtiene una muestra con igual número de: cincos vs no cincos
idx_y_train_5 = np.where(y_train == 5)[0]
idx_y_train_n5 = np.where(y_train != 5)[0]
idx_y_train_n5 = np.random.choice(idx_y_train_n5, idx_y_train_5.shape[0], replace=False)

idx_y_train = np.concatenate([idx_y_train_5, idx_y_train_n5])
np.random.shuffle(idx_y_train)

x_train = x_train[idx_y_train]
y_train = y_train[idx_y_train]

idx_y_test_5 = np.where(y_test == 5)[0]
idx_y_test_n5 = np.where(y_test != 5)[0]
idx_y_test_n5 = np.random.choice(idx_y_test_n5, idx_y_test_5.shape[0], replace=False)

idx_y_test = np.concatenate([idx_y_test_5, idx_y_test_n5])
np.random.shuffle(idx_y_test)

x_test = x_test[idx_y_test]
y_test = y_test[idx_y_test]

y_train_binary = (y_train == 5).astype(int)
y_test_binary = (y_test == 5).astype(int)

# Realizar entrenamiento y evaluación
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
conf_matrices = []
for _ in range(5):
    accuracy, precision, recall, f1, conf_matrix = train_and_evaluate(x_train, y_train_binary, x_test, y_test_binary)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    conf_matrices.append(conf_matrix)

# Calcular las medias de las métricas
accuracy_mean = np.mean(accuracy_list)
precision_mean = np.mean(precision_list)
recall_mean = np.mean(recall_list)
f1_mean = np.mean(f1_list)

# Imprimir resultados
print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Validation Split:", VALIDATION_SPLIT)
print("Accuracy Mean:", accuracy_mean)
print("Precision Mean:", precision_mean)
print("Recall Mean:", recall_mean)
print("F1-score Mean:", f1_mean)
print()

# Imprimir matriz de confusión promedio
print("Matriz de confusión promedio:")
print(np.mean(conf_matrices, axis=0))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Precision_1: 0.40310077519379844
Recall_1: 0.05829596412556054
F1-score_1: 0.10186092066601372
Precision_0: 0.49244712990936557
Recall_0: 0.9136771300448431
F1-score_0: 0.6399685904986259
Precision_1: 0.5446175637393768
Recall_1: 0.8621076233183856
F1-score_1: 0.6675347222222222
Precision_0: 0.6693548387096774
Recall_0: 0.27914798206278024
F1-score_0: 0.3939873417721519
Precision_1: 0.5718562874251497
Recall_1: 0.6423766816143498
F1-score_1: 0.6050686378035903
Precision_0: 0.592071611253197
Recall_0: 0.5190582959641256
F1-score_0: 0.5531660692951015
Precision_1: 0.5509803921568628
Recall_1: 0.6300448430493274
F1-score_1: 0.5878661087866109
Precision_0: 0.5680628272251309
Recall_0: 0.48654708520179374
F1-score_0: 0.5241545893719807
Precision_1: 0.5016414970453054
Recall_1: 0.8565022421524664
F1-score_1: 0.632712215320911
Precision_0: 0.5095785440613027
Recall_0: 0.1491031390134529
F1-score_0: 0.2

Configuracion 2: epochs=20, batch size=2000, validation split =0.9

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.metrics import Precision, Recall
from keras.datasets import mnist

# Definir parámetros de entrenamiento
EPOCHS = 20
BATCH_SIZE = 2000
VALIDATION_SPLIT = 0.9

# Definir función para realizar entrenamiento y evaluación
def train_and_evaluate(x_train, y_train, x_test, y_test):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(1)
    ])

    model.compile(loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT, verbose=0)
    y_pred = (model.predict(x_test) >= 0.5).astype(int)[:,0]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    precision_1 = precision_score(y_test, y_pred)
    print("Precision_1:", precision_1)
    recall_1 = recall_score(y_test, y_pred)
    print("Recall_1:", recall_1)
    f1_s_1 = f1_score(y_test, y_pred)
    print("F1-score_1:", f1_s_1)

    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    print("Precision_0:", precision_0)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    print("Recall_0:", recall_0)
    f1_s_0 = f1_score(y_test, y_pred, pos_label=0)
    print("F1-score_0:", f1_s_0)

    return accuracy, precision, recall, f1, conf_matrix

# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Se obtiene una muestra con igual número de: cincos vs no cincos
idx_y_train_5 = np.where(y_train == 5)[0]
idx_y_train_n5 = np.where(y_train != 5)[0]
idx_y_train_n5 = np.random.choice(idx_y_train_n5, idx_y_train_5.shape[0], replace=False)

idx_y_train = np.concatenate([idx_y_train_5, idx_y_train_n5])
np.random.shuffle(idx_y_train)

x_train = x_train[idx_y_train]
y_train = y_train[idx_y_train]

idx_y_test_5 = np.where(y_test == 5)[0]
idx_y_test_n5 = np.where(y_test != 5)[0]
idx_y_test_n5 = np.random.choice(idx_y_test_n5, idx_y_test_5.shape[0], replace=False)

idx_y_test = np.concatenate([idx_y_test_5, idx_y_test_n5])
np.random.shuffle(idx_y_test)

x_test = x_test[idx_y_test]
y_test = y_test[idx_y_test]

y_train_binary = (y_train == 5).astype(int)
y_test_binary = (y_test == 5).astype(int)

# Realizar entrenamiento y evaluación
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
conf_matrices = []
for _ in range(5):
    accuracy, precision, recall, f1, conf_matrix = train_and_evaluate(x_train, y_train_binary, x_test, y_test_binary)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    conf_matrices.append(conf_matrix)

# Calcular las medias de las métricas
accuracy_mean = np.mean(accuracy_list)
precision_mean = np.mean(precision_list)
recall_mean = np.mean(recall_list)
f1_mean = np.mean(f1_list)

# Imprimir resultados
print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Validation Split:", VALIDATION_SPLIT)
print("Accuracy Mean:", accuracy_mean)
print("Precision Mean:", precision_mean)
print("Recall Mean:", recall_mean)
print("F1-score Mean:", f1_mean)
print()

# Imprimir matriz de confusión promedio
print("Matriz de confusión promedio:")
print(np.mean(conf_matrices, axis=0))


Precision_1: 0.6099348534201955
Recall_1: 0.8396860986547086
F1-score_1: 0.7066037735849057
Precision_0: 0.7428057553956835
Recall_0: 0.46300448430493274
F1-score_0: 0.5704419889502762
Precision_1: 0.538135593220339
Recall_1: 0.14237668161434977
F1-score_1: 0.225177304964539
Precision_0: 0.5058139534883721
Recall_0: 0.8778026905829597
F1-score_0: 0.6418032786885246
Precision_1: 0.553020134228188
Recall_1: 0.9237668161434978
F1-score_1: 0.6918555835432411
Precision_0: 0.7687074829931972
Recall_0: 0.2533632286995516
F1-score_0: 0.38111298482293426
Precision_1: 0.6241776315789473
Recall_1: 0.850896860986547
F1-score_1: 0.7201138519924098
Precision_0: 0.7658450704225352
Recall_0: 0.4876681614349776
F1-score_0: 0.5958904109589042
Precision_1: 0.5915948275862069
Recall_1: 0.6154708520179372
F1-score_1: 0.6032967032967032
Precision_0: 0.5992990654205608
Recall_0: 0.5751121076233184
F1-score_0: 0.5869565217391305
Epochs: 20
Batch Size: 2000
Validation Split: 0.9
Accuracy Mean: 0.60291479820627

Configuracion 3: epochs=4, batch size=2, validation split =0.9

In [4]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.metrics import Precision, Recall
from keras.datasets import mnist

# Definir parámetros de entrenamiento
EPOCHS = 4
BATCH_SIZE = 2
VALIDATION_SPLIT = 0.9

# Definir función para realizar entrenamiento y evaluación
def train_and_evaluate(x_train, y_train, x_test, y_test):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(1)
    ])

    model.compile(loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT, verbose=0)
    y_pred = (model.predict(x_test) >= 0.5).astype(int)[:,0]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    precision_1 = precision_score(y_test, y_pred)
    print("Precision_1:", precision_1)
    recall_1 = recall_score(y_test, y_pred)
    print("Recall_1:", recall_1)
    f1_s_1 = f1_score(y_test, y_pred)
    print("F1-score_1:", f1_s_1)

    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    print("Precision_0:", precision_0)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    print("Recall_0:", recall_0)
    f1_s_0 = f1_score(y_test, y_pred, pos_label=0)
    print("F1-score_0:", f1_s_0)

    return accuracy, precision, recall, f1, conf_matrix

# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Se obtiene una muestra con igual número de: cincos vs no cincos
idx_y_train_5 = np.where(y_train == 5)[0]
idx_y_train_n5 = np.where(y_train != 5)[0]
idx_y_train_n5 = np.random.choice(idx_y_train_n5, idx_y_train_5.shape[0], replace=False)

idx_y_train = np.concatenate([idx_y_train_5, idx_y_train_n5])
np.random.shuffle(idx_y_train)

x_train = x_train[idx_y_train]
y_train = y_train[idx_y_train]

idx_y_test_5 = np.where(y_test == 5)[0]
idx_y_test_n5 = np.where(y_test != 5)[0]
idx_y_test_n5 = np.random.choice(idx_y_test_n5, idx_y_test_5.shape[0], replace=False)

idx_y_test = np.concatenate([idx_y_test_5, idx_y_test_n5])
np.random.shuffle(idx_y_test)

x_test = x_test[idx_y_test]
y_test = y_test[idx_y_test]

y_train_binary = (y_train == 5).astype(int)
y_test_binary = (y_test == 5).astype(int)

# Realizar entrenamiento y evaluación
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
conf_matrices = []
for _ in range(5):
    accuracy, precision, recall, f1, conf_matrix = train_and_evaluate(x_train, y_train_binary, x_test, y_test_binary)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    conf_matrices.append(conf_matrix)

# Calcular las medias de las métricas
accuracy_mean = np.mean(accuracy_list)
precision_mean = np.mean(precision_list)
recall_mean = np.mean(recall_list)
f1_mean = np.mean(f1_list)

# Imprimir resultados
print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Validation Split:", VALIDATION_SPLIT)
print("Accuracy Mean:", accuracy_mean)
print("Precision Mean:", precision_mean)
print("Recall Mean:", recall_mean)
print("F1-score Mean:", f1_mean)
print()

# Imprimir matriz de confusión promedio
print("Matriz de confusión promedio:")
print(np.mean(conf_matrices, axis=0))


Precision_1: 0.48377403846153844
Recall_1: 0.9024663677130045
F1-score_1: 0.6298904538341158
Precision_0: 0.275
Recall_0: 0.03699551569506727
F1-score_0: 0.06521739130434782
Precision_1: 0.5515151515151515
Recall_1: 0.10201793721973094
F1-score_1: 0.17218543046357615
Precision_0: 0.5052501544163064
Recall_0: 0.9170403587443946
F1-score_0: 0.6515332536837913
Precision_1: 0.6513761467889908
Recall_1: 0.3183856502242152
F1-score_1: 0.42771084337349397
Precision_0: 0.5489614243323442
Recall_0: 0.8295964125560538
F1-score_0: 0.6607142857142857
Precision_1: 0.5656728444802579
Recall_1: 0.7869955156950673
F1-score_1: 0.6582278481012659
Precision_0: 0.6500920810313076
Recall_0: 0.3957399103139013
F1-score_0: 0.49198606271777
Precision_1: 0.5444126074498568
Recall_1: 0.4260089686098655
F1-score_1: 0.4779874213836478
Precision_0: 0.5285451197053407
Recall_0: 0.6434977578475336
F1-score_0: 0.5803842264914055
Epochs: 4
Batch Size: 2
Validation Split: 0.9
Accuracy Mean: 0.5358744394618835
Precision

Configuracion 4: epochs=4, batch size=2000, validation split =0.01

In [3]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.metrics import Precision, Recall
from keras.datasets import mnist

# Definir parámetros de entrenamiento
EPOCHS = 4
BATCH_SIZE = 2000
VALIDATION_SPLIT = 0.01

# Definir función para realizar entrenamiento y evaluación
def train_and_evaluate(x_train, y_train, x_test, y_test):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(1)
    ])

    model.compile(loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT, verbose=0)
    y_pred = (model.predict(x_test) >= 0.5).astype(int)[:,0]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    precision_1 = precision_score(y_test, y_pred)
    print("Precision_1:", precision_1)
    recall_1 = recall_score(y_test, y_pred)
    print("Recall_1:", recall_1)
    f1_s_1 = f1_score(y_test, y_pred)
    print("F1-score_1:", f1_s_1)

    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    print("Precision_0:", precision_0)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    print("Recall_0:", recall_0)
    f1_s_0 = f1_score(y_test, y_pred, pos_label=0)
    print("F1-score_0:", f1_s_0)

    return accuracy, precision, recall, f1, conf_matrix

# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Se obtiene una muestra con igual número de: cincos vs no cincos
idx_y_train_5 = np.where(y_train == 5)[0]
idx_y_train_n5 = np.where(y_train != 5)[0]
idx_y_train_n5 = np.random.choice(idx_y_train_n5, idx_y_train_5.shape[0], replace=False)

idx_y_train = np.concatenate([idx_y_train_5, idx_y_train_n5])
np.random.shuffle(idx_y_train)

x_train = x_train[idx_y_train]
y_train = y_train[idx_y_train]

idx_y_test_5 = np.where(y_test == 5)[0]
idx_y_test_n5 = np.where(y_test != 5)[0]
idx_y_test_n5 = np.random.choice(idx_y_test_n5, idx_y_test_5.shape[0], replace=False)

idx_y_test = np.concatenate([idx_y_test_5, idx_y_test_n5])
np.random.shuffle(idx_y_test)

x_test = x_test[idx_y_test]
y_test = y_test[idx_y_test]

y_train_binary = (y_train == 5).astype(int)
y_test_binary = (y_test == 5).astype(int)

# Realizar entrenamiento y evaluación
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
conf_matrices = []
for _ in range(5):
    accuracy, precision, recall, f1, conf_matrix = train_and_evaluate(x_train, y_train_binary, x_test, y_test_binary)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    conf_matrices.append(conf_matrix)

# Calcular las medias de las métricas
accuracy_mean = np.mean(accuracy_list)
precision_mean = np.mean(precision_list)
recall_mean = np.mean(recall_list)
f1_mean = np.mean(f1_list)

# Imprimir resultados
print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Validation Split:", VALIDATION_SPLIT)
print("Accuracy Mean:", accuracy_mean)
print("Precision Mean:", precision_mean)
print("Recall Mean:", recall_mean)
print("F1-score Mean:", f1_mean)
print()

# Imprimir matriz de confusión promedio
print("Matriz de confusión promedio:")
print(np.mean(conf_matrices, axis=0))


Precision_1: 0.5852568875651526
Recall_1: 0.8811659192825112
F1-score_1: 0.7033557046979865
Precision_0: 0.7596371882086168
Recall_0: 0.3755605381165919
F1-score_0: 0.5026256564141036
Precision_1: 0.7570332480818415
Recall_1: 0.6636771300448431
F1-score_1: 0.7072879330943848
Precision_0: 0.7005988023952096
Recall_0: 0.7869955156950673
F1-score_0: 0.7412882787750792
Precision_1: 0.6061427280939476
Recall_1: 0.7522421524663677
F1-score_1: 0.671335667833917
Precision_0: 0.6735598227474151
Recall_0: 0.5112107623318386
F1-score_0: 0.5812619502868068
Precision_1: 0.6673728813559322
Recall_1: 0.3531390134529148
F1-score_1: 0.46187683284457487
Precision_0: 0.5602134146341463
Recall_0: 0.8239910313901345
F1-score_0: 0.6669691470054446
Precision_1: 0.562369337979094
Recall_1: 0.9047085201793722
F1-score_1: 0.6935969058874086
Precision_0: 0.7564469914040115
Recall_0: 0.29596412556053814
F1-score_0: 0.42546333601933933
Epochs: 4
Batch Size: 2000
Validation Split: 0.01
Accuracy Mean: 0.634865470852

Configuracion propuesta: epochs=10, batch size=30, validation split =0.2

In [7]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.metrics import Precision, Recall
from keras.datasets import mnist

# Definir parámetros de entrenamiento
EPOCHS = 30
BATCH_SIZE = 100
VALIDATION_SPLIT = 0.2

# Definir función para realizar entrenamiento y evaluación
def train_and_evaluate(x_train, y_train, x_test, y_test):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(1)
    ])

    model.compile(loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall()])
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT, verbose=0)
    y_pred = (model.predict(x_test) >= 0.5).astype(int)[:,0]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    precision_1 = precision_score(y_test, y_pred)
    print("Precision_1:", precision_1)
    recall_1 = recall_score(y_test, y_pred)
    print("Recall_1:", recall_1)
    f1_s_1 = f1_score(y_test, y_pred)
    print("F1-score_1:", f1_s_1)

    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    print("Precision_0:", precision_0)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    print("Recall_0:", recall_0)
    f1_s_0 = f1_score(y_test, y_pred, pos_label=0)
    print("F1-score_0:", f1_s_0)

    return accuracy, precision, recall, f1, conf_matrix

# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Se obtiene una muestra con igual número de: cincos vs no cincos
idx_y_train_5 = np.where(y_train == 5)[0]
idx_y_train_n5 = np.where(y_train != 5)[0]
idx_y_train_n5 = np.random.choice(idx_y_train_n5, idx_y_train_5.shape[0], replace=False)

idx_y_train = np.concatenate([idx_y_train_5, idx_y_train_n5])
np.random.shuffle(idx_y_train)

x_train = x_train[idx_y_train]
y_train = y_train[idx_y_train]

idx_y_test_5 = np.where(y_test == 5)[0]
idx_y_test_n5 = np.where(y_test != 5)[0]
idx_y_test_n5 = np.random.choice(idx_y_test_n5, idx_y_test_5.shape[0], replace=False)

idx_y_test = np.concatenate([idx_y_test_5, idx_y_test_n5])
np.random.shuffle(idx_y_test)

x_test = x_test[idx_y_test]
y_test = y_test[idx_y_test]

y_train_binary = (y_train == 5).astype(int)
y_test_binary = (y_test == 5).astype(int)

# Realizar entrenamiento y evaluación
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
conf_matrices = []
for _ in range(5):
    accuracy, precision, recall, f1, conf_matrix = train_and_evaluate(x_train, y_train_binary, x_test, y_test_binary)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    conf_matrices.append(conf_matrix)

# Calcular las medias de las métricas
accuracy_mean = np.mean(accuracy_list)
precision_mean = np.mean(precision_list)
recall_mean = np.mean(recall_list)
f1_mean = np.mean(f1_list)

# Imprimir resultados
print("Epochs:", EPOCHS)
print("Batch Size:", BATCH_SIZE)
print("Validation Split:", VALIDATION_SPLIT)
print("Accuracy Mean:", accuracy_mean)
print("Precision Mean:", precision_mean)
print("Recall Mean:", recall_mean)
print("F1-score Mean:", f1_mean)
print()

# Imprimir matriz de confusión promedio
print("Matriz de confusión promedio:")
print(np.mean(conf_matrices, axis=0))


Precision_1: 0.8313373253493014
Recall_1: 0.9338565022421524
F1-score_1: 0.8796198521647307
Precision_0: 0.9245524296675192
Recall_0: 0.8105381165919282
F1-score_0: 0.8637992831541218
Precision_1: 0.9174528301886793
Recall_1: 0.8721973094170403
F1-score_1: 0.8942528735632184
Precision_0: 0.8782051282051282
Recall_0: 0.92152466367713
F1-score_0: 0.8993435448577681
Precision_1: 0.8668098818474759
Recall_1: 0.9047085201793722
F1-score_1: 0.8853538123971475
Precision_0: 0.9003516998827668
Recall_0: 0.8609865470852018
F1-score_0: 0.8802292263610316
Precision_1: 0.9332425068119891
Recall_1: 0.7679372197309418
F1-score_1: 0.8425584255842558
Precision_0: 0.8028571428571428
Recall_0: 0.945067264573991
F1-score_0: 0.8681771369721936
Precision_1: 0.8899188876013905
Recall_1: 0.8609865470852018
F1-score_1: 0.8752136752136752
Precision_0: 0.8653637350705755
Recall_0: 0.8934977578475336
F1-score_0: 0.8792057363485936
Epochs: 30
Batch Size: 100
Validation Split: 0.2
Accuracy Mean: 0.8771300448430492
