## TensorFlow Keras

* "y" es números enteros (índices de clase: 0, 1, 2, 3....): SparseCategoricalCrossentropy
* "y" codificada en one-hot (p. ej. [0, 0, 1]): CategoricalCrossentropy

In [27]:
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [28]:
# Short alias so the rest of the notebook can write `keras.<...>` instead of `tf.keras.<...>`.
keras = tf.keras

In [29]:
# Synthetic 3-class classification problem; random_state is fixed so the data is reproducible.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=3,
    weights=[1/3]*3,  # 3 balanced classes
    random_state=42
)

# Name the columns from the actual width of X instead of repeating the literal 20,
# so the frame stays consistent if n_features is changed above.
df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(1, X.shape[1] + 1)])
df['class'] = y
df.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,...,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,class
0,0.57791,-1.175159,3.182231,0.404955,-0.613865,-1.039464,-0.239075,-2.836809,-8.026848,-9.041112,...,-3.014423,-0.923978,1.09465,0.71198,1.824328,2.145572,0.051517,-3.797374,1.102314,0
1,-3.2561,0.919359,1.291473,0.030575,1.66001,-0.552647,0.659944,-1.471791,0.089973,-0.870679,...,-3.269361,0.624425,0.528576,-2.705067,0.70074,-2.075426,-1.278485,0.82484,-0.712165,1
2,0.263308,-1.464582,1.580419,0.452207,0.693424,-0.664855,-2.134743,-3.664896,-3.941314,-4.077946,...,-2.652247,0.73402,-0.504239,-1.229404,1.276939,0.018643,-1.459663,-2.030924,-2.562835,0
3,2.327166,2.972473,2.546896,-0.887235,0.763177,2.039143,-1.942751,-0.079054,4.994518,-3.459207,...,-4.096714,2.801948,-0.499836,0.92242,0.170235,2.051526,1.737861,1.805639,2.357027,0
4,0.04787,0.033218,-1.179213,0.779208,1.454919,2.839628,1.145734,2.98837,6.476426,8.485573,...,1.31519,0.065581,1.876118,-0.038665,-2.712738,0.374466,-0.72004,0.809654,-2.902803,1


In [30]:
# Separate the target column from the features, then hold out 20% of the rows as a test set.
# stratify=y keeps the class proportions the same in both partitions.
y = df['class']
X = df.drop(columns='class')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [31]:
# One-hot encode the integer class labels of both partitions.
# The Keras Precision, Recall and AUC metrics used below are fed one-hot targets,
# so encoding here lets them work without any extra configuration.
y_train_encoded, y_test_encoded = (
    keras.utils.to_categorical(labels, num_classes=3)
    for labels in (y_train, y_test)
)
y_train_encoded[:5]

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.]])

In [32]:
# 0. Reproducibility: start from a clean Keras state and seed every RNG involved.
keras.backend.clear_session()
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

# 1. Architecture
model = keras.Sequential([
    # Declaring the input width up front builds the model immediately, so
    # model.summary() works before training and a feature-count mismatch fails early.
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    # Output layer for multiclass classification: one neuron per class to predict.
    keras.layers.Dense(3, activation='softmax')
])

# 2. Compile
model.compile(
    # The targets are already one-hot encoded ([0, 0, 1]) -> CategoricalCrossentropy
    # (string form: "categorical_crossentropy").
    # With integer labels (0, 1, 2, 3...) use keras.losses.SparseCategoricalCrossentropy() instead.
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=[
        'accuracy',  # given as a string, Keras selects the accuracy variant suited to binary vs. multiclass
        keras.metrics.Precision(),
        keras.metrics.Recall(),
        keras.metrics.AUC()
    ]
)

# 3. Fit
# validation_split=0.2 validates on part of the training data and leaves the test set untouched.
# Alternative: validation_data=(X_test, y_test_encoded) trains on all of X_train, but it spends the
# chance to validate on separate data and can introduce data leakage.
# The History object is kept in a variable instead of being echoed as the cell output.
history = model.fit(X_train, y_train_encoded, validation_split=0.2, epochs=100, verbose=1, batch_size=32)

Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.5983 - auc: 0.7767 - loss: 0.8716 - precision: 0.6667 - recall: 0.4491 - val_accuracy: 0.8200 - val_auc: 0.9428 - val_loss: 0.4950 - val_precision: 0.8519 - val_recall: 0.7550
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8455 - auc: 0.9529 - loss: 0.4471 - precision: 0.8813 - recall: 0.8038 - val_accuracy: 0.8575 - val_auc: 0.9637 - val_loss: 0.3812 - val_precision: 0.8833 - val_recall: 0.8325
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8914 - auc: 0.9704 - loss: 0.3426 - precision: 0.9130 - recall: 0.8669 - val_accuracy: 0.8775 - val_auc: 0.9714 - val_loss: 0.3308 - val_precision: 0.8967 - val_recall: 0.8575
Epoch 4/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9080 - auc: 0.9783 - loss: 0.2877 - precision: 0.9182 - recall: 0.8

<keras.src.callbacks.history.History at 0x1e49269e390>

In [33]:
# Print the layer-by-layer overview of the model (layers, output shapes, parameter counts).
model.summary()

In [46]:
# Per-epoch training log of the last fit() call: one row per epoch, one column per metric.
df_history = pd.DataFrame.from_dict(model.history.history)
df_history.head()

In [45]:
def plot_history(df_history):
    """Draw one figure per training metric, overlaying its validation curve when present.

    Parameters
    ----------
    df_history : pandas.DataFrame
        One row per epoch and one column per metric. A column named
        ``val_<metric>`` is treated as the validation counterpart of ``<metric>``.
    """
    train_metrics = [col for col in df_history.columns if not col.startswith('val_')]
    for metric in train_metrics:
        fig, ax = plt.subplots(figsize=(7, 5))
        ax.plot(df_history[metric], label=f'{metric} train')

        val_metric = f'val_{metric}'
        if val_metric in df_history.columns:
            # These curves come from the validation split, not from the test set,
            # so the legend says "val" rather than "val test".
            ax.plot(df_history[val_metric], label=f'{metric} val')

        ax.set(title=metric, xlabel='epochs', ylabel=metric)
        ax.legend()
        plt.show()
        # Release the figure explicitly; one figure is created per metric.
        plt.close(fig)


plot_history(df_history)

* Disminución inicial de ambas pérdidas (loss, val_loss):
    * El modelo está aprendiendo correctamente
    * Lo ideal es que vayan a la par

Cosas que pueden ocurrir:

* A partir de un epoch el loss y val_loss ya no mejoran, ya no hay más aprendizaje, no se logra mejorar más por lo que es mejor cortar ya los epochs.

* El val_loss empieza a aumentar a partir de un determinado epoch. Esto sugiere overfitting: el modelo está memorizando en lugar de generalizar; se aprende demasiado bien los datos de train y luego no es capaz de predecir bien datos que no ha visto (validación/test). Lo ideal es cortar en el epoch donde val_loss alcanza su mínimo.

* Los picos pueden deberse a muchas causas:
    * learning_rate alto podría ocasionar ajustes grandes generando inestabilidad
    * Regularización: si no hay regularización puede haber overfitting e inestabilidad. Solución: capas Dropout
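    * Conjunto de validación pequeño: con validation_split Keras separa una sola vez, antes de barajar, la última fracción de los datos de entrenamiento (aquí el 20 %, 800 ejemplos) y usa esas mismas muestras en todos los epochs; no se vuelven a extraer en cada epoch. Lo que sí cambia en cada epoch es el orden (shuffle) de los batches de entrenamiento. Con tan pocos ejemplos de validación las métricas tienen más ruido, y si además los datos son ficticios o raros eso puede dar lugar a picos.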

In [44]:
# Final evaluation on the held-out test set. evaluate() returns the loss first,
# followed by the metrics in the order they were passed to compile().
test_loss, test_accuracy, test_precision, test_recall, test_auc = model.evaluate(X_test, y_test_encoded)
# The model is compiled with categorical cross-entropy, so the former "bce" (binary
# cross-entropy) name was misleading. The alias only keeps any cell that still reads
# the old variable name working.
test_loss_bce = test_loss
print('test_loss', test_loss)
print('test_accuracy', test_accuracy)
print('test_precision', test_precision)
print('test_recall', test_recall)
print('test_auc', test_auc)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9264 - auc: 0.9767 - loss: 0.4280 - precision: 0.9262 - recall: 0.9239
test_loss_bce 0.5063223838806152
test_accuracy 0.9240000247955322
test_precision 0.923923909664154
test_recall 0.9229999780654907
test_auc 0.9740415215492249


In [37]:
model.predict(X_test)[:5] # probability matrix [n_samples, n_classes]

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


array([[2.5740479e-09, 6.8927496e-14, 1.0000000e+00],
       [9.9995565e-01, 7.8351842e-10, 4.4345135e-05],
       [2.5641359e-04, 2.2808072e-04, 9.9951553e-01],
       [1.0000000e+00, 4.6574625e-16, 1.2467059e-15],
       [1.4736199e-15, 9.9999988e-01, 9.3812581e-08]], dtype=float32)

In [38]:
# Predicted class probabilities for every test row, then, per row, the index of the
# column with the highest probability, i.e. the predicted class: 0, 1, 2...
y_pred = model.predict(X_test)
y_pred_labels = y_pred.argmax(axis=1)
y_pred_labels[:10]

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([2, 0, 2, 0, 1, 0, 0, 2, 1, 0])

In [39]:
# Side-by-side view of the true labels and the predicted labels.
# The rows keep the index of y_test; the predictions are attached positionally.
df_comparison = pd.DataFrame({'real': y_test}).assign(prediccion=y_pred_labels)
df_comparison.head()

Unnamed: 0,real,prediccion
480,2,2
2575,0,0
4452,2,2
550,0,0
4351,1,1
