**Importar bibliotecas**

In [67]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, LSTM, ConvLSTM1D
from keras.models import Sequential, Model
# Load the feature and label arrays from .npy files in the working directory.
# NOTE(review): array shapes are not visible here. The model built further down
# uses input_shape = (15, 768) and a sparse categorical loss, so X is presumably
# (n_samples, 15, 768) and Y a vector of integer class ids — confirm.
X = np.load('X.npy')
Y = np.load('Y.npy')

**Construcción de la arquitectura de la red neuronal**  
Hemos decidido partir de una implementación parametrizable de un bloque de la arquitectura Transformer, para poder crear una red neuronal con un número variable de bloques y de cabezas de atención. Estos hiperparámetros se ajustaron después con ayuda de Keras Tuner, aunque aquí solo incluimos la arquitectura final para evitar redundancia.

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Builds a single parameterisable Transformer block.
def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer block to ``inputs`` and return the resulting tensor.

    The block is multi-head attention of ``inputs`` over itself, followed by a
    two-layer feed-forward network. Each of the two sub-layers is summed with
    its own input (residual connection) and then layer-normalised.

    Args:
        inputs: Keras tensor the block is applied to. Its last dimension sets
            the width of the feed-forward output so the residual sum is valid.
        head_size: Value passed as ``key_dim`` to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Units of the hidden ReLU feed-forward layer.
        dropout: Dropout rate applied after the attention layer and after the
            hidden feed-forward layer.

    Returns:
        The output of the second layer normalisation.
    """
    model_width = inputs.shape[-1]

    # Attention sub-layer + residual + normalisation.
    attended = MultiHeadAttention(key_dim=head_size, num_heads=num_heads)(inputs, inputs)
    attended = Dropout(dropout)(attended)
    attention_out = LayerNormalization(epsilon=1e-6)(attended + inputs)

    # Feed-forward sub-layer: expand to ff_dim, project back to the input width.
    hidden = Dense(ff_dim, activation="relu")(attention_out)
    hidden = Dropout(dropout)(hidden)
    projected = Dense(model_width)(hidden)
    return LayerNormalization(epsilon=1e-6)(projected + attention_out)

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_blocks, num_classes, dropout=0):
    """Build a classifier made of stacked Transformer blocks and a dense head.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        head_size: Forwarded to every ``transformer_block`` call.
        num_heads: Forwarded to every ``transformer_block`` call.
        ff_dim: Forwarded to every ``transformer_block`` call.
        num_blocks: How many Transformer blocks are chained one after another.
        num_classes: Units of the final softmax layer.
        dropout: Dropout rate used inside the blocks and after each hidden
            dense layer of the head.

    Returns:
        An uncompiled Keras ``Model`` mapping the input to class probabilities.
    """
    inputs = Input(shape=input_shape)

    # Encoder: num_blocks Transformer blocks applied sequentially.
    features = inputs
    for _ in range(num_blocks):
        features = transformer_block(features, head_size, num_heads, ff_dim, dropout)

    # Classification head: flatten, then 128- and 64-unit ReLU layers, each
    # followed by dropout.
    features = Flatten()(features)
    for units in (128, 64):
        features = Dense(units, activation='relu')(features)
        features = Dropout(dropout)(features)

    class_probabilities = Dense(num_classes, activation='softmax')(features)
    return Model(inputs, class_probabilities)

# Seed Python's `random`, NumPy and TensorFlow before any layer is created, so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# after a kernel restart. 115 matches the random_state used by the
# train/validation split in this notebook.
# NOTE(review): bit-exact results on GPU may additionally require
# tf.config.experimental.enable_op_determinism() — confirm if needed.
SEED = 115
tf.keras.utils.set_random_seed(SEED)

# Model hyperparameters. The notebook text states they were tuned with Keras
# Tuner; only the final values are kept here.
input_shape = (15, 768)  # passed to Input(shape=...)
head_size = 256          # key_dim of the attention layers
num_heads = 4
ff_dim = 448             # hidden units of each block's feed-forward layer
num_blocks = 5
num_classes = 3
dropout = 0.4

model = build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_blocks, num_classes, dropout)

# Compile the model. The sparse loss is used with integer class ids as labels
# (no one-hot encoding of Y is done in this notebook).
optimizer = Adam(learning_rate=0.00007)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary.
model.summary()


**Parte de entrenamiento**

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


# Hold out 20% of the samples for validation. Stratifying on Y keeps the class
# proportions the same in both partitions.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=115,
    stratify=Y,
)

# Stop when val_loss has not improved for 45 epochs and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=45, restore_best_weights=True)

# Keep on disk only the epoch with the highest val_accuracy.
# NOTE(review): early stopping tracks val_loss while the checkpoint tracks
# val_accuracy, so the in-memory model and 'best_model.keras' may come from
# different epochs — confirm this is intended.
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')

training_callbacks = [early_stopping, model_checkpoint]

history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=71,   # batch size; adjust as needed
    epochs=300,      # upper bound on epochs; adjust as needed
    validation_data=(X_val, Y_val),
    callbacks=training_callbacks,
)

**Parte de desglose de validación**

In [75]:
from sklearn.metrics import classification_report, confusion_matrix
# Predicted class per validation sample: index of the highest model output.
val_scores = model.predict(X_val)
Y_pred = val_scores.argmax(axis=1)

# Per-class precision / recall / F1, then the confusion matrix.
validation_report = classification_report(Y_val, Y_pred)
print(validation_report)
validation_confusion = confusion_matrix(Y_val, Y_pred)
print(validation_confusion)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 111ms/step
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        74
           1       0.96      0.97      0.96       323
           2       0.89      0.84      0.86        74

    accuracy                           0.95       471
   macro avg       0.94      0.93      0.94       471
weighted avg       0.95      0.95      0.95       471

[[ 73   1   0]
 [  2 313   8]
 [  0  12  62]]


In [None]:
def plot_history(history):
    """Show the training and validation curves stored in a Keras ``History``.

    Two figures are displayed one after the other: accuracy per epoch, then
    loss per epoch. Each figure overlays the training series and its ``val_``
    counterpart.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
    """
    import matplotlib.pyplot as plt

    # (metric key, figure title, y-axis label) for each figure, in display order.
    panels = (
        ('accuracy', 'Precisión del modelo', 'Precisión'),
        ('loss', 'Pérdida del modelo', 'Pérdida'),
    )

    for metric, title, y_label in panels:
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Época')
        plt.legend(['Entrenamiento', 'Validación'], loc='upper left')
        plt.show()

# Draw the accuracy and loss curves recorded by model.fit.
plot_history(history)

**Finalmente, una vez que estuvimos satisfechos con el modelo y habíamos decidido no hacer más ajustes, se procedió a evaluar su rendimiento en el conjunto de prueba.**

In [77]:
# Load the saved model that is evaluated on the test set; this rebinds `model`.
# NOTE(review): the ModelCheckpoint callback in the training cell writes
# 'best_model.keras', whereas this reads 'best_modelTT.keras'. Nothing in this
# notebook creates that file, so a fresh Restart & Run All depends on it already
# existing on disk — confirm it is the intended checkpoint.
model = tf.keras.models.load_model('best_modelTT.keras')

In [78]:
# Load the test arrays used for the final evaluation.
# NOTE(review): this rebinds X and Y, which held the training data until now.
# Re-running the train/validation split cell after this one would split the
# test set instead — distinct names (e.g. X_test / Y_test) would avoid that.
X = np.load('X-test.npy')
Y = np.load('Y-test.npy')

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Predicted class per test sample: index of the highest model output.
test_scores = model.predict(X)
Y_pred = test_scores.argmax(axis=1)

# Per-class precision / recall / F1, then the confusion matrix.
test_report = classification_report(Y, Y_pred)
print(test_report)
test_confusion = confusion_matrix(Y, Y_pred)
print(test_confusion)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# NOTE(review): this cell runs the same evaluation as the previous cell (same
# model, X and Y) and so prints the same report again; it looks like a leftover
# duplicate — consider removing it.
Y_pred = np.argmax(model.predict(X), axis=1)
print(classification_report(Y, Y_pred))
print(confusion_matrix(Y, Y_pred))