In [None]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.activations import linear, relu, sigmoid
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import f1_score

In [None]:
# Global random seed; the training cell feeds it to the Python, NumPy and TensorFlow RNGs.
SEED = 1234

In [None]:
# Load the three pre-split datasets (train / validation / test) from CSV, in that order.
training_set, validation_set, test_set = (
    pd.read_csv(csv_file)
    for csv_file in (
        "../../../data/training_set.csv",
        "../../../data/validation_set.csv",
        "../../../data/test_set.csv",
    )
)


In [None]:
# Display the loaded test split for a quick visual inspection.
test_set

In [None]:
def _features_and_target(frame):
    """Split one dataset frame into (features, target) NumPy arrays.

    The 'date' column is left out of the features because of its date format;
    'target_trend' is the label column and is returned separately.
    """
    features = frame.drop(columns=['target_trend','date']).values
    target = frame['target_trend'].values
    return features, target


# Convert every split to NumPy arrays.
x_train, y_train = _features_and_target(training_set)
x_validation, y_validation = _features_and_target(validation_set)
x_test, y_test = _features_and_target(test_set)

In [None]:
# NOTE: disabled alternative that selects columns by position instead of by name.
# # Assign the feature columns (time series)
# x_train = training_set.iloc[:, 1:51]
# x_validation = validation_set.iloc[:, 1:51]
# x_test = test_set.iloc[:, 1:51]
# # Assign the target column (last column)
# y_train = training_set.iloc[:, -1]
# y_validation = validation_set.iloc[:, -1]
# y_test = test_set.iloc[:, -1]


In [None]:
# Relative frequency of each class label within every split.
train_distribution, validation_distribution, test_distribution = (
    pd.Series(labels).value_counts(normalize=True)
    for labels in (y_train, y_validation, y_test)
)

# Put the three distributions side by side, aligned on the class label;
# a class that is missing from a split is shown with proportion 0.
split_shares = {
    'Train': train_distribution,
    'Validation': validation_distribution,
    'Test': test_distribution
}
df = pd.DataFrame(split_shares).fillna(0)

# Grouped bar chart for a visual comparison of the splits.
ax = df.plot(kind='bar', title='Distribución de Clases')
ax.set_xlabel('Clase')
ax.set_ylabel('Proporción')
ax.grid(True)
plt.show()


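IMPORTANTE: Hay que sumar 1 a la columna objetivo (`target_trend`) porque la etiqueta -1 no es válida para la función de pérdida (`SparseCategoricalCrossentropy` espera clases enteras a partir de 0).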

In [None]:
# Shift the class labels by +1 so that the raw label -1 becomes 0: the sparse
# categorical cross-entropy loss used for training needs non-negative class indices.
#
# The shifted arrays are rebuilt from the source frames instead of using `+=`:
#   * the cell is idempotent (re-running it does not shift the labels again);
#   * the arrays returned by `.values` are never written to, so the DataFrames
#     are not modified through a shared buffer and the cell also works when
#     pandas hands out read-only arrays (Copy-on-Write).
y_train = training_set['target_trend'].values + 1
y_validation = validation_set['target_trend'].values + 1
y_test = test_set['target_trend'].values + 1

In [None]:
# Inspect the training feature matrix.
x_train

Compilación y entrenamiento de la red

In [None]:
# Seed every RNG in play (Python, NumPy, TensorFlow) for reproducibility.
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it here
# does not change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Hyperparameters
learning_rate = 0.001
n_l1 = 100
n_l2 = 60
n_l3 = 30
n_l4 = 10
# n_l5 = 10

# Take the input width from the data instead of hard-coding it, so the model
# keeps working if the number of feature columns changes.
n_features = x_train.shape[1]
# Number of output classes (labels 0, 1 and 2 after the +1 shift).
n_classes = 3

# Model definition: a stack of fully connected ReLU layers.
# The last layer is linear, i.e. it outputs raw logits; the softmax is applied
# inside the loss via `from_logits=True`.
model = Sequential(
    [
        tf.keras.Input(shape=(n_features,)),
        Dense(n_l1,activation='relu',name='L1'),
        # Dropout(0.2, name='Dropout1'),
        Dense(n_l2,activation='relu',name='L2'),
        # Dropout(0.2, name='Dropout2'),
        Dense(n_l3,activation='relu',name='L3'),
        # Dropout(0.2, name='Dropout3'),
        Dense(n_l4,activation='relu',name='L4'),
        # Dropout(0.2, name='Dropout4'),
        # Dense(n_l5,activation='relu',name='L5'),
        # Dropout(0.2, name='Dropout5'),
        Dense(n_classes,activation='linear',name='L6'),
    ], name = "softmax"
)

# Model compilation
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    metrics=['accuracy']
)

# Stop training when val_loss has not improved for 10 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
# checkpoint = ModelCheckpoint(
#     filepath='best_model.keras',
#     monitor='val_loss',
#     save_best_only=True
# )

# Train for at most 40 epochs, monitoring the validation split after each one.
history = model.fit(
    x_train,y_train,
    epochs=40,
    validation_data=(x_validation, y_validation),
    callbacks=[early_stop]
)


In [None]:
# Print a summary of the trained model's architecture.
model.summary()

GUARDADO EN .CSV DE LAS MÉTRICAS Y LOS HIPERPARÁMETROS DE LOS MODELOS ENTRENADOS

In [None]:
# Compute the validation metrics of the model that was just trained.
loss, acc = model.evaluate(x_validation, y_validation, verbose=0)
y_pred_val = np.argmax(model.predict(x_validation), axis=1)
f1 = f1_score(y_validation, y_pred_val, average='macro')

# Load the log of previous runs first (if any): the run number of this entry
# must continue from the rows already stored in the file. Numbering it from a
# freshly emptied in-memory list would label every run as 1.
csv_path = 'resultados_entrenamientos.csv'
df_existente = pd.read_csv(csv_path) if os.path.exists(csv_path) else None
n_previos = 0 if df_existente is None else len(df_existente)

# Record the metrics together with the hyperparameters that produced them.
resultados = [{
    'run': n_previos + 1,
    'val_loss': loss,
    'val_accuracy': acc,
    'val_f1_macro': f1,
    'epochs': len(history.history['loss']),
    'learning_rate': learning_rate,
    'n_l1': n_l1,
    'n_l2': n_l2,
    'n_l3': n_l3,
    'n_l4': n_l4,
    #'n_l5': n_l5,
    # Add more parameters here if needed
}]
df_nuevos = pd.DataFrame(resultados)

# Append the new row to the existing log, or start a new log with it.
if df_existente is not None:
    df_final = pd.concat([df_existente, df_nuevos], ignore_index=True)
else:
    df_final = df_nuevos

# Persist the combined log.
df_final.to_csv(csv_path, index=False)

In [None]:
# Evaluate on the validation split (progress output enabled) and keep both metrics.
loss, accuracy = model.evaluate(
    x=x_validation,
    y=y_validation,
)

Gráfica de Loss y Accuracy en train_set y validation_set

In [None]:
# One figure per metric, each comparing the training curve with the validation curve.
# Every row: (figure title, train key, train label, validation key, validation label).
curves = (
    ("Loss", 'loss', 'Train Loss', 'val_loss', 'Val Loss'),
    ("Accuracy", 'accuracy', 'Train Acc', 'val_accuracy', 'Val Acc'),
)

for title, train_key, train_label, val_key, val_label in curves:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel("Value")
    plt.show()


F1 SCORE, MATRIZ DE CONFUSIÓN Y REPORTE DE CLASIFICACIÓN (VALIDACIÓN Y TEST)

In [106]:
# Run both predictions first so the progress bars are printed before the reports.
val_logits = model.predict(x_validation)
test_logits = model.predict(x_test)

# The predicted class is the index of the largest logit in each row.
y_pred_val = val_logits.argmax(axis=1)
y_pred = test_logits.argmax(axis=1)

# Full report per split: confusion matrix plus per-class precision, recall and F1.
# The test report is printed with 4 decimals; the validation report uses the default.
for header, y_true, y_hat, report_kwargs in (
    ("Validación:", y_validation, y_pred_val, {}),
    ("Test:", y_test, y_pred, {"digits": 4}),
):
    print(header)
    print(confusion_matrix(y_true, y_hat))
    print(classification_report(y_true, y_hat, **report_kwargs))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Validación:
[[15 12  0]
 [ 2  4  2]
 [ 6 16 73]]
              precision    recall  f1-score   support

           0       0.65      0.56      0.60        27
           1       0.12      0.50      0.20         8
           2       0.97      0.77      0.86        95

    accuracy                           0.71       130
   macro avg       0.58      0.61      0.55       130
weighted avg       0.85      0.71      0.76       130

Test:
[[34 17  2]
 [ 5  7  1]
 [ 2 13 50]]
              precision    recall  f1-score   support

           0     0.8293    0.6415    0.7234        53
           1     0.1892    0.5385    0.2800        13
           2     0.9434    0.7692    0.8475        65

    accuracy                         0.6947       131
   macro avg     0.6540    0.6497    0.6170       131
weighted avg     0.8224    0.6947    0.7410       131

