# **Equipo C - 2024 - 1**

# **Modelo LSTM**

Importación de bibliotecas:

In [1]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.feature_selection import mutual_info_regression, SelectKBest, f_regression
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

Limpieza de Datos

In [2]:
def clean_data(data):
    data = data.dropna()
    return data

Normalización de las Variables

In [3]:
def normalize_data(data):
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)
    return pd.DataFrame(data_scaled, columns=data.columns), scaler

Selección de Variables utilizando diferentes métodos

In [4]:
def select_features(X, y, num_features):
    mutual_info = mutual_info_regression(X, y)
    k_best = SelectKBest(score_func=f_regression, k=num_features).fit(X, y)
    features = X.columns[k_best.get_support(indices=True)]
    return features.tolist()

Definición de la función para entrenar el modelo LSTM:

In [5]:
def train_lstm(X_train, y_train, input_shape):
    """
    Función para entrenar un modelo LSTM.

    Parámetros:
    X_train (numpy array): Conjunto de datos de entrenamiento (características).
    y_train (numpy array): Conjunto de datos de entrenamiento (objetivo).
    input_shape (tuple): Forma de los datos de entrada.

    Retorna:
    model (Sequential): Modelo LSTM entrenado.
    """
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2)

    return model

Preparacion de datos

In [6]:
# Cargar datos
data = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/FSM?period1=1597123200&period2=1628659200&interval=1d&events=history&includeAdjustedClose=true')

# Mantener la columna de fechas para las gráficas
dates = data['Date']
data = data.drop(columns=['Date'])

# Limpiar y Normalizar
data = clean_data(data)
data, scaler = normalize_data(data)

# Seleccionar Variables
target_column = 'Close'
num_features = 5  # Número de características a seleccionar
selected_features = select_features(data.drop(columns=[target_column]), data[target_column], num_features)
selected_features.append(target_column)
data = data[selected_features]

# Separar características y objetivo
X = data.drop(columns=[target_column])
y = data[target_column]

# Dividir los datos en conjuntos de entrenamiento y prueba
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
dates_train, dates_test = dates[:train_size], dates[train_size:]

print(f'Características seleccionadas: {selected_features}')

Características seleccionadas: ['Open', 'High', 'Low', 'Adj Close', 'Volume', 'Close']


Preparación de los datos para LSTM:


In [7]:
# Los datos de entrenamiento y prueba se reestructuran en un formato 3D requerido por LSTM (samples, timesteps, features)
X_train_lstm = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))

Entrenamiento del modelo LSTM:


In [8]:
# Entrenamos el modelo LSTM usando los datos de entrenamiento reestructurados
lstm_model = train_lstm(X_train_lstm, y_train, (1, X_train.shape[1]))


Epoch 1/500


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 68ms/step - loss: 0.3243 - val_loss: 0.2164
Epoch 2/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2819 - val_loss: 0.1837
Epoch 3/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.2257 - val_loss: 0.1503
Epoch 4/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1841 - val_loss: 0.1157
Epoch 5/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.1404 - val_loss: 0.0805
Epoch 6/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.1087 - val_loss: 0.0468
Epoch 7/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0656 - val_loss: 0.0197
Epoch 8/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0272 - val_loss: 0.0047
Epoch 9/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6

Predicciones con el modelo LSTM:


In [9]:
# Generamos predicciones sobre los datos de prueba usando el modelo LSTM entrenado
lstm_predictions = pd.Series(lstm_model.predict(X_test_lstm).flatten(), index=X_test.index)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step


Cálculo de métricas de validación:


In [10]:
# Calculamos el MAPE (Mean Absolute Percentage Error) para evaluar la precisión del modelo
mape_lstm = mean_absolute_percentage_error(y_test, lstm_predictions)
# Calculamos el RMSE (Root Mean Squared Error) para evaluar el error del modelo
rmse_lstm = np.sqrt(mean_squared_error(y_test, lstm_predictions))

# Imprimimos las métricas de validación
print(f'MAPE LSTM: {mape_lstm}')
print(f'RMSE LSTM: {rmse_lstm}')


MAPE LSTM: 15701556967866.256
RMSE LSTM: 0.04600810431843332
