In [None]:
import itertools

import joblib
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Symbols of the water-related assets to model; each one is used as a column
# name of the price DataFrame and as a prefix for the saved model/scaler files.
tickers = [
    "XYL", "AWK", "SVTRF", "UUGWF", "WTRG", "VEOEF", "ECL", "GBERF",
    "AQUA", "WMS", "KTWIF", "PNR", "TTEK", "SBS", "AWR", "STN",
    "CWT", "FELE", "WTS", "BMI", "VMI", "PEGRF", "FCHRF", "SJW",
    "FERG", "ROP", "DHR", "AOS", "TTC", "IEX", "ZWS", "MLI",
    "LNN", "MWA", "ERII", "GRC", "NWPX", "MSEX", "ARTNA", "YORW",
    "GWRS", "WAT", "ITRI", "IDXX", "ACM", "A", "AQN", "FLS",
    "PRMW", "CNM", "HWKN", "MEG", "GEBNE.SW", "SVT", "UU.L", "SPX.L",
]

In [None]:
# Load the CSV file into a pandas DataFrame indexed by the parsed 'Date' column.
df = pd.read_csv('water_assets.csv', index_col='Date', decimal=".", parse_dates=True)

# Back-fill missing observations. `fillna(method="bfill")` is deprecated in
# pandas 2.x, so use the dedicated `bfill()` method instead.
# NOTE(review): back-filling copies *later* values into earlier rows, which is
# a look-ahead on a time series -- confirm this is intended.
df = df.bfill()

# Resample to one row every 7 days. `asfreq` reindexes onto a regular grid, so
# any grid date absent from the file comes back as NaN, and `bfill` never fills
# trailing gaps. Forward-fill afterwards so no NaN reaches the network (a single
# NaN turns the training loss, and therefore every grid-search score, into NaN).
df = df.asfreq("7d").ffill()

# Interest-rate series, used as a second input feature for every ticker.
tipo_interes = df['interest_rate'].values

In [None]:
# Length (in rows) of each input window fed to the network.
n_steps = 30
# Number of future rows predicted from each window.
n_out = 1

# Hyper-parameter grid explored exhaustively for every ticker.
param_grid = dict(
    units1=[64, 128, 256],                      # units of the first LSTM layer
    units2=[32, 64, 128],                       # units of the second LSTM layer
    units3=[32, 64, 128],                       # units of the third LSTM layer
    dropout_rate=[0.0, 0.05, 0.1, 0.15, 0.2],   # dropout rate applied after each LSTM layer
    batch_size=[16, 32],                        # mini-batch sizes
)

In [None]:
def prepare_data(data, tipo_interes, n_steps, n_out):
    """
    Build sliding-window samples for an LSTM with two input features.

    Args:
    - data: 2-D numpy array; only column 0 (the main series) is read.
    - tipo_interes: numpy array with the interest-rate series, sliced with the
      same row positions as `data`.
    - n_steps: number of consecutive rows in each input window.
    - n_out: number of rows following the window used as the target.

    Returns:
    - X: numpy array of stacked windows; each window places the main series
      and the interest rate side by side as columns.
    - y: numpy array with, per window, the next `n_out` values of column 0.
    """
    # One window per start position that leaves room for both input and target;
    # the range is empty when the series is shorter than n_steps + n_out.
    window_starts = range(len(data) - (n_steps + n_out) + 1)

    windows = [
        np.column_stack((data[start:start + n_steps, 0],
                         tipo_interes[start:start + n_steps]))
        for start in window_starts
    ]
    targets = [
        data[start + n_steps:start + n_steps + n_out, 0]
        for start in window_starts
    ]
    return np.array(windows), np.array(targets)

def create_model(units1, units2, units3, dropout_rate, n_out = 1, n_steps = 30, n_features = 2):
    """
    Build and compile a three-layer stacked LSTM regressor.

    Args:
    - units1, units2, units3: number of units of the three LSTM layers.
    - dropout_rate: dropout rate applied after each LSTM layer.
    - n_out: size of the final Dense output layer.
    - n_steps: length of each input window.
    - n_features: number of features per time step.

    Returns:
    - A Sequential model compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential()
    model.add(LSTM(units=units1, return_sequences=True, input_shape=(n_steps, n_features)))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=units2, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=units3))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=n_out))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


# The interest-rate column is the same for every ticker, so reshape it once.
interest = tipo_interes.reshape(-1, 1)

# Fixed key order so the grid is walked exactly like the equivalent nested loops.
grid_keys = ('units1', 'units2', 'units3', 'dropout_rate', 'batch_size')

for i in tickers:
    print(".....................................................")
    print(i)
    df_i = df[[i]]
    combined_data = np.hstack((df_i.values, interest))  # price + interest rate

    # Fail fast: a single NaN makes the loss (and every grid-search score) NaN,
    # which would only surface after the whole grid has been trained.
    if pd.isna(combined_data).any():
        raise ValueError(f"Input data for {i} contains NaN values; fill or drop them before training")

    # Build the sliding-window samples.
    X, y = prepare_data(combined_data, interest, n_steps, n_out)

    # Chronological split: 80% train, next 10% for model selection ("test"),
    # last 10% passed to `fit` as validation data ("val").
    n_samples = X.shape[0]
    train_end = int(n_samples * 0.8)
    test_end = int(n_samples * 0.9)
    if not (0 < train_end < test_end < n_samples):
        raise ValueError(f"Not enough samples for {i} to build train/test/validation splits ({n_samples} windows)")

    X_train, X_test, X_val = X[:train_end], X[train_end:test_end], X[test_end:]
    y_train, y_test, y_val = y[:train_end], y[train_end:test_end], y[test_end:]

    # Min-Max scaling fitted on the training split only, then applied to the rest.
    scaler_X = MinMaxScaler()
    n_features = X_train.shape[2]
    X_train = scaler_X.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
    X_test = scaler_X.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)
    X_val = scaler_X.transform(X_val.reshape(-1, n_features)).reshape(X_val.shape)

    scaler_y = MinMaxScaler()
    y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).reshape(y_train.shape)
    y_test = scaler_y.transform(y_test.reshape(-1, 1)).reshape(y_test.shape)
    y_val = scaler_y.transform(y_val.reshape(-1, 1)).reshape(y_val.shape)

    # Exhaustive grid search; the winner is written to disk as soon as it is
    # found so that every candidate model can be released right after scoring.
    best_score = float('inf')
    best_params = None
    model_path = f'{i}_best_model.h5'

    for units1, units2, units3, dropout_rate, batch_size in itertools.product(
            *(param_grid[key] for key in grid_keys)):
        print(f'Tuning: units1={units1}, units2={units2}, units3={units3}, dropout_rate={dropout_rate}, batch_size={batch_size}')
        # Pass the real window geometry instead of relying on the defaults.
        model = create_model(units1, units2, units3, dropout_rate,
                             n_out=n_out, n_steps=n_steps, n_features=n_features)

        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=batch_size, verbose=0)

        # With no extra metrics compiled, `evaluate` returns the scalar MSE loss.
        score = model.evaluate(X_test, y_test, verbose=0)
        if score < best_score:
            best_score = score
            best_params = {
                'units1': units1,
                'units2': units2,
                'units3': units3,
                'dropout_rate': dropout_rate,
                'batch_size': batch_size,
            }
            model.save(model_path)

        # Release the candidate: hundreds of models are built per ticker and
        # Keras keeps global state for each of them until the session is cleared.
        del model
        keras.backend.clear_session()

    if best_params is None:
        # Every comparison `score < inf` failed, i.e. all scores were NaN.
        raise RuntimeError(f"No valid model found for {i}: every grid-search score was NaN")

    # Persist the scalers next to the best model.
    joblib.dump(scaler_X, f'{i}_scaler_X.pkl')
    joblib.dump(scaler_y, f'{i}_scaler_y.pkl')

    print(f"Best params for {i}: {best_params} (test loss={best_score})")
    print(f"Best model and scalers for {i} saved as {i}_best_model.h5, {i}_scaler_X.pkl, and {i}_scaler_y.pkl")


In [None]:
# Per-ticker results collected by the prediction loop:
# validation MAE keyed by ticker, and validation predictions keyed by 'pred_<ticker>'.
errores, predicciones = dict(), dict()

In [None]:
def _predict_unscaled(model, scaler_y, X_scaled):
    """Predict on already-scaled inputs and return the output in original target units."""
    y_scaled = model.predict(X_scaled, verbose=0)
    # scaler_y was fitted on a single column, so flatten to one column before
    # inverting and restore the (samples, n_out) shape afterwards.
    return scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).reshape(y_scaled.shape)


for i in tickers:
    print(".....................................................")
    print(f"Procesando predicciones para {i}")

    # Load the saved model and scalers for this ticker.
    # NOTE: joblib.load unpickles arbitrary objects -- only load files written
    # by this notebook, never files from an untrusted source.
    model = keras.models.load_model(f'{i}_best_model.h5')
    scaler_X = joblib.load(f'{i}_scaler_X.pkl')
    scaler_y = joblib.load(f'{i}_scaler_y.pkl')

    # Rebuild the same two-feature input (price + interest rate) used for training.
    df_i = df[[i]]
    interest = tipo_interes.reshape(-1, 1)
    combined_data = np.hstack((df_i.values, interest))

    # Sliding-window samples and targets.
    X, y = prepare_data(combined_data, interest, n_steps, n_out)

    # Same chronological 80/10/10 split as in training.
    train_end = int(X.shape[0] * 0.8)
    test_end = int(X.shape[0] * 0.9)
    X_train, X_test, X_val = X[:train_end], X[train_end:test_end], X[test_end:]
    # Targets are only compared against un-scaled predictions, so they are kept
    # in original units instead of being scaled and immediately un-scaled.
    y_train, y_test, y_val = y[:train_end], y[train_end:test_end], y[test_end:]

    # Scale the inputs with the scaler fitted during training.
    n_features = X_train.shape[2]
    X_train = scaler_X.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
    X_test = scaler_X.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)
    X_val = scaler_X.transform(X_val.reshape(-1, n_features)).reshape(X_val.shape)

    # Predictions for every split, expressed in original target units.
    y_pred_val = _predict_unscaled(model, scaler_y, X_val)
    y_pred_train = _predict_unscaled(model, scaler_y, X_train)
    y_pred_test = _predict_unscaled(model, scaler_y, X_test)

    # Store validation predictions and their MAE (signature is y_true, y_pred).
    predicciones[f'pred_{i}'] = y_pred_val.flatten()
    errores[f'{i}'] = mean_absolute_error(y_val.flatten(), y_pred_val.flatten())

    print(f"Error MAE para {i}: {errores[f'{i}']}")

In [None]:
# Gather the per-ticker validation predictions into one DataFrame,
# one column per 'pred_<ticker>' key.
predicciones_df = pd.DataFrame(data=predicciones)