# Búsqueda de hiperparámetros del Modelo LSTM


En este cuaderno de jupyter realizaremos la búsqueda de los mejores hiperparámetros para el modelo LSTM. 

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

# Escalamiento
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Métricas
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Modelo LSTM
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


# hiperparametros
import keras_tuner as kt
from keras_tuner.tuners import RandomSearch 
from sklearn.model_selection import TimeSeriesSplit

## Lectura de datasets  

In [18]:
# Nombre de las acciones
tickers = [
    'TSCO', 'AZN', 'BARC', 'BP', 'BATS', 'HLMA',
    'HSBA', 'JMAT', 'LGEN', 'MKS', 'PSON', 'REL',
    'NWG', 'SHEL', 'SGE', 'SBRY', 'SDR', 'SVT',
    'SMIN', 'SSE', 'VOD'
]

data_frames = {} # Inicializo diciconario de data frames

for ticker in tickers: 
    
    df = pd.read_csv(f'datasets_features/{ticker}.csv')  # leo el dataset
    data_frames[ticker] = df                             # guardo dataframes
    
print(data_frames.keys())

dict_keys(['TSCO', 'AZN', 'BARC', 'BP', 'BATS', 'HLMA', 'HSBA', 'JMAT', 'LGEN', 'MKS', 'PSON', 'REL', 'NWG', 'SHEL', 'SGE', 'SBRY', 'SDR', 'SVT', 'SMIN', 'SSE', 'VOD'])


## Trabajo solo con UNA acción y UN solo bloque

In [130]:
# supongo que escojo una acción (por ejemplo 'AZN') 
df = data_frames['AZN']
df_bloque = df[0:1000]
df_bloque.shape

(1000, 28)

**Nota:** Para una acción dada, dividiré todo el dataset en bloques de 1000 días (600 train, 150 validación, 250 test) usando rolling window:  

In [131]:
# Escribo los índices para cada  bloque
# for j in range(20):
#     n_bloque = j
    
#     ini_train = 250*n_bloque
#     fin_train = 250*n_bloque + 599 

#     ini_val = 250*n_bloque + 600
#     fin_val = 250*n_bloque + 749


#     ini_test = 250*n_bloque + 750
#     fin_test = 250*n_bloque + 999

#     print(f'Bloque {n_bloque}: train=({ini_train}:{fin_train}) - val=({ini_val}:{fin_val}) - test=({ini_test}:{fin_test}) ')

### 1) Partición de datos

In [136]:
df_features = df_bloque.drop(columns=['Date', 'Open', 'High', 'Low','Close','Volume', 'prev_close', 'target']) # todas columnas r_i
target      = df_bloque['target']  # columna del target
close       = df_bloque['Close']   # columna precio cierre
    
ini_train, fin_train = 0, 599
ini_val, fin_val = 600, 749
ini_test, fin_test = 750, 999 
    
X_train = df_features.iloc[ : fin_train + 1]
X_val = df_features.iloc[ini_val : fin_val + 1]
X_test  = df_features.iloc[ini_test : fin_test + 1]


y_train = target.iloc[ : fin_train + 1]
y_val   = target.iloc[ini_val : fin_val + 1]
y_test  = target.iloc[ini_test : fin_test + 1]

y_test_close = close.iloc[ini_test : fin_test + 1]
    
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

#display(X_train.head())
#display(X_val.head())
#display(X_test.head())

(600, 20)
(150, 20)
(250, 20)
(600,)
(150,)
(250,)


### 2) Estandarización (un bloque - una acción)

**IMPORTANTE:**  
Tengo que estandarizar los datos usando SOLAMENTE el conjunto de TRAIN de cada uno de los bloques 

**IMPORTANTE:**
Usar la función `fit()` SOLO en conjunto de entrenamiento. 


In [137]:
# Features 
scaler_x = MinMaxScaler(feature_range=(-1,1))
X_train_s = scaler_x.fit_transform(X_train)     
X_val_s   = scaler_x.transform(X_val)
X_test_s  = scaler_x.transform(X_test)

# Target
scaler_y = MinMaxScaler(feature_range=(-1,1))
y_train_s = scaler_y.fit_transform(y_train.values.reshape(-1, 1))    # Necesario reshape, scaler espera 2D      
y_val_s   = scaler_y.transform(y_val.values.reshape(-1, 1))
y_test_s  = scaler_y.transform(y_test.values.reshape(-1, 1))



print(f'X_train_s - {type(X_train_s)} -  {X_train_s.shape}')
print(f'X_val_s   - {type(X_val_s)} -  {X_val_s.shape}')
print(f'X_test_s  - {type(X_test_s)} -  {X_test_s.shape}')

print(f'y_train_s - {type(y_train_s)} -  {y_train_s.shape}')
print(f'y_val_s   - {type(y_val_s)} -  {y_val_s.shape}')
print(f'y_test_s  - {type(y_test_s)} -  {y_test_s.shape}')

X_train_s - <class 'numpy.ndarray'> -  (600, 20)
X_val_s   - <class 'numpy.ndarray'> -  (150, 20)
X_test_s  - <class 'numpy.ndarray'> -  (250, 20)
y_train_s - <class 'numpy.ndarray'> -  (600, 1)
y_val_s   - <class 'numpy.ndarray'> -  (150, 1)
y_test_s  - <class 'numpy.ndarray'> -  (250, 1)


### 3) Función crear secuencias (necesario para LSTM)

In [138]:
def crear_secuencias(array, timesteps = 50):
    '''
    array: array en 2D
    '''
    
    rows, cols = array.shape
    X_secuencias, y_secuencias = [], [] 
    
    for i in range(rows - timesteps): 
        
        x = array[i : i + timesteps, 0:cols]
        y = array[i + timesteps, 0]  # La primera columna debe ser r1=retorno simple
    
        X_secuencias.append(x)
        y_secuencias.append(y)
        
    X_secuencias = np.array(X_secuencias)
    y_secuencias = np.array(y_secuencias).reshape(-1, 1)
    
    return X_secuencias, y_secuencias 

In [161]:
timesteps=70

# 2.1) --------------Crear secuencias ----------------------
array_features = np.vstack((X_train_s, X_val_s, X_test_s))
    
X_secuencias_s, y_secuencias_s = crear_secuencias(array_features, timesteps=timesteps)
    
X_train_secuencias_s = X_secuencias_s[0                   : fin_train - timesteps + 1]
X_val_secuencias_s   = X_secuencias_s[ini_val - timesteps : fin_val - timesteps + 1]
X_test_secuencias_s  = X_secuencias_s[ini_test - timesteps: ]
    
y_train_secuencias_s = y_secuencias_s[0                   : fin_train - timesteps + 1]
y_val_secuencias_s   = y_secuencias_s[ini_val - timesteps : fin_val - timesteps + 1]
y_test_secuencias_s  = y_secuencias_s[ini_test - timesteps: ]

print(X_secuencias_s.shape)
print('X_train_secuencias_s', X_train_secuencias_s.shape)
print('X_val_secuencias_s', X_val_secuencias_s.shape)
print('X_test_secuencias_s', X_test_secuencias_s.shape)

print('y_train_secuencias_s', y_train_secuencias_s.shape)
print('y_val_secuencias_s', y_val_secuencias_s.shape)
print('y_test_secuencias_s', y_test_secuencias_s.shape)

(930, 70, 20)
X_train_secuencias_s (530, 70, 20)
X_val_secuencias_s (150, 70, 20)
X_test_secuencias_s (250, 70, 20)
y_train_secuencias_s (530, 1)
y_val_secuencias_s (150, 1)
y_test_secuencias_s (250, 1)


In [152]:
# Compruebo 

print()
display(df_features.iloc[595:605])
display(pd.DataFrame(array_features[595:605]))
display('Ultima seq del X_train_secuecnias_s:', pd.DataFrame(X_train_secuencias_s[-1]))
display('Ultima seq del y_train_secuecnias_s:', pd.DataFrame(y_train_secuencias_s[-1]))
print()
display('Primera seq del X_val_secuecnias_s:', pd.DataFrame(X_val_secuencias_s[0]))
display('Primera seq del y_val_secuecnias_s:', pd.DataFrame(y_val_secuencias_s[0]))




Unnamed: 0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15,RSI_14,Momentum_10,True_Range,ATR_14,PSAR
595,0.005635,0.014236,0.009363,-0.002171,0.007005,0.02545776,0.03409475,0.031203,0.017046,0.008637,0.001439,-0.014876,-0.00359,-0.004343,-0.010872,65.604594,3.115777,29.949583,20.653501,1325.262476
596,0.011882,0.005635,0.014236,0.009363,0.016772,0.01465747,0.03311036,0.041747,0.007005,0.017046,0.008637,-0.002822,-0.014876,-0.00359,-0.004343,69.994286,4.348265,27.056744,21.110875,1329.946623
597,-0.004181,0.011882,0.005635,0.014236,0.006223,0.02230548,0.02019073,0.038644,0.016772,0.007005,0.017046,-0.009056,-0.002822,-0.014876,-0.00359,66.753328,4.366541,21.261024,21.1216,1336.396432
598,-0.001395,-0.004181,0.011882,0.005635,0.006944,0.002769123,0.01885167,0.016737,0.006223,0.016772,0.007005,-0.005584,-0.009056,-0.002822,-0.014876,65.663972,3.768041,17.397178,20.85557,1344.290692
599,0.006267,-0.001395,-0.004181,0.011882,0.009759,0.004175251,4.382586e-07,0.016083,0.006944,0.006223,0.016772,0.0,-0.005584,-0.009056,-0.002822,68.182191,3.668659,13.533332,20.332553,1351.23764
600,0.015839,0.006267,-0.001395,-0.004181,0.01718,0.02762882,0.02204473,0.01787,0.009759,0.006944,0.006223,-0.003466,0.0,-0.005584,-0.009056,73.526012,5.933985,28.98861,20.950843,1357.350955
601,-0.000687,0.015839,0.006267,-0.001395,0.0068,0.02193164,0.03238028,0.026796,0.01718,0.009759,0.006944,-0.006847,-0.003466,0.0,-0.005584,72.949687,5.631672,19.319126,20.834292,1365.927025
602,-0.010305,-0.000687,0.015839,0.006267,0.004088,0.003409024,0.0185407,0.028989,0.0068,0.01718,0.009759,-0.013068,-0.006847,-0.003466,0.0,64.786619,4.775937,24.163778,21.072112,1375.437119
603,-0.003461,-0.010305,-0.000687,0.015839,0.004149,-0.008919034,-0.009598292,0.005533,0.004088,0.0068,0.01718,-0.008336,-0.013068,-0.006847,-0.003466,62.283234,3.440891,17.397178,20.809617,1383.425597
604,-0.006951,-0.003461,-0.010305,-0.000687,0.002074,2.76666e-07,-0.01306786,-0.013747,0.004149,0.004088,0.0068,-0.00836,-0.008336,-0.013068,-0.006847,57.502394,1.272295,14.494307,20.358523,1425.365109


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,-0.21094,-0.062586,-0.146648,-0.345592,-0.887754,-0.155981,-0.011407,-0.184902,-0.726853,-0.861599,-0.976936,0.668177,0.919912,0.903125,0.757473,0.335249,0.013693,-0.688841,-0.022214,0.874597
1,-0.103203,-0.21094,-0.062586,-0.146648,-0.731241,-0.286093,-0.022505,-0.078153,-0.887754,-0.726853,-0.861599,0.937057,0.668177,0.919912,0.903125,0.464632,0.121345,-0.718896,0.004712,0.887643
2,-0.380259,-0.103203,-0.21094,-0.062586,-0.900283,-0.193956,-0.168162,-0.109574,-0.731241,-0.887754,-0.726853,0.797991,0.937057,0.668177,0.919912,0.369107,0.122941,-0.77911,0.005344,0.905605
3,-0.332202,-0.380259,-0.103203,-0.21094,-0.88873,-0.429313,-0.183258,-0.331347,-0.900283,-0.731241,-0.887754,0.875432,0.797991,0.937057,0.668177,0.336999,0.070665,-0.819253,-0.010318,0.92759
4,-0.200054,-0.332202,-0.380259,-0.103203,-0.843616,-0.412374,-0.395788,-0.337967,-0.88873,-0.900283,-0.731241,1.0,0.875432,0.797991,0.937057,0.411222,0.061985,-0.859396,-0.041109,0.946937
5,-0.034943,-0.200054,-0.332202,-0.380259,-0.724704,-0.129826,-0.14726,-0.319877,-0.843616,-0.88873,-0.900283,0.922685,1.0,0.875432,0.797991,0.568726,0.25985,-0.698825,-0.004709,0.963963
6,-0.319985,-0.034943,-0.200054,-0.332202,-0.891037,-0.19846,-0.030736,-0.229512,-0.724704,-0.843616,-0.88873,0.847277,0.922685,1.0,0.875432,0.55174,0.233445,-0.799285,-0.011571,0.987847
7,-0.485893,-0.319985,-0.034943,-0.200054,-0.934489,-0.421604,-0.186764,-0.207309,-0.891037,-0.724704,-0.843616,0.70849,0.847277,0.922685,1.0,0.31114,0.1587,-0.748952,0.00243,1.014332
8,-0.367838,-0.485893,-0.319985,-0.034943,-0.933514,-0.570122,-0.504005,-0.444767,-0.934489,-0.891037,-0.724704,0.814058,0.70849,0.847277,0.922685,0.237355,0.04209,-0.819253,-0.013023,1.036579
9,-0.428036,-0.367838,-0.485893,-0.319985,-0.966769,-0.46267,-0.543121,-0.639953,-0.933514,-0.934489,-0.891037,0.813509,0.814058,0.70849,0.847277,0.096443,-0.147326,-0.849412,-0.03958,1.153379


'Ultima seq del X_train_secuecnias_s:'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,-0.062586,-0.146648,-0.345592,-0.270692,-0.726853,-0.153266,-0.138845,-0.270058,-0.861599,-0.976936,-0.976936,0.919912,0.903125,0.757473,0.870948,0.266083,-0.114159,-0.70881,-0.064312,0.86625
1,-0.21094,-0.062586,-0.146648,-0.345592,-0.887754,-0.155981,-0.011407,-0.184902,-0.726853,-0.861599,-0.976936,0.668177,0.919912,0.903125,0.757473,0.335249,0.013693,-0.688841,-0.022214,0.874597
2,-0.103203,-0.21094,-0.062586,-0.146648,-0.731241,-0.286093,-0.022505,-0.078153,-0.887754,-0.726853,-0.861599,0.937057,0.668177,0.919912,0.903125,0.464632,0.121345,-0.718896,0.004712,0.887643
3,-0.380259,-0.103203,-0.21094,-0.062586,-0.900283,-0.193956,-0.168162,-0.109574,-0.731241,-0.887754,-0.726853,0.797991,0.937057,0.668177,0.919912,0.369107,0.122941,-0.77911,0.005344,0.905605
4,-0.332202,-0.380259,-0.103203,-0.21094,-0.88873,-0.429313,-0.183258,-0.331347,-0.900283,-0.731241,-0.887754,0.875432,0.797991,0.937057,0.668177,0.336999,0.070665,-0.819253,-0.010318,0.92759


'Ultima seq del y_train_secuecnias_s:'

Unnamed: 0,0
0,-0.200054





'Primera seq del X_val_secuecnias_s:'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,-0.21094,-0.062586,-0.146648,-0.345592,-0.887754,-0.155981,-0.011407,-0.184902,-0.726853,-0.861599,-0.976936,0.668177,0.919912,0.903125,0.757473,0.335249,0.013693,-0.688841,-0.022214,0.874597
1,-0.103203,-0.21094,-0.062586,-0.146648,-0.731241,-0.286093,-0.022505,-0.078153,-0.887754,-0.726853,-0.861599,0.937057,0.668177,0.919912,0.903125,0.464632,0.121345,-0.718896,0.004712,0.887643
2,-0.380259,-0.103203,-0.21094,-0.062586,-0.900283,-0.193956,-0.168162,-0.109574,-0.731241,-0.887754,-0.726853,0.797991,0.937057,0.668177,0.919912,0.369107,0.122941,-0.77911,0.005344,0.905605
3,-0.332202,-0.380259,-0.103203,-0.21094,-0.88873,-0.429313,-0.183258,-0.331347,-0.900283,-0.731241,-0.887754,0.875432,0.797991,0.937057,0.668177,0.336999,0.070665,-0.819253,-0.010318,0.92759
4,-0.200054,-0.332202,-0.380259,-0.103203,-0.843616,-0.412374,-0.395788,-0.337967,-0.88873,-0.900283,-0.731241,1.0,0.875432,0.797991,0.937057,0.411222,0.061985,-0.859396,-0.041109,0.946937


'Primera seq del y_val_secuecnias_s:'

Unnamed: 0,0
0,-0.034943


In [153]:
print()
display(df_features.iloc[745:755])
display(pd.DataFrame(array_features[745:755]))
display('Ultima seq del X_val_secuecnias_s:', pd.DataFrame(X_val_secuencias_s[-1]))
display('Ultima seq del y_val_secuecnias_s:', pd.DataFrame(y_val_secuencias_s[-1]))
print()
display('Primera seq del X_test_secuecnias_s:', pd.DataFrame(X_test_secuencias_s[0]))
display('Primera seq del y_test_secuecnias_s:', pd.DataFrame(y_test_secuencias_s[0]))




Unnamed: 0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15,RSI_14,Momentum_10,True_Range,ATR_14,PSAR
745,-0.02259,0.030034,0.018105,-0.008187,0.028987,0.045,0.062176,0.061267,0.031772,0.024327,0.011475,-0.028642,-0.017482,-0.006411,-0.008545,57.420112,5.7088,94.711152,48.387101,1543.341463
746,0.005031,-0.02259,0.030034,0.018105,0.014994,0.026125,0.042137,0.059313,0.028987,0.031772,0.024327,-0.040984,-0.028642,-0.017482,-0.006411,58.898843,5.153783,91.808045,51.488597,1552.216393
747,0.008235,0.005031,-0.02259,0.030034,0.018666,0.004587,0.015718,0.031731,0.014994,0.028987,0.031772,-0.007699,-0.040984,-0.028642,-0.017482,61.284657,5.141858,43.426999,50.912768,1560.558828
748,0.01164,0.008235,0.005031,-0.02259,0.0,0.018993,0.004914,0.016045,0.018666,0.014994,0.028987,0.0,-0.007699,-0.040984,-0.028642,64.456616,4.475395,19.321167,48.656225,1568.400717
749,0.013516,0.01164,0.008235,0.005031,0.0,0.013516,0.032509,0.018431,0.0,0.018666,0.014994,0.0,0.0,-0.007699,-0.040984,67.797574,5.705097,22.718872,46.803557,1575.772092
750,0.030094,0.013516,0.01164,0.008235,0.0,0.030094,0.043611,0.062603,0.0,0.0,0.018666,0.0,0.0,0.0,-0.007699,73.826808,9.099307,51.700562,47.153343,1585.088567
751,-0.009465,0.030094,0.013516,0.01164,0.0,-0.009465,0.02063,0.034146,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69.3817,7.941124,16.427612,44.958648,1600.972569
752,-0.015508,-0.009465,0.030094,0.013516,0.0,-0.015508,-0.024973,0.005122,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.792916,7.153778,26.58374,43.646155,1615.268171
753,-0.012863,-0.015508,-0.009465,0.030094,0.0,-0.012863,-0.028371,-0.037835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57.946931,3.886321,21.738159,42.081298,1628.134213
754,-0.009545,-0.012863,-0.015508,-0.009465,0.0,-0.009545,-0.022408,-0.037916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54.615999,-0.145138,15.951904,40.214913,1639.71365


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,-0.697779,0.209901,0.004143,-0.449359,-0.535503,0.079446,0.305187,0.119451,-0.490876,-0.610185,-0.816117,0.361091,0.610036,0.857002,0.809398,0.094018,0.240181,-0.016005,1.610499,1.481938
1,-0.221359,-0.697779,0.209901,0.004143,-0.75974,-0.147947,0.079263,0.099678,-0.535503,-0.490876,-0.610185,0.085791,0.361091,0.610036,0.857002,0.137602,0.191703,-0.046166,1.793088,1.506655
2,-0.166094,-0.221359,-0.697779,0.209901,-0.700899,-0.407407,-0.218584,-0.179556,-0.75974,-0.535503,-0.490876,0.828259,0.085791,0.361091,0.610036,0.207922,0.190662,-0.548818,1.759189,1.529888
3,-0.107363,-0.166094,-0.221359,-0.697779,-1.0,-0.233868,-0.34039,-0.338351,-0.700899,-0.75974,-0.535503,1.0,0.828259,0.085791,0.361091,0.301413,0.132449,-0.799264,1.626343,1.551727
4,-0.075006,-0.107363,-0.166094,-0.221359,-1.0,-0.29984,-0.029286,-0.314201,-1.0,-0.700899,-0.75974,1.0,1.0,0.828259,0.085791,0.399885,0.239858,-0.763964,1.517274,1.572256
5,0.210938,-0.075006,-0.107363,-0.166094,-1.0,-0.100123,0.095877,0.132982,-1.0,-1.0,-0.700899,1.0,1.0,1.0,0.828259,0.577592,0.536326,-0.46286,1.537866,1.598202
6,-0.47139,0.210938,-0.075006,-0.107363,-1.0,-0.576694,-0.163211,-0.155104,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,0.446576,0.435164,-0.829326,1.408662,1.642439
7,-0.575633,-0.47139,0.210938,-0.075006,-1.0,-0.649502,-0.677338,-0.448935,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,0.252377,0.366393,-0.72381,1.331394,1.682251
8,-0.530002,-0.575633,-0.47139,0.210938,-1.0,-0.617631,-0.715648,-0.883811,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,0.109546,0.080997,-0.774153,1.239268,1.718083
9,-0.472782,-0.530002,-0.575633,-0.47139,-1.0,-0.577666,-0.648421,-0.884628,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,0.011369,-0.271132,-0.834269,1.129392,1.750331


'Ultima seq del X_val_secuecnias_s:'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.209901,0.004143,-0.449359,-0.287305,-0.490876,0.12702,0.145804,-0.039022,-0.610185,-0.816117,-0.835767,0.610036,0.857002,0.809398,0.905133,0.395779,0.200252,-0.166605,1.400718,1.467353
1,-0.697779,0.209901,0.004143,-0.449359,-0.535503,0.079446,0.305187,0.119451,-0.490876,-0.610185,-0.816117,0.361091,0.610036,0.857002,0.809398,0.094018,0.240181,-0.016005,1.610499,1.481938
2,-0.221359,-0.697779,0.209901,0.004143,-0.75974,-0.147947,0.079263,0.099678,-0.535503,-0.490876,-0.610185,0.085791,0.361091,0.610036,0.857002,0.137602,0.191703,-0.046166,1.793088,1.506655
3,-0.166094,-0.221359,-0.697779,0.209901,-0.700899,-0.407407,-0.218584,-0.179556,-0.75974,-0.535503,-0.490876,0.828259,0.085791,0.361091,0.610036,0.207922,0.190662,-0.548818,1.759189,1.529888
4,-0.107363,-0.166094,-0.221359,-0.697779,-1.0,-0.233868,-0.34039,-0.338351,-0.700899,-0.75974,-0.535503,1.0,0.828259,0.085791,0.361091,0.301413,0.132449,-0.799264,1.626343,1.551727


'Ultima seq del y_val_secuecnias_s:'

Unnamed: 0,0
0,-0.075006





'Primera seq del X_test_secuecnias_s:'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,-0.697779,0.209901,0.004143,-0.449359,-0.535503,0.079446,0.305187,0.119451,-0.490876,-0.610185,-0.816117,0.361091,0.610036,0.857002,0.809398,0.094018,0.240181,-0.016005,1.610499,1.481938
1,-0.221359,-0.697779,0.209901,0.004143,-0.75974,-0.147947,0.079263,0.099678,-0.535503,-0.490876,-0.610185,0.085791,0.361091,0.610036,0.857002,0.137602,0.191703,-0.046166,1.793088,1.506655
2,-0.166094,-0.221359,-0.697779,0.209901,-0.700899,-0.407407,-0.218584,-0.179556,-0.75974,-0.535503,-0.490876,0.828259,0.085791,0.361091,0.610036,0.207922,0.190662,-0.548818,1.759189,1.529888
3,-0.107363,-0.166094,-0.221359,-0.697779,-1.0,-0.233868,-0.34039,-0.338351,-0.700899,-0.75974,-0.535503,1.0,0.828259,0.085791,0.361091,0.301413,0.132449,-0.799264,1.626343,1.551727
4,-0.075006,-0.107363,-0.166094,-0.221359,-1.0,-0.29984,-0.029286,-0.314201,-1.0,-0.700899,-0.75974,1.0,1.0,0.828259,0.085791,0.399885,0.239858,-0.763964,1.517274,1.572256


'Primera seq del y_test_secuecnias_s:'

Unnamed: 0,0
0,0.210938


In [162]:
# RESUMIENDO 

print(f'\nPara entrenar el modelo usaré {X_train_secuencias_s.shape[0]} secuencias:')
print(X_train_secuencias_s.shape)

print(f'\nPara validación usaré {X_val_secuencias_s.shape[0]} secuencias:')
print(X_val_secuencias_s.shape)

print(f'\nPara test usaré {X_test_secuencias_s.shape[0]} secuencias:')
print(X_test_secuencias_s.shape)


Para entrenar el modelo usaré 530 secuencias:
(530, 70, 20)

Para validación usaré 150 secuencias:
(150, 70, 20)

Para test usaré 250 secuencias:
(250, 70, 20)


### 4) Busqueda hiperparámetros (LSTM)

In [30]:
def build_model(hp):
    model = tf.keras.Sequential()
    
    # Primera capa LSTM
    model.add(layers.LSTM(
        units=hp.Int('lstm_units', min_value=32, max_value=256, step=32),
        return_sequences=hp.Boolean('return_sequences'),
        input_shape=(X_train_secuencias_s.shape[1], X_train_secuencias_s.shape[2])
    ))
    
    # (Opcional) segunda capa LSTM
    if hp.Boolean('add_second_lstm'):
        model.add(layers.LSTM(
            units=hp.Int('lstm_units_2', min_value=32, max_value=128, step=32),
            return_sequences=False
        ))
    
    # Capa densa intermedia
    model.add(layers.Dense(
        units=hp.Int('dense_units', min_value=16, max_value=128, step=16),
        activation=hp.Choice('dense_activation', ['relu', 'tanh'])
    ))
    
    # Capa de salida
    model.add(layers.Dense(1))
    
    # Compilación
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')
        ),
        loss='mse',
        metrics=['mae']
    )
    
    return model

In [106]:
from tensorflow.keras import layers 

def build_model_2(hp):
    model = tf.keras.Sequential()
    
    input_shape=(X_train_secuencias.shape[1], X_train_secuencias.shape[2])
    
    # Capas lstm
    if hp.Boolean('add_second_lstm'):
        model.add(layers.LSTM(
            units=hp.Choice('lstm_units', values=[16, 32, 64, 128]),
            return_sequences=True,   # True si hay segunda capa LSTM
            input_shape=input_shape
        ))
        model.add(layers.LSTM(
            units=hp.Choice('lstm_units_2', values=[16, 32, 64, 128]),
            return_sequences=False
        ))
    else:
        model.add(layers.LSTM(
            units=hp.Choice('lstm_units', values=[16, 32, 64, 128]),
            return_sequences=False,
            input_shape=input_shape
        ))
    
    # Capa densa intermedia
    model.add(layers.Dense(
        units=hp.Choice('dense_units', values = [8, 16, 32, 64, 128]),
        activation=hp.Choice('dense_activation', values = ['relu', 'tanh'])
    ))
    
    # Capa de salida
    model.add(layers.Dense(1))
    
    # Compilación
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float('learning_rate', 1e-4, 1e-2)
        ),
        loss='mse',
        metrics=['mae']
    )
    
    return model

In [163]:
def build_model_3(hp):
    model = tf.keras.Sequential()
    
    # Primera capa LSTM
    model.add(layers.LSTM(
        units=hp.Int('lstm_units', min_value=16, max_value=128, step=16),
        return_sequences=False,
        input_shape=(X_train_secuencias_s.shape[1], X_train_secuencias_s.shape[2])
    ))
    
    # # (Opcional) segunda capa LSTM
    # if hp.Boolean('add_second_lstm'):
    #     model.add(layers.LSTM(
    #         units=hp.Int('lstm_units_2', min_value=16, max_value=128, step=16),
    #         return_sequences=False
    #     ))
    
    # Capa densa intermedia
    model.add(layers.Dense(
        units=hp.Int('dense_units', min_value=8, max_value=64, step=8),
        activation=hp.Choice('dense_activation', ['relu', 'tanh'])
    ))
    
    # Capa de salida
    model.add(layers.Dense(1))
    
    # Compilación
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            #learning_rate=hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')
            learning_rate=hp.Choice('learning_rate', [0.01, 0.001, 0.0001])
        ),
        loss='mse',
        metrics=['mae']
    )
    
    return model

In [87]:
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='tuning_results',
    project_name='lstm_stock'
)


Reloading Tuner from tuning_results\lstm_stock\tuner0.json


In [92]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=50,     # Número total de combinaciones de hiperparámetros a probar
    executions_per_trial=1,  # Cuántas veces se evalúa cada combinación
    directory='tuning_results',
    project_name='lstm_tuning_random'
)



Reloading Tuner from tuning_results\lstm_tuning_random\tuner0.json


In [107]:
tuner_2 = kt.RandomSearch(
    build_model_2,
    objective='val_loss',
    max_trials=50,     # Número total de combinaciones de hiperparámetros a probar
    executions_per_trial=1,  # Cuántas veces se evalúa cada combinación
    directory='tuning_results',
    project_name='lstm_tuning_random_2'
)

Reloading Tuner from tuning_results\lstm_tuning_random_2\tuner0.json


In [166]:
tuner_3 = kt.RandomSearch(
    build_model_3,
    objective='val_loss',
    max_trials=50,     # Número total de combinaciones de hiperparámetros a probar
    executions_per_trial=1,  # Cuántas veces se evalúa cada combinación
    directory='tuning_results',
    project_name='lstm_tuning_random_4'
)

In [39]:
# Early stopping
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Búsqueda de hiperparámetros
tuner.search(
    X_train_secuencias, y_train_secuencias,
    epochs=50,
    validation_data=(X_val_secuencias, y_val_secuencias),
    callbacks=[stop_early],
    verbose=1
)

Trial 50 Complete [00h 00m 10s]
val_loss: 0.04346628487110138

Best val_loss So Far: 0.042161986231803894
Total elapsed time: 00h 16m 48s


In [100]:
# Early stopping
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=7)

# Búsqueda de hiperparámetros
tuner_2.search(
    X_train_secuencias, y_train_secuencias,
    epochs=50,
    validation_data=(X_val_secuencias, y_val_secuencias),
    callbacks=[stop_early],
    verbose=1
)

Trial 50 Complete [00h 00m 09s]
val_loss: 0.044548261910676956

Best val_loss So Far: 0.04234534874558449
Total elapsed time: 00h 14m 25s


In [167]:
# Early stopping
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=7)

# Búsqueda de hiperparámetros
tuner_3.search(
    X_train_secuencias_s, y_train_secuencias_s,
    epochs=50,
    validation_data=(X_val_secuencias_s, y_val_secuencias_s),
    callbacks=[stop_early],
    verbose=1
)

Trial 50 Complete [00h 00m 09s]
val_loss: 0.038834888488054276

Best val_loss So Far: 0.0376565046608448
Total elapsed time: 00h 14m 39s


In [169]:
best_param_4 = tuner_3.get_best_hyperparameters(num_trials=1)[0]
print(best_param_4.values)

{'lstm_units': 32, 'dense_units': 8, 'dense_activation': 'relu', 'learning_rate': 0.0009689679055404804}


In [40]:
best_hps_random = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps_random.values)

{'lstm_units': 64, 'return_sequences': False, 'add_second_lstm': False, 'dense_units': 48, 'dense_activation': 'relu', 'learning_rate': 0.0010140092234766135, 'lstm_units_2': 32}


In [37]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

{'lstm_units': 96, 'return_sequences': False, 'add_second_lstm': False, 'dense_units': 48, 'dense_activation': 'relu', 'learning_rate': 0.0009064595479379301, 'lstm_units_2': 96, 'tuner/epochs': 6, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}


In [102]:
parameters = tuner_2.get_best_hyperparameters(num_trials=1)[0]
print(parameters.values)

{'add_second_lstm': False, 'lstm_units': 128, 'dense_units': 32, 'dense_activation': 'tanh', 'learning_rate': 0.0004118601837276136, 'lstm_units_2': 64}


In [116]:
best_param_3 = tuner_3.get_best_hyperparameters(num_trials=1)[0]
print(best_param_3.values)

{'lstm_units': 80, 'return_sequences': False, 'add_second_lstm': False, 'dense_units': 64, 'dense_activation': 'relu', 'learning_rate': 0.0013825682381639706, 'lstm_units_2': 32}


#### 4.1) Timesteps hiperparametro

In [123]:
timesteps_list = [30, 50, 70, 90, 120]
results = []

for t in timesteps_list:
    print(f"\n🕒 Probando timestep = {t}")
    
    # 1️⃣ Crear las secuencias de entrada
    X_train_secuencias, y_train_secuencias = crear_secuencias(X_train_s, t)
    X_val_secuencias, y_val_secuencias     = crear_secuencias(X_val_s, t)
    
    # 2️⃣ Crear tuner nuevo
    tuner = kt.RandomSearch(
        build_model_3,
        objective='val_loss',
        max_trials=20,
        directory='tuning_results',
        project_name=f'tuning_t{t}'
    )
    
    # 3️⃣ Lanzar búsqueda
    tuner.search(
        X_train_secuencias, y_train_secuencias,
        epochs=50,
        validation_data=(X_val_secuencias, y_val_secuencias),
        callbacks=[stop_early],
        verbose=1
    )
    
    # 4️⃣ Guardar resultado
    best_hp = tuner.get_best_hyperparameters(1)[0]
    best_val_loss = tuner.results_summary()
    results.append({'timesteps': t, 'best_hp': best_hp.values})


Trial 20 Complete [00h 00m 11s]
val_loss: 0.040684401988983154

Best val_loss So Far: 0.03714457526803017
Total elapsed time: 00h 04m 04s
Results summary
Results in tuning_results\tuning_t120
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 07 summary
Hyperparameters:
lstm_units: 32
dense_units: 32
dense_activation: tanh
learning_rate: 0.002011419620747071
Score: 0.03714457526803017

Trial 00 summary
Hyperparameters:
lstm_units: 112
dense_units: 16
dense_activation: tanh
learning_rate: 0.00015034638891913403
Score: 0.03872714191675186

Trial 06 summary
Hyperparameters:
lstm_units: 96
dense_units: 24
dense_activation: relu
learning_rate: 0.0017301949786254914
Score: 0.03945592790842056

Trial 16 summary
Hyperparameters:
lstm_units: 32
dense_units: 56
dense_activation: relu
learning_rate: 0.004187215793882192
Score: 0.03999831900000572

Trial 04 summary
Hyperparameters:
lstm_units: 48
dense_units: 8
dense_activation: tanh
learning_rate: 0.008850221174397929
Score

In [124]:
results

[{'timesteps': 30,
  'best_hp': {'lstm_units': 80,
   'return_sequences': False,
   'dense_units': 64,
   'dense_activation': 'tanh',
   'learning_rate': 0.00015347292104683503}},
 {'timesteps': 60,
  'best_hp': {'lstm_units': 96,
   'dense_units': 16,
   'dense_activation': 'relu',
   'learning_rate': 0.00034863892704459074}},
 {'timesteps': 90,
  'best_hp': {'lstm_units': 64,
   'dense_units': 64,
   'dense_activation': 'relu',
   'learning_rate': 0.000229747208769072}},
 {'timesteps': 120,
  'best_hp': {'lstm_units': 32,
   'dense_units': 32,
   'dense_activation': 'tanh',
   'learning_rate': 0.002011419620747071}}]

### 5) Construcción del modelo

In [43]:
best_model = tuner.hypermodel.build(best_hps)

history = best_model.fit(
    X_train_secuencias, y_train_secuencias,
    validation_data=(X_val_secuencias, y_val_secuencias),
    epochs=100,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
    verbose=1
)


loss, mae = best_model.evaluate(X_test_secuencias, y_test_secuencias)
print(f'MAE en test: {mae:.4f}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
MAE en test: 0.2110


In [81]:
best_model_random = tuner.hypermodel.build(best_hps_random)

history = best_model_random.fit(
    X_train_secuencias, y_train_secuencias,
    validation_data=(X_val_secuencias, y_val_secuencias),
    epochs=100,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
    verbose=1
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


In [103]:
best_model_2 = tuner.hypermodel.build(parameters)

history = best_model_2.fit(
    X_train_secuencias, y_train_secuencias,
    validation_data=(X_val_secuencias, y_val_secuencias),
    epochs=100,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
    verbose=1
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


In [None]:
best_model_3 = tuner.hypermodel.build(best_param_3)

history = best_model_3.fit(
    X_train_secuencias, y_train_secuencias,
    validation_data=(X_val_secuencias, y_val_secuencias),
    epochs=100,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
    verbose=1
)

### 6) Predicciones 

In [50]:
y_pred_s = best_model.predict(X_test_secuencias)
y_pred = scaler_y.inverse_transform(y_pred_s.reshape(-1, 1)).ravel() 


X_test_secuencias.shape
y_pred_s.shape
y_pred.shape



(200,)

In [104]:
y_pred_s = best_model_2.predict(X_test_secuencias)
y_pred = scaler_y.inverse_transform(y_pred_s.reshape(-1, 1)).ravel() 


X_test_secuencias.shape
y_pred_s.shape
y_pred.shape



(200,)

In [62]:
# Métricas 
# mse   = mean_squared_error(y_true = y_test_secuencias, y_pred = y_pred)     
# mse_s = mean_squared_error(y_true = y_test_s, y_pred = y_pred_s)
# rmse   = np.sqrt(mse)
# rmse_s = np.sqrt(mse_s)
# mae   = mean_absolute_error(y_true = y_test, y_pred = y_pred)
# mae_s = mean_absolute_error(y_true = y_test_s, y_pred = y_pred_s)
# r2   = r2_score(y_true = y_test, y_pred = y_pred)
# r2_s = r2_score(y_true = y_test_s, y_pred = y_pred_s)


mse_s = mean_squared_error(y_true = y_test_secuencias, y_pred = y_pred_s)
mse   = mean_squared_error(y_true = y_test.values[50:], y_pred = y_pred)

print(mse_s)
print(mse)


0.07194358633167186
0.00024093204962128984


In [83]:
mse_s = mean_squared_error(y_true = y_test_secuencias, y_pred = y_pred_s)
mse   = mean_squared_error(y_true = y_test.values[50:], y_pred = y_pred)

print(mse_s)
print(mse)

0.0693361211724518
0.00023325195553974636


In [105]:
mse_s = mean_squared_error(y_true = y_test_secuencias, y_pred = y_pred_s)
mse   = mean_squared_error(y_true = y_test.values[50:], y_pred = y_pred)

print(mse_s)
print(mse)

0.10059048594207769
0.0003359642236094072
