In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers

# Load the data: keep only the date and the opening price, indexed by date
df = pd.read_csv('amzn.us.txt')
# .copy() makes the two-column subset an independent frame, so the column
# assignments below cannot raise pandas' SettingWithCopyWarning
df = df[['Date', 'Open']].copy()
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Normalize the opening price with a min-max scaler
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that are later used as test windows — confirm this look-ahead is acceptable
scaler = MinMaxScaler()
df['Open'] = scaler.fit_transform(df[['Open']])

# Helper that cuts a series into a leading training part and a trailing test part
def train_test_split(data, train_size):
    """Split ``data`` at a fractional position, preserving order.

    Args:
        data: Any sliceable sequence that supports ``len()``.
        train_size: Fraction of ``data`` that goes to the training part.

    Returns:
        A ``(train_data, test_data)`` tuple where ``train_data`` holds the
        first ``int(len(data) * train_size)`` items and ``test_data`` the rest.
    """
    cut = int(len(data) * train_size)
    head = data[:cut]
    tail = data[cut:]
    return head, tail

# Helper that turns a series into sliding windows and their next-step targets
def create_sequences(data, seq_length):
    """Build supervised samples from a sequence using a sliding window.

    Args:
        data: Indexable, sliceable sequence of values.
        seq_length: Number of consecutive items in each input window.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays. ``X[k]`` is
        ``data[k:k + seq_length]`` and ``y[k]`` is the item that follows it,
        ``data[k + seq_length]``. When ``data`` has no more than
        ``seq_length`` items both arrays are empty.
    """
    n_samples = len(data) - seq_length
    starts = range(n_samples)  # empty when n_samples <= 0
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)

# One Transformer encoder block: self-attention followed by a feed-forward part
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0, attention_axes=1):
    """Apply a single encoder block to ``inputs`` and return the result.

    Each of the two sub-layers normalizes its input first and adds a
    residual connection afterwards.

    Args:
        inputs: Keras tensor fed to the block; its last dimension sets the
            number of output channels.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first (ReLU) 1x1 convolution.
        dropout: Rate used inside the attention layer and by both
            ``Dropout`` layers.
        attention_axes: Forwarded to ``MultiHeadAttention``.

    Returns:
        Keras tensor whose last dimension equals ``inputs.shape[-1]``.
    """
    n_channels = inputs.shape[-1]

    # Self-attention sub-layer (query and value are the same normalized tensor)
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=head_size,
        dropout=dropout,
        attention_axes=attention_axes,
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    attention_residual = attended + inputs

    # Feed-forward sub-layer: two kernel-size-1 convolutions, the second one
    # projecting back to the input channel count so the residual sum is valid
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_residual)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=n_channels, kernel_size=1)(hidden)
    return projected + attention_residual

def build_transformer(head_size, num_heads, ff_dim, num_trans_blocks, mlp_units, dropout=0, mlp_dropout=0,
                      n_timesteps=None, n_features=1, n_outputs=1, output_activation='relu'):
    """Build the Transformer-based forecasting model (not compiled).

    The network stacks ``num_trans_blocks`` encoder blocks, pools the result,
    runs it through an MLP head and ends in a dense output layer.

    Args:
        head_size: ``key_dim`` of every attention layer.
        num_heads: Number of attention heads per encoder block.
        ff_dim: Hidden width of the feed-forward part of each encoder block.
        num_trans_blocks: Number of stacked encoder blocks.
        mlp_units: Iterable with the size of each dense layer in the head.
        dropout: Dropout rate inside the encoder blocks.
        mlp_dropout: Dropout rate applied after each dense layer of the head.
        n_timesteps: Length of the input sequences. When ``None`` (default)
            the module-level ``seq_length`` is used, which is what the
            original implementation always did.
        n_features: Number of features per time step.
        n_outputs: Number of units of the output layer.
        output_activation: Activation of the output layer.
            NOTE(review): the default ``'relu'`` is kept for backward
            compatibility; a ReLU output cannot recover once its
            pre-activation turns negative, so a linear output may be
            preferable for regression — confirm.

    Returns:
        A ``tf.keras.Model`` mapping ``(batch, n_timesteps, n_features)``
        inputs to ``(batch, n_outputs)`` predictions.
    """
    if n_timesteps is None:
        # Fall back to the global configured in the parameters section
        n_timesteps = seq_length

    inputs = tf.keras.Input(shape=(n_timesteps, n_features))
    x = inputs
    for _ in range(num_trans_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # With data_format="channels_first" the pooling averages over the last
    # axis, leaving one value per time step
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(n_outputs, activation=output_activation)(x)
    return tf.keras.Model(inputs, outputs)

# Parameters
seq_length = 60  # length of each input sequence
train_size_initial = 0.5  # initial training fraction of the data
train_size_increment = 0.1  # increase of the training fraction per split
train_size_max = 0.9  # largest training fraction that is evaluated
n_splits = 5  # expanding-window splits per run: 50%, 60%, ..., 90% of the data
n_iterations = 20  # how many times the whole experiment is repeated

open_values = df['Open'].values
n_rows = len(open_values)

# Run the whole process n_iterations times
for iteration in range(n_iterations):
    print(f"Running iteration {iteration + 1}...")

    mse_train = []
    mse_test = []

    for i in range(n_splits):
        # round() strips binary floating-point drift (0.7 + 0.1 evaluates to
        # 0.7999999999999999), which could otherwise shift a window boundary
        # by one row or trip the upper-bound check
        train_size = round(train_size_initial + i * train_size_increment, 10)
        if train_size > train_size_max:
            break

        # The test window is the slice right after the training data and is
        # one increment wide, clamped to the end of the series
        test_index_start = int(train_size * n_rows)
        test_fraction_end = round(train_size + train_size_increment, 10)
        test_index_end = min(int(test_fraction_end * n_rows), n_rows)

        # Prepare the training sequences first, so that no model is built
        # when there is nothing to fit
        train_data, _ = train_test_split(open_values, train_size)
        X_train, y_train = create_sequences(train_data, seq_length)
        if len(X_train) == 0:
            # NaN placeholders keep column k of the metric files tied to split k
            mse_train.append(float('nan'))
            mse_test.append(float('nan'))
            continue
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

        # Drop the global Keras state left behind by the previous model;
        # without this, memory use grows with every model created in the loop
        tf.keras.backend.clear_session()

        # Create the model
        model = build_transformer(head_size=128, num_heads=4, ff_dim=2, num_trans_blocks=4, mlp_units=[256], mlp_dropout=0.10, dropout=0.10)
        model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=["mae", 'mape'])

        # Train the model
        model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

        # Mean squared error on the training set (verbose=0 keeps the
        # notebook output free of progress bars)
        y_train_pred = model.predict(X_train, verbose=0)
        mse_train.append(mean_squared_error(y_train, y_train_pred))

        # Predict on the following test window; its first seq_length values
        # only serve as input context for the first prediction
        test_data = open_values[test_index_start:test_index_end]
        X_test, y_test = create_sequences(test_data, seq_length)
        if len(X_test) == 0:
            mse_test.append(float('nan'))
            continue
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        y_pred = model.predict(X_test, verbose=0)
        mse_test.append(mean_squared_error(y_test, y_pred))

        # Append the predictions, mapped back to the original price scale,
        # to the file of this split
        y_pred_rescaled = scaler.inverse_transform(y_pred).flatten()
        y_pred_str = ",".join(map(str, y_pred_rescaled))
        with open(f'y_pred_rescaled_part_{i + 1}_TRANS.csv', 'a') as f:
            f.write(f"Iteration {iteration + 1},{y_pred_str}\n")

    # Append the metrics of this iteration (one row per iteration)
    with open('train_mse_TRANS.csv', 'a') as f:
        f.write(f"Iteration {iteration + 1}," + ",".join(map(str, mse_train)) + "\n")

    with open('test_mse_TRANS.csv', 'a') as f:
        f.write(f"Iteration {iteration + 1}," + ",".join(map(str, mse_test)) + "\n")

print("All iterations completed.")


2024-06-02 19:50:53.517849: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Running iteration 1...


2024-06-02 19:50:55.112673: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-02 19:50:55.118657: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-02 19:50:55.118726: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-02 19:50:55.121750: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-02 19:50:55.121820: I external/local_xla/xla/stream_executor

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 157ms/step
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 132ms/step
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
Running iteration 2...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step
[1m15/

In [None]:
I