In [None]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Dropout, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [None]:
# Data exploration: load the training CSV and preview its first three rows.
dataset_train = pd.read_csv(filepath_or_buffer='google_train.csv')
dataset_train.head(n=3)

In [None]:
# Normalization: scale the 'Open' column to the [0, 1] range and plot the
# scaled series.
from sklearn.preprocessing import MinMaxScaler

# Selecting with a list of columns keeps the data 2-D (rows x 1 column), which
# is the layout fit_transform receives here.
train = dataset_train[['Open']].to_numpy()
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train)

fig, ax = plt.subplots()
ax.plot(train_scaled)
ax.set_xlabel("Dia")
ax.set_ylabel("Precio de apertura")
ax.grid(True)
plt.show()

In [None]:
# Build the training set with a sliding window: every sample holds the previous
# `timesteps` scaled opening prices and its label is the price that follows.
timesteps = 50

# Use every available row instead of a hard-coded upper bound (previously 1250),
# so no trailing rows are dropped and shorter files fail with a clear message
# rather than an IndexError inside the loop.
n_train = len(train_scaled)
if n_train <= timesteps:
    raise ValueError(
        "Se necesitan más de %d filas de entrenamiento; hay %d" % (timesteps, n_train)
    )

X_train = np.array([train_scaled[i - timesteps:i, 0] for i in range(timesteps, n_train)])
y_train = np.array([train_scaled[i, 0] for i in range(timesteps, n_train)])

# Add a trailing axis of size 1 so the features are (samples, timesteps, 1),
# matching the input_shape=(timesteps, 1) the models are built with.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print("Dimensión de atributos:", X_train.shape)
print("Dimensión de etiquetas:", y_train.shape)

In [None]:
# Simple RNN model: three stacked SimpleRNN layers, each followed by dropout,
# and a single-unit dense output. Compiled with Adam and mean squared error.
import tensorflow as tf

# Seed TensorFlow's global random state before any layer is created.
tf.random.set_seed(1)

unidades_rnn = 50
tasa_dropout = 0.2
forma_entrada = (X_train.shape[1], 1)  # (timesteps, features per step)

model = Sequential()
# The first two recurrent layers return the whole sequence so that the next
# recurrent layer receives one vector per timestep.
model.add(SimpleRNN(units=unidades_rnn, activation='tanh', return_sequences=True,
                    input_shape=forma_entrada))
model.add(Dropout(tasa_dropout))
model.add(SimpleRNN(units=unidades_rnn, activation='tanh', return_sequences=True))
model.add(Dropout(tasa_dropout))
# The last recurrent layer keeps only its final output.
model.add(SimpleRNN(units=unidades_rnn, activation='tanh'))
model.add(Dropout(tasa_dropout))
# NOTE(review): a tanh output cannot exceed 1 in scaled units, so prices above
# the training maximum are unreachable — confirm this is intended (the LSTM
# model uses a linear output).
model.add(Dense(units=1, activation='tanh'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
# Train the simple RNN defined above: 10 epochs, mini-batches of 32 samples.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=10)

In [None]:
# Predict on the training windows with the simple RNN and compare the result,
# in original price units, against the real training targets.
prediccion_rnn_train = model.predict(X_train)  # RNN
prediccion_rnn_train_desescalada = scaler.inverse_transform(prediccion_rnn_train)
# The targets are 1-D; reshape to a single column before undoing the scaling.
y_train_desescalada = scaler.inverse_transform(y_train.reshape(-1, 1))

fig, ax = plt.subplots()
ax.plot(y_train_desescalada, color='red', label='Precio real')
ax.plot(prediccion_rnn_train_desescalada, color='blue', label='Predicción con RNN simple')
ax.set_title('Predicción en entrenamiento')
ax.set_xlabel('Día')
ax.set_ylabel('Precio')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# LSTM model: one LSTM layer with 10 units followed by a single-unit dense
# output with no explicit activation. Compiled with Adam and mean squared error.
forma_entrada_lstm = (X_train.shape[1], 1)  # (timesteps, features per step)

model2 = Sequential()
model2.add(LSTM(units=10, input_shape=forma_entrada_lstm))
model2.add(Dense(units=1))
model2.compile(optimizer='adam', loss='mean_squared_error')
model2.summary()

In [None]:
# Train the LSTM model (10 epochs, mini-batches of 32) and keep the returned
# history object so the loss curve can be plotted afterwards.
hist2 = model2.fit(x=X_train, y=y_train, batch_size=32, epochs=10)

In [None]:
# Plot the LSTM training loss recorded in the fit history, one point per epoch.
perdida_lstm = hist2.history['loss']

fig, ax = plt.subplots()
ax.plot(perdida_lstm)
ax.set_title("Pérdida en entrenamiento LSTM")
ax.set_xlabel("Épocas")
ax.set_ylabel("MSE")
ax.grid(True)
plt.show()

In [None]:
# Load the validation CSV and preview its first five rows.
dataset_test = pd.read_csv(filepath_or_buffer='google_test.csv')
dataset_test.head(n=5)

In [None]:
# Normalization of the validation inputs.
# Real validation prices, kept unscaled as a single-column 2-D array for plotting.
precio_real = dataset_test[['Open']].to_numpy()

# Concatenate training and validation prices so the first validation window can
# reach back `timesteps` rows into the training data.
dataset_total = pd.concat([dataset_train['Open'], dataset_test['Open']], axis=0)
inicio = len(dataset_total) - len(dataset_test) - timesteps
inputs = dataset_total.iloc[inicio:].to_numpy().reshape(-1, 1)

# transform (not fit_transform): the scaler is reused as already fitted, not refit.
inputs = scaler.transform(inputs)

In [None]:
# Prediction: build the validation windows, run both models on the validation
# and training windows, and map every prediction back to price units.

# `inputs` holds `timesteps` rows of history followed by the validation rows,
# so every index from `timesteps` onward is a target with a full window behind
# it. The bound comes from the data instead of a hard-coded 70, which was only
# right for exactly 20 validation rows.
n_inputs = len(inputs)
if n_inputs <= timesteps:
    raise ValueError(
        "Se necesitan más de %d filas en inputs; hay %d" % (timesteps, n_inputs)
    )

X_test = np.array([inputs[i - timesteps:i, 0] for i in range(timesteps, n_inputs)])
y_test = np.array([inputs[i, 0] for i in range(timesteps, n_inputs)])
# Add a trailing axis of size 1: (samples, timesteps, 1), as for the training set.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

prediccion_rnn = model.predict(X_test) # RNN
prediccion_lstm = model2.predict(X_test) # LSTM
prediccion_rnn_train = model.predict(X_train) #RNN
prediccion_lstm_train = model2.predict(X_train) # LSTM

# Undo the [0, 1] scaling so predictions are expressed in original price units.
prediccion_rnn_desescalada = scaler.inverse_transform(prediccion_rnn)
prediccion_lstm_desescalada = scaler.inverse_transform(prediccion_lstm)
prediccion_rnn_train_desescalada = scaler.inverse_transform(prediccion_rnn_train)
prediccion_lstm_train_desescalada = scaler.inverse_transform(prediccion_lstm_train)

In [None]:
# Evaluation: RMSE of both models on the training and validation windows,
# computed in original price units (targets and predictions are de-scaled).

# Wrapping the 1-D targets in a list yields a 2-D array of shape (1, n_samples),
# so row 0 of each result holds the de-scaled targets.
y_train1 = scaler.inverse_transform([y_train])
y_test1 = scaler.inverse_transform([y_test])

costo_rnn_train = math.sqrt(mean_squared_error(y_train1[0], prediccion_rnn_train_desescalada[:,0]))
print('RMSE entrenamiento RNN: %.2f' % (costo_rnn_train))
costo_rnn = math.sqrt(mean_squared_error(y_test1[0], prediccion_rnn_desescalada[:,0]))
print('RMSE validación RNN: %.2f' % (costo_rnn))

# Fix: compare against the de-scaled LSTM training predictions. The scaled
# `prediccion_lstm_train` was used before, mixing price units with [0, 1]
# values and producing a meaningless RMSE.
costo_lstm_train = math.sqrt(mean_squared_error(y_train1[0], prediccion_lstm_train_desescalada[:,0]))
print('RMSE entrenamiento LSTM: %.2f' % (costo_lstm_train))
costo_lstm = math.sqrt(mean_squared_error(y_test1[0], prediccion_lstm_desescalada[:,0]))
print('RMSE validación LSTM: %.2f' % (costo_lstm))

In [None]:
# Compare real prices against both models' predictions, first on the training
# windows and then on the validation windows. One figure is drawn per entry.
comparaciones = [
    (
        'Predicción en entrenamiento',
        scaler.inverse_transform(y_train.reshape(-1, 1)),
        prediccion_rnn_train_desescalada,
        prediccion_lstm_train_desescalada,
    ),
    (
        'Predicción en validación',
        precio_real,
        prediccion_rnn_desescalada,
        prediccion_lstm_desescalada,
    ),
]

for titulo, serie_real, serie_rnn, serie_lstm in comparaciones:
    plt.plot(serie_real, color='red', label='Precio real')
    plt.plot(serie_rnn, color='blue', label='Predicción con RNN simple')
    plt.plot(serie_lstm, color='black', label='Predicción con LSTM')
    plt.title(titulo)
    plt.xlabel('Día')
    plt.ylabel('Precio')
    plt.legend()
    plt.grid(True)
    # Place the axes in the upper half of the figure area.
    plt.subplots_adjust(bottom=0.5, top=1)
    plt.show()