In [11]:
import pandas as pd
import datetime as dt
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


data = pd.read_csv("../raw_data/stock_prices_latest.csv", parse_dates = True)

In [None]:
is_sym = data['symbol']=='MSFT'
df_sym = data[is_sym]
df_sym = df_sym[(df_sym['date'] >= '2013-01-01') & (df_sym['date'] <= '2018-12-31')]
df_sym = df_sym.sort_values('date')
df_sym

In [10]:
fig = px.line(
    df_sym.reset_index(), x="date", y="close_adjusted", title="Microsoft Stock price - Close adjusted"
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [13]:
def create_dataset(dataset, look_back=60):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i : (i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return pd.DataFrame(np.array(dataX)), np.array(dataY)


X_close, Y_close = create_dataset(df_sym[["close_adjusted"]].values)
aux = X_close
aux["Target"] = Y_close
aux

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,Target
0,24.1945,23.8704,23.4236,23.3798,23.2572,23.3886,23.1783,23.5025,23.5550,23.8353,...,24.8168,24.8875,25.0067,24.8256,24.9493,24.8698,24.8654,25.0553,25.2628,25.2672
1,23.8704,23.4236,23.3798,23.2572,23.3886,23.1783,23.5025,23.5550,23.8353,23.6864,...,24.8875,25.0067,24.8256,24.9493,24.8698,24.8654,25.0553,25.2628,25.2672,25.4350
2,23.4236,23.3798,23.2572,23.3886,23.1783,23.5025,23.5550,23.8353,23.6864,23.8704,...,25.0067,24.8256,24.9493,24.8698,24.8654,25.0553,25.2628,25.2672,25.4350,25.2231
3,23.3798,23.2572,23.3886,23.1783,23.5025,23.5550,23.8353,23.6864,23.8704,23.8704,...,24.8256,24.9493,24.8698,24.8654,25.0553,25.2628,25.2672,25.4350,25.2231,25.2540
4,23.2572,23.3886,23.1783,23.5025,23.5550,23.8353,23.6864,23.8704,23.8704,23.7828,...,24.9493,24.8698,24.8654,25.0553,25.2628,25.2672,25.4350,25.2231,25.2540,25.3467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1210,78.4131,78.4330,78.4629,78.2340,78.3634,83.3879,83.4675,82.7611,82.7611,83.6267,...,87.1100,88.1900,88.2800,88.2200,87.8200,88.0800,89.6000,88.3500,90.1400,90.1000
1211,78.4330,78.4629,78.2340,78.3634,83.3879,83.4675,82.7611,82.7611,83.6267,83.7163,...,88.1900,88.2800,88.2200,87.8200,88.0800,89.6000,88.3500,90.1400,90.1000,90.0000
1212,78.4629,78.2340,78.3634,83.3879,83.4675,82.7611,82.7611,83.6267,83.7163,84.0446,...,88.2800,88.2200,87.8200,88.0800,89.6000,88.3500,90.1400,90.1000,90.0000,91.6100
1213,78.2340,78.3634,83.3879,83.4675,82.7611,82.7611,83.6267,83.7163,84.0446,83.8456,...,88.2200,87.8200,88.0800,89.6000,88.3500,90.1400,90.1000,90.0000,91.6100,91.9000


In [14]:
del aux

In [19]:
#The dimensions is (n_sample, t_delays, 1).
#Divide between train and test.

def split_dataset(X, Y):
    X_train = X[:-100].values
    y_train = Y[:-100]

    X_test = X[-100:].values
    y_test = Y[-100:]
    return X_train, y_train, X_test, y_test


# Close dataset
X_train_close, y_train_close, X_test_close, y_test_close = split_dataset(
    X_close, Y_close
)

In [20]:
#Using MixMaxScaler on feature

from sklearn.preprocessing import MinMaxScaler

sc_close_x = MinMaxScaler()
sc_close_y = MinMaxScaler()

# X Scale
X_train_close_scaled = sc_close_x.fit_transform(X_train_close).reshape(
    *X_train_close.shape, 1
)
X_test_close_scaled = sc_close_x.transform(X_test_close).reshape(*X_test_close.shape, 1)


# Y Scale
y_train_close_scaled = sc_close_y.fit_transform(y_train_close.reshape(-1, 1)).reshape(
    -1
)
y_test_close_scaled = sc_close_y.transform(y_test_close.reshape(-1, 1)).reshape(-1)


In [21]:
print(X_train_close_scaled.shape)
print(y_train_close_scaled.shape)

(1115, 61, 1)
(1115,)


In [22]:
#RNN
#Creating and traing the RNN

import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

def gen_model():
    rnn = Sequential()
    rnn.add(LSTM(units=360, return_sequences=True, input_shape=(X_train_close_scaled.shape[1], 1)))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360))
    rnn.add(Dropout(0.2))
    rnn.add(Dense(units=1,activation='relu'))
    rnn.compile(optimizer="adam", loss=["mean_squared_error",'cosine_similarity'])
    callback = tensorflow.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        min_delta=0,
        patience=0,
        verbose=False,
        mode="auto",
        baseline=None,
        restore_best_weights=False,
    )
    return rnn

rnn = gen_model()
history = rnn.fit(X_train_close_scaled, y_train_close_scaled,validation_data=(X_test_close_scaled,y_test_close_scaled), epochs=40, batch_size=32)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [23]:
#Predictions and take the real values

history_df = pd.DataFrame(history.history)
go.Figure() \
.add_trace(go.Scatter(x=history_df.index, y=history_df.loss, mode="lines", name="Train Loss")) \
.add_trace(go.Scatter(x=history_df.index, y=history_df.val_loss, mode="lines", name="Validation Loss")) \
.update_layout(
    title="Train Metrics",
    xaxis_title="Epochs",
    yaxis_title="Loss",
)


In [25]:
#Plot results and evaluation
#Just using R2 and MSE to mensure the RNN performace and ploting the graph to compare predict values and real values as well.


# Model
y_pred = rnn.predict(X_test_close_scaled)
y_true = y_test_close_scaled
test_dates = df_sym.reset_index()["date"][-100:]

# Original values
y_pred_original = sc_close_y.inverse_transform(y_pred.reshape(-1, 1)).reshape(-1)
y_true_original = sc_close_y.inverse_transform(y_true.reshape(-1, 1)).reshape(-1)


In [27]:
# Create traces
go.Figure().add_trace(
    go.Scatter(x=test_dates, y=y_true_original, mode="lines", name="Real Close",)
).add_trace(
    go.Scatter(x=test_dates, y=y_pred_original, mode="lines", name="Predicted Close",)
).update_layout(
    title="Stock price - Microsoft",
    xaxis_title="Date",
    yaxis_title="Stock price ($)",
).show()

print("r2_score close :", r2_score(y_true, y_pred))
print("mean_squared_error close:", mean_squared_error(y_true, y_pred))

r2_score close : 0.9305365567631082
mean_squared_error close: 0.0008877195928129959
