In [32]:
import pandas as pd
import datetime as dt
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


data = pd.read_csv("../raw_data/AMZN 2012-2021.csv", parse_dates = True)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2012-01-03,175.889999,179.479996,175.550003,179.029999,179.029999,5110800
1,2012-01-04,179.210007,180.5,176.070007,177.509995,177.509995,4205200
2,2012-01-05,175.940002,178.25,174.050003,177.610001,177.610001,3809100
3,2012-01-06,178.070007,184.649994,177.5,182.610001,182.610001,7008400
4,2012-01-09,182.759995,184.369995,177.0,178.559998,178.559998,5056900


In [33]:
fig = px.line(
    data.reset_index(), x="Date", y="Close", title="Tesla Stock Price - Close"
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [38]:
def create_dataset(dataset, look_back=60):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i : (i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return pd.DataFrame(np.array(dataX)), np.array(dataY)


X_close, Y_close = create_dataset(data[["Close","Volume","High","Low"]].values)
aux = X_close
aux["Target"] = Y_close
aux

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,Target
0,179.029999,177.509995,177.610001,182.610001,178.559998,179.339996,178.899994,175.929993,178.419998,181.660004,...,185.050003,185.520004,192.330002,191.729996,192.399994,195.039993,202.869995,205.440002,201.160004,204.610001
1,177.509995,177.610001,182.610001,178.559998,179.339996,178.899994,175.929993,178.419998,181.660004,189.440002,...,185.520004,192.330002,191.729996,192.399994,195.039993,202.869995,205.440002,201.160004,204.610001,202.509995
2,177.610001,182.610001,178.559998,179.339996,178.899994,175.929993,178.419998,181.660004,189.440002,194.449997,...,192.330002,191.729996,192.399994,195.039993,202.869995,205.440002,201.160004,204.610001,202.509995,198.050003
3,182.610001,178.559998,179.339996,178.899994,175.929993,178.419998,181.660004,189.440002,194.449997,190.929993,...,191.729996,192.399994,195.039993,202.869995,205.440002,201.160004,204.610001,202.509995,198.050003,199.660004
4,178.559998,179.339996,178.899994,175.929993,178.419998,181.660004,189.440002,194.449997,190.929993,186.089996,...,192.399994,195.039993,202.869995,205.440002,201.160004,204.610001,202.509995,198.050003,199.660004,193.990005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2310,3057.639893,3113.590088,3089.489990,3081.679932,3091.860107,3135.729980,3027.989990,3074.959961,3110.870117,3137.500000,...,3203.080078,3244.989990,3259.050049,3265.159912,3230.110107,3223.070068,3218.649902,3233.989990,3187.010010,3206.219971
2311,3113.590088,3089.489990,3081.679932,3091.860107,3135.729980,3027.989990,3074.959961,3110.870117,3137.500000,3087.070068,...,3244.989990,3259.050049,3265.159912,3230.110107,3223.070068,3218.649902,3233.989990,3187.010010,3206.219971,3198.010010
2312,3089.489990,3081.679932,3091.860107,3135.729980,3027.989990,3074.959961,3110.870117,3137.500000,3087.070068,3046.260010,...,3259.050049,3265.159912,3230.110107,3223.070068,3218.649902,3233.989990,3187.010010,3206.219971,3198.010010,3264.110107
2313,3081.679932,3091.860107,3135.729980,3027.989990,3074.959961,3110.870117,3137.500000,3087.070068,3046.260010,3052.030029,...,3265.159912,3230.110107,3223.070068,3218.649902,3233.989990,3187.010010,3206.219971,3198.010010,3264.110107,3281.149902


In [39]:
del aux

In [40]:
#The dimensions is (n_sample, t_delays, 1).
#Divide between train and test.

def split_dataset(X, Y):
    X_train = X[:-100].values
    y_train = Y[:-100]

    X_test = X[-100:].values
    y_test = Y[-100:]
    return X_train, y_train, X_test, y_test


# Close dataset
X_train_close, y_train_close, X_test_close, y_test_close = split_dataset(
    X_close, Y_close
)

In [41]:
#Using MixMaxScaler on feature

from sklearn.preprocessing import MinMaxScaler

sc_close_x = MinMaxScaler()
sc_close_y = MinMaxScaler()

# X Scale
X_train_close_scaled = sc_close_x.fit_transform(X_train_close).reshape(
    *X_train_close.shape, 1
)
X_test_close_scaled = sc_close_x.transform(X_test_close).reshape(*X_test_close.shape, 1)


# Y Scale
y_train_close_scaled = sc_close_y.fit_transform(y_train_close.reshape(-1, 1)).reshape(
    -1
)
y_test_close_scaled = sc_close_y.transform(y_test_close.reshape(-1, 1)).reshape(-1)

In [42]:
print(X_train_close_scaled.shape)
print(y_train_close_scaled.shape)

(2215, 61, 1)
(2215,)


In [43]:
#RNN
#Creating and traing the RNN

import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

def gen_model():
    rnn = Sequential()
    rnn.add(LSTM(units=360, return_sequences=True, input_shape=(X_train_close_scaled.shape[1], 1)))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360))
    rnn.add(Dropout(0.2))
    rnn.add(Dense(units=1,activation='relu'))
    rnn.compile(optimizer="adam", loss=["mean_squared_error",'cosine_similarity'])
    callback = tensorflow.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        min_delta=0,
        patience=4,
        verbose=False,
        mode="auto",
        baseline=None,
        restore_best_weights=False,
    )
    return rnn

rnn = gen_model()
history = rnn.fit(X_train_close_scaled, y_train_close_scaled,validation_data=(X_test_close_scaled,y_test_close_scaled), epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
#Predictions and take the real values

history_df = pd.DataFrame(history.history)
go.Figure() \
.add_trace(go.Scatter(x=history_df.index, y=history_df.loss, mode="lines", name="Train Loss")) \
.add_trace(go.Scatter(x=history_df.index, y=history_df.val_loss, mode="lines", name="Validation Loss")) \
.update_layout(
    title="Train Metrics",
    xaxis_title="Epochs",
    yaxis_title="Loss",
)


In [29]:
#Plot results and evaluation
#Just using R2 and MSE to mensure the RNN performace and ploting the graph to compare predict values and real values as well.


# Model
y_pred = rnn.predict(X_test_close_scaled)
y_true = y_test_close_scaled
test_dates = data.reset_index()["Date"][-100:]

# Original values
y_pred_original = sc_close_y.inverse_transform(y_pred.reshape(-1, 1)).reshape(-1)
y_true_original = sc_close_y.inverse_transform(y_true.reshape(-1, 1)).reshape(-1)


In [30]:
# Create traces
go.Figure().add_trace(
    go.Scatter(x=test_dates, y=y_true_original, mode="lines", name="Real Close",)
).add_trace(
    go.Scatter(x=test_dates, y=y_pred_original, mode="lines", name="Predicted Close",)
).update_layout(
    title="Stock price - Tesla Motors",
    xaxis_title="Date",
    yaxis_title="Stock price ($)",
).show()

print("r2_score close :", r2_score(y_true, y_pred))
print("mean_squared_error close:", mean_squared_error(y_true, y_pred))

r2_score close : 0.47087009182590245
mean_squared_error close: 0.0011897784039321022
