In [1]:
import pandas as pd
import datetime as dt
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


data = pd.read_csv("../raw_data/AAPL.csv", parse_dates = True)
data = data.assign(MVA20 = "",MVA50 = "")
data['MVA20'] = data.iloc[:,6].rolling(window=10).mean()
data['MVA50'] = data.iloc[:,6].rolling(window=20).mean()
data

data.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,MVA20,MVA50
0,2012-01-03,14.621429,14.732143,14.607143,14.686786,12.629209,302220800,,
1,2012-01-04,14.642857,14.81,14.617143,14.765714,12.697079,260022000,,
2,2012-01-05,14.819643,14.948214,14.738214,14.929643,12.838044,271269600,,
3,2012-01-06,14.991786,15.098214,14.972143,15.085714,12.972249,318292800,,
4,2012-01-09,15.196429,15.276786,15.048214,15.061786,12.951672,394024400,,
5,2012-01-10,15.211071,15.214286,15.053571,15.115714,12.998045,258196400,,
6,2012-01-11,15.095714,15.101786,14.975357,15.091071,12.976856,215084800,,
7,2012-01-12,15.081429,15.103571,14.955357,15.049643,12.941231,212587200,,
8,2012-01-13,14.989286,15.016071,14.952143,14.993214,12.892705,226021600,,
9,2012-01-17,15.15,15.213929,15.105714,15.167857,13.042884,242897200,270061680.0,


In [19]:
fig = px.line(
    data.reset_index(), x="Date", y="Close", title="Tesla Stock Price - Close"
)
fig.update_xaxes(rangeslider_visible=True)

fig.show()

In [3]:
def create_dataset(dataset, look_back=60):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i : (i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return pd.DataFrame(np.array(dataX)), np.array(dataY)


X_close, Y_close = create_dataset(data[["Open","Close"]].values)
aux = X_close
aux["Target"] = Y_close
aux

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,Target
0,14.621429,14.642857,14.819643,14.991786,15.196429,15.211071,15.095714,15.081429,14.989286,15.150000,...,20.882856,21.370358,21.411072,21.526428,21.349285,21.446072,21.421070,21.649286,22.084999,21.885000
1,14.642857,14.819643,14.991786,15.196429,15.211071,15.095714,15.081429,14.989286,15.150000,15.248571,...,21.370358,21.411072,21.526428,21.349285,21.446072,21.421070,21.649286,22.084999,21.885000,21.741785
2,14.819643,14.991786,15.196429,15.211071,15.095714,15.081429,14.989286,15.150000,15.248571,15.362500,...,21.411072,21.526428,21.349285,21.446072,21.421070,21.649286,22.084999,21.885000,21.741785,21.493929
3,14.991786,15.196429,15.211071,15.095714,15.081429,14.989286,15.150000,15.248571,15.362500,15.267500,...,21.526428,21.349285,21.446072,21.421070,21.649286,22.084999,21.885000,21.741785,21.493929,22.403570
4,15.196429,15.211071,15.095714,15.081429,14.989286,15.150000,15.248571,15.362500,15.267500,15.095357,...,21.349285,21.446072,21.421070,21.649286,22.084999,21.885000,21.741785,21.493929,22.403570,22.298214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2309,119.029999,121.690002,122.540001,120.400002,121.410004,125.699997,124.050003,122.879997,119.900002,120.330002,...,125.230003,127.820000,126.010002,127.820000,126.959999,126.440002,125.570000,125.080002,124.279999,124.680000
2310,121.690002,122.540001,120.400002,121.410004,125.699997,124.050003,122.879997,119.900002,120.330002,123.330002,...,127.820000,126.010002,127.820000,126.959999,126.440002,125.570000,125.080002,124.279999,124.680000,124.070000
2311,122.540001,120.400002,121.410004,125.699997,124.050003,122.879997,119.900002,120.330002,123.330002,122.820000,...,126.010002,127.820000,126.959999,126.440002,125.570000,125.080002,124.279999,124.680000,124.070000,126.169998
2312,120.400002,121.410004,125.699997,124.050003,122.879997,119.900002,120.330002,123.330002,122.820000,119.540001,...,127.820000,126.959999,126.440002,125.570000,125.080002,124.279999,124.680000,124.070000,126.169998,126.599998


In [4]:
del aux

In [5]:
#The dimensions is (n_sample, t_delays, 1).
#Divide between train and test.

def split_dataset(X, Y):
    X_train = X[:-100].values
    y_train = Y[:-100]

    X_test = X[-100:].values
    y_test = Y[-100:]
    return X_train, y_train, X_test, y_test


# Close dataset
X_train_close, y_train_close, X_test_close, y_test_close = split_dataset(
    X_close, Y_close
)

In [6]:
#Using MixMaxScaler on feature

from sklearn.preprocessing import MinMaxScaler

sc_close_x = MinMaxScaler()
sc_close_y = MinMaxScaler()

# X Scale
X_train_close_scaled = sc_close_x.fit_transform(X_train_close).reshape(
    *X_train_close.shape, 1
)
X_test_close_scaled = sc_close_x.transform(X_test_close).reshape(*X_test_close.shape, 1)


# Y Scale
y_train_close_scaled = sc_close_y.fit_transform(y_train_close.reshape(-1, 1)).reshape(
    -1
)
y_test_close_scaled = sc_close_y.transform(y_test_close.reshape(-1, 1)).reshape(-1)


In [7]:
print(X_train_close_scaled.shape)
print(y_train_close_scaled.shape)

(2214, 61, 1)
(2214,)


In [8]:
#RNN
#Creating and traing the RNN

import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

def gen_model():
    rnn = Sequential()
    rnn.add(LSTM(units=360, return_sequences=True, input_shape=(X_train_close_scaled.shape[1], 1)))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360, return_sequences=True))
    rnn.add(Dropout(0.2))
    rnn.add(LSTM(units=360))
    rnn.add(Dropout(0.2))
    rnn.add(Dense(units=1,activation='relu'))
    rnn.compile(optimizer="adam", loss=["mean_squared_error",'cosine_similarity'])
    callback = tensorflow.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        min_delta=0,
        patience=0,
        verbose=False,
        mode="auto",
        baseline=None,
        restore_best_weights=False,
    )
    return rnn

rnn = gen_model()
history = rnn.fit(X_train_close_scaled, y_train_close_scaled,validation_data=(X_test_close_scaled,y_test_close_scaled), epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
#Predictions and take the real values

history_df = pd.DataFrame(history.history)
go.Figure() \
.add_trace(go.Scatter(x=history_df.index, y=history_df.loss, mode="lines", name="Train Loss")) \
.add_trace(go.Scatter(x=history_df.index, y=history_df.val_loss, mode="lines", name="Validation Loss")) \
.update_layout(
    title="Train Metrics",
    xaxis_title="Epochs",
    yaxis_title="Loss",
)


In [13]:
#Plot results and evaluation
#Just using R2 and MSE to mensure the RNN performace and ploting the graph to compare predict values and real values as well.


# Model
y_pred = rnn.predict(X_test_close_scaled)
y_true = y_test_close_scaled
test_dates = data.reset_index()["Date"][-100:]

# Original values
y_pred_original = sc_close_y.inverse_transform(y_pred.reshape(-1, 1)).reshape(-1)
y_true_original = sc_close_y.inverse_transform(y_true.reshape(-1, 1)).reshape(-1)


In [14]:
# Create traces
go.Figure().add_trace(
    go.Scatter(x=test_dates, y=y_true_original, mode="lines", name="Real Close",)
).add_trace(
    go.Scatter(x=test_dates, y=y_pred_original, mode="lines", name="Predicted Close",)
).update_layout(
    title="Stock price - Tesla Motors",
    xaxis_title="Date",
    yaxis_title="Stock price ($)",
).show()

print("r2_score close :", r2_score(y_true, y_pred))
print("mean_squared_error close:", mean_squared_error(y_true, y_pred))

r2_score close : -0.6205233465613853
mean_squared_error close: 0.003584896293606364
