In [33]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import array

In [34]:
# Load the AMZN price history from a CSV file in the working directory.
data1 = pd.read_csv('AMZN.csv')
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which
# suggests the CSV was written together with its index; reading it with
# index_col=0 would absorb that column -- confirm before changing.
data1

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-12-06,169.649994,173.695496,166.934494,171.368500,171.368500,68860000
1,2021-12-07,174.600006,177.499496,173.334503,176.164505,176.164505,66410000
2,2021-12-08,176.150497,177.179993,174.750504,176.158005,176.158005,45254000
3,2021-12-09,175.750000,176.969498,174.139496,174.171005,174.171005,46062000
4,2021-12-10,175.417007,175.927002,170.500000,172.212006,172.212006,60690000
...,...,...,...,...,...,...,...
248,2022-11-30,92.470001,96.540001,91.529999,96.540001,96.540001,102628200
249,2022-12-01,96.989998,97.230003,94.919998,95.500000,95.500000,68488000
250,2022-12-02,94.480003,95.360001,93.779999,94.129997,94.129997,72427000
251,2022-12-05,93.050003,94.059998,90.820000,91.010002,91.010002,71535500


In [35]:
# This rebinds data1 to the 9-row slice, so the full frame loaded above is
# no longer available after this cell (re-run the load cell to get it back).
data1 = data1.iloc[:9]  # get the first 9 rows from the dataframe
data1

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-12-06,169.649994,173.695496,166.934494,171.3685,171.3685,68860000
1,2021-12-07,174.600006,177.499496,173.334503,176.164505,176.164505,66410000
2,2021-12-08,176.150497,177.179993,174.750504,176.158005,176.158005,45254000
3,2021-12-09,175.75,176.969498,174.139496,174.171005,174.171005,46062000
4,2021-12-10,175.417007,175.927002,170.5,172.212006,172.212006,60690000
5,2021-12-13,172.0,172.100006,169.130005,169.567505,169.567505,62170000
6,2021-12-14,167.550003,169.498993,166.440002,169.091507,169.091507,55976000
7,2021-12-15,168.598007,173.600006,165.195007,173.315002,173.315002,75794000
8,2021-12-16,173.3685,174.166,168.160507,168.871002,168.871002,60876000


In [36]:
# Only the closing price is modelled: pull the 'Close' column out as a
# pandas Series.
data2 = data1['Close']
data2

0    171.368500
1    176.164505
2    176.158005
3    174.171005
4    172.212006
5    169.567505
6    169.091507
7    173.315002
8    168.871002
Name: Close, dtype: float64

In [37]:
# Replace the Series with its underlying NumPy array (the index is dropped);
# the reshape in the following cell operates on this array.
data2 = data2.values
data2

array([171.3685  , 176.164505, 176.158005, 174.171005, 172.212006,
       169.567505, 169.091507, 173.315002, 168.871002])

In [38]:
# Apply feature scaling to the data of the 'Close' column so that every
# observation lies between 0 and 1 (MinMaxScaler's default feature_range).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [39]:
# MinMaxScaler works on 2-D input of shape (n_samples, n_features), so turn
# the 1-D vector of prices into a single column (-1 lets NumPy infer the
# number of rows).
data2res = data2.reshape(-1, 1)
data2res

array([[171.3685  ],
       [176.164505],
       [176.158005],
       [174.171005],
       [172.212006],
       [169.567505],
       [169.091507],
       [173.315002],
       [168.871002]])

In [40]:
# Fit the scaler on these prices (it learns their min and max) and scale
# them in the same call. Keep `scaler` around: scaler.inverse_transform maps
# scaled values back to prices.
data3 = scaler.fit_transform(data2res)
data3

array([[0.34242777],
       [1.        ],
       [0.9991088 ],
       [0.72667455],
       [0.45807947],
       [0.09549636],
       [0.03023307],
       [0.60930941],
       [0.        ]])

In [41]:
#Specifying The Number Of Timesteps For Our Recurrent Neural Network
#We will use 3 timesteps. This means that for every value as a target, 
#the previous 3 values will be considered as features to determine 
#the target (i.e. the output).


In [42]:
# data preparation:
def split_sequence(sequence, time_steps):
    """Split a sequence into sliding-window samples for supervised learning.

    Every sample pairs ``time_steps`` consecutive observations (the input
    window) with the single observation that immediately follows them
    (the target).

    Parameters
    ----------
    sequence : sequence or numpy.ndarray
        Ordered observations. Must support ``len``, slicing and integer
        indexing.
    time_steps : int
        Number of consecutive observations in each input window.

    Returns
    -------
    X : numpy.ndarray
        Input windows. For a 1-D ``sequence`` the shape is
        ``(n_samples, time_steps)`` with
        ``n_samples = max(len(sequence) - time_steps, 0)``.
    y : numpy.ndarray
        Targets, one per window; shape ``(n_samples,)`` for a 1-D
        ``sequence``.

    Raises
    ------
    ValueError
        If ``time_steps`` is negative. Negative values would otherwise be
        interpreted as from-the-end indices and yield misaligned windows.
    """
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    # A window starting at `start` needs sequence[start + time_steps] to
    # exist as its target, which bounds the number of usable start positions.
    n_samples = len(sequence) - time_steps

    if n_samples <= 0:
        # Not enough observations for a single (window, target) pair.
        # Return empty arrays that still carry the window axis, so callers
        # that read X.shape[1] keep working instead of getting shape (0,).
        item_shape = np.shape(sequence)[1:]
        return np.empty((0, time_steps) + item_shape), np.empty((0,) + item_shape)

    X = [sequence[start:start + time_steps] for start in range(n_samples)]
    y = [sequence[start + time_steps] for start in range(n_samples)]
    return array(X), array(y)

In [43]:
# Convert the 2-D column of scaled values back to a 1-D array, the form in
# which it is passed to split_sequence below.
data3 = data3.flatten()
data3

array([0.34242777, 1.        , 0.9991088 , 0.72667455, 0.45807947,
       0.09549636, 0.03023307, 0.60930941, 0.        ])

In [44]:
# Split the scaled series into sample rows: each row of X holds 3
# consecutive values and the matching entry of y is the value that follows.
# The 3 is the number of timesteps; it has to agree with the first entry of
# input_shape=(3, 1) used when the model is built.
X, y = split_sequence(data3, 3)

In [45]:
X

array([[0.34242777, 1.        , 0.9991088 ],
       [1.        , 0.9991088 , 0.72667455],
       [0.9991088 , 0.72667455, 0.45807947],
       [0.72667455, 0.45807947, 0.09549636],
       [0.45807947, 0.09549636, 0.03023307],
       [0.09549636, 0.03023307, 0.60930941]])

In [46]:
y

array([0.72667455, 0.45807947, 0.09549636, 0.03023307, 0.60930941,
       0.        ])

In [47]:
# data summary: show each input window next to the target it should predict
for window, target in zip(X, y):
    print(window, ' ', target)

[0.34242777 1.         0.9991088 ]   0.7266745485674058
[1.         0.9991088  0.72667455]   0.4580794715515992
[0.9991088  0.72667455 0.45807947]   0.09549636162486053
[0.72667455 0.45807947 0.09549636]   0.030233071817477253
[0.45807947 0.09549636 0.03023307]   0.6093094086613782
[0.09549636 0.03023307 0.60930941]   0.0


In [48]:
import tensorflow as tf
# Use these two lines when you use TensorFlow models; the original author
# enabled them because this would avoid several warnings and errors.
# They make tf.function-decorated code run eagerly and switch tf.data into
# debug mode.
# NOTE(review): both switches are debugging aids and are expected to slow
# training down -- confirm they are still needed for this notebook.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

In [49]:
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [50]:
# Build an LSTM recurrent network as a Keras Sequential model.

model1 = Sequential([
    # input_shape=(3, 1) is (timesteps, features): every sample is a window
    # of 3 timesteps with 1 feature (the scaled closing price) per timestep.
    # The target is not part of the input shape.
    layers.InputLayer(input_shape=(3, 1)),

    # 50 LSTM units in the hidden layer,
    # relu activation function at each unit
    # (this overrides the Keras LSTM default activation, tanh)
    layers.LSTM(50, activation='relu'), #hidden layer

    # 1 Dense unit in the output layer because this is a regression:
    # the network predicts a single number per sample (the next scaled value)
    layers.Dense( 1 ) #output layer
])

In [51]:
# Compile the model with the Adam optimizer and the mean-squared-error loss.
# No metrics are requested, so model1.evaluate() returns only the loss value.
model1.compile(optimizer='adam', 
              loss='mean_squared_error')

In [52]:
# The LSTM expects 3-D input (samples, timesteps, features). X is 2-D
# (samples, timesteps), so add a trailing feature axis of length 1:
# (6, 3) becomes (6, 3, 1) for the fit function.
n_samples, n_steps = X.shape[0], X.shape[1]
Xrs = X.reshape((n_samples, n_steps, 1))
Xrs.shape

(6, 3, 1)

In [53]:
# Fit the model: 100 epochs over all windows in Xrs, with the per-epoch
# progress output silenced (verbose=0).
# NOTE(review): no random seed is set in the visible cells, so the trained
# weights -- and every number printed below -- will differ between runs.
model1.fit(Xrs, y, verbose=0, epochs=100)

<keras.callbacks.History at 0x1e2934c3b50>

In [54]:
# evaluate() returns the compiled loss (mean squared error), not an accuracy.
# NOTE(review): Xrs and y are the very samples the model was fitted on, so
# this is the training loss even though the printed label says "Test loss".
score = model1.evaluate(Xrs, y, verbose=0)
print("Test loss:", score)

Test loss: 0.07503414154052734


In [55]:
# Predict on the same windows the model was trained on, so these are
# in-sample fits rather than forecasts of unseen data.
# The values are on the scaled 0-1 scale; scaler.inverse_transform converts
# them back to prices.
predictions = model1.predict(Xrs)
predictions



array([[0.3823054 ],
       [0.41719046],
       [0.35744786],
       [0.28078505],
       [0.22691883],
       [0.22874743]], dtype=float32)

In [56]:
pd.Series(y)  # y has true target values

0    0.726675
1    0.458079
2    0.095496
3    0.030233
4    0.609309
5    0.000000
dtype: float64