In [98]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [99]:
data = pd.read_excel("Dataset.xlsx")

In [100]:
data.shape

(10683, 11)

In [101]:
# split into input (X) and output (y) variables
price = data['Price']
price = pd.DataFrame(price)
print(price)

       Price
0       3897
1       7662
2      13882
3       6218
4      13302
...      ...
10678   4107
10679   4145
10680   7229
10681  12648
10682  11753

[10683 rows x 1 columns]


In [102]:
# # Feature scaling using NORMALIZATION

# from sklearn.preprocessing import MinMaxScaler
# sc = MinMaxScaler(feature_range = (0.9,1))
# scaled_data = sc.fit_transform(price)
# print(scaled_data)

In [103]:
# scaled_data.shape

In [104]:
### LSTM are sensitive to the scale of the data. so we apply MinMax scaler
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0.9,1))
scaled_data = sc.fit_transform(np.array(price).reshape(-1,1))

In [105]:
# # Creating a data structure with 7 timesteps and 1 output
X = []
y = []
for i in range(7,10682):
    X.append(scaled_data[i-7:i,0])
    y.append(scaled_data[i,0])
    

X,y = np.array(X),np.array(y)

In [106]:
X.shape

(10675, 7)

In [107]:
y.shape

(10675,)

In [108]:
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

In [109]:
print(X_train.shape), print(y_train.shape)

(7472, 7)
(7472,)


(None, None)

In [110]:
print(X_test.shape), print(y_test.shape)

(3203, 7)
(3203,)


(None, None)

In [111]:
#Before feeding into lstm we must convert dataset into 3d 
# reshape input to be [samples, time steps, features] which is required for LSTM
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

(X_train.shape, X_test.shape)

((7472, 7, 1), (3203, 7, 1))

In [112]:
#Model Creation 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [113]:
model=Sequential()
model.add(LSTM(50,return_sequences=True, input_shape=(7472,1)))
model.add(LSTM(50,return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))

model.compile(loss='mean_squared_error',optimizer='adam')

In [114]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_22 (LSTM)               (None, 7472, 50)          10400     
_________________________________________________________________
lstm_23 (LSTM)               (None, 7472, 50)          20200     
_________________________________________________________________
lstm_24 (LSTM)               (None, 50)                20200     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 51        
Total params: 50,851
Trainable params: 50,851
Non-trainable params: 0
_________________________________________________________________


In [115]:
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1da91e472e0>

In [116]:
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)



In [117]:
# We have scaled it, so we need to reverse scale it to find the o/p
train_predict=sc.inverse_transform(train_predict)
test_predict=sc.inverse_transform(test_predict)

In [118]:
#rmse
import math
from sklearn.metrics import mean_squared_error
math.sqrt(mean_squared_error(y_train,train_predict))

11270.729812922164

In [119]:
math.sqrt(mean_squared_error(y_test,test_predict))

11290.438716652925

In [120]:
def rmse(actual, pred):
  return np.sqrt(mean_squared_error(actual, pred))

In [121]:
def mape(actual, pred): 
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / actual)) * 100

In [122]:
lstm_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: %.3f' % lstm_rmse)

RMSE: 0.007


In [123]:
lstm_mape = mape(y_test,model.predict(X_test))

print("MAPE: " , lstm_mape)

MAPE:  0.5990543033740185


In [124]:
lstm_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: ',lstm_rmse)

RMSE:  0.006799870858496201
