In [57]:
#importing the necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [58]:
# load dataset
data = pd.read_excel("Dataset.xlsx")

In [59]:
#shape
data.shape

(10683, 11)

In [60]:
# split into input (X) and output (y) variables
price = data['Price']
price = pd.DataFrame(price)
print(price)

       Price
0       3897
1       7662
2      13882
3       6218
4      13302
...      ...
10678   4107
10679   4145
10680   7229
10681  12648
10682  11753

[10683 rows x 1 columns]


In [61]:
# Feature scaling using NORMALIZATION

from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0.9,1))
scaled_data = sc.fit_transform(price)
print(scaled_data)

[[0.90274973]
 [0.90759199]
 [0.91559168]
 ...
 [0.9070351 ]
 [0.9140046 ]
 [0.91285352]]


In [62]:
# Creating a data structure with 7 timesteps and 1 output
X = []
y = []
for i in range(7,10682):
    X.append(scaled_data[i-7:i,0])
    y.append(scaled_data[i,0])
    
X,y = np.array(X),np.array(y)

In [63]:
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

# Root Mean Squared Error

In [64]:
def rmse(actual, pred):
  return np.sqrt(mean_squared_error(actual, pred))

# Mean absolute Percentage Error

In [65]:
def mape(actual, pred): 
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / actual)) * 100

# ANN

In [66]:
# define the keras model
model = Sequential()
model.add(Dense(20, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(10, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1, activation='linear'))

In [67]:
# compile the keras model
model.compile(loss='mean_absolute_percentage_error', optimizer='adam')

In [68]:
# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2)

Epoch 1/100
234/234 - 1s - loss: 1.2006
Epoch 2/100
234/234 - 0s - loss: 0.8632
Epoch 3/100
234/234 - 0s - loss: 0.8520
Epoch 4/100
234/234 - 0s - loss: 0.8569
Epoch 5/100
234/234 - 0s - loss: 0.8219
Epoch 6/100
234/234 - 0s - loss: 0.8680
Epoch 7/100
234/234 - 0s - loss: 0.8197
Epoch 8/100
234/234 - 0s - loss: 0.8280
Epoch 9/100
234/234 - 0s - loss: 0.7794
Epoch 10/100
234/234 - 0s - loss: 0.7844
Epoch 11/100
234/234 - 0s - loss: 0.7733
Epoch 12/100
234/234 - 0s - loss: 0.7640
Epoch 13/100
234/234 - 0s - loss: 0.7496
Epoch 14/100
234/234 - 0s - loss: 0.7957
Epoch 15/100
234/234 - 0s - loss: 0.7509
Epoch 16/100
234/234 - 0s - loss: 0.7314
Epoch 17/100
234/234 - 0s - loss: 0.7187
Epoch 18/100
234/234 - 0s - loss: 0.7437
Epoch 19/100
234/234 - 0s - loss: 0.7135
Epoch 20/100
234/234 - 0s - loss: 0.7409
Epoch 21/100
234/234 - 0s - loss: 0.7339
Epoch 22/100
234/234 - 0s - loss: 0.7063
Epoch 23/100
234/234 - 0s - loss: 0.7148
Epoch 24/100
234/234 - 0s - loss: 0.7226
Epoch 25/100
234/234 - 0s

<tensorflow.python.keras.callbacks.History at 0x18ea1e3e1f0>

In [69]:
ANN_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: %.3f' % ANN_rmse)

RMSE: 0.007


In [70]:
ANN_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: ',ANN_rmse)

RMSE:  0.006881902926989151


In [71]:
ANN_mape = mape(y_test,model.predict(X_test))

print("MAPE =",ANN_mape)

MAPE = 0.5579487399131982


# LINEAR REGRESSION

In [72]:
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [73]:
lr_rmse = rmse(y_test, lr.predict(X_test))
print("Root Mean Square Error=",lr_rmse)

Root Mean Square Error= 0.006109087698177261


In [74]:
lr_mape=mape(y_test,lr.predict(X_test))
print("Mean Absolute Percentage error=",lr_mape)

Mean Absolute Percentage error= 0.5202171579180117


# DECISION TREE

In [75]:
dt = DecisionTreeRegressor(min_samples_leaf=1000,min_weight_fraction_leaf=0.5)
dt.fit(X_train,y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1000, min_samples_split=2,
                      min_weight_fraction_leaf=0.5, presort='deprecated',
                      random_state=None, splitter='best')

In [76]:
dt_rmse = rmse(y_test, dt.predict(X_test))
print("Root Mean Square Error=",dt_rmse)

Root Mean Square Error= 0.0061019746207730706


In [77]:
dt_mape=mape(y_test,dt.predict(X_test))
print("Mean Absolute Percentage error=",dt_mape)

Mean Absolute Percentage error= 0.5202012748283921


# RANDOM FOREST

In [78]:
rfr = RandomForestRegressor()
rfr.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [79]:
rfr_rmse = rmse(y_test , rfr.predict(X_test))
print("Root Mean Square Error=",rfr_rmse)

Root Mean Square Error= 0.0062493031305234596


In [80]:
rfr_mape = mape(y_test , rfr.predict(X_test))
print("Mean Absolute Percentage error =" , rfr_mape)

Mean Absolute Percentage error = 0.5308228439738271


In [81]:
import plotly.express as px

In [82]:
algo_name = ('ANN', 'LR', 'DT', 'RF')
rmse_list = (0.008410309713082834, 0.006109087698177261, 0.0061019746207730645, 0.0062402313794453)
mape_list = (0.6831663296497983, 0.5202171579180117, 0.5202012748283965, 0.5291447939343533)

In [83]:
fig = px.bar(x=algo_name,y=rmse_list, title = 'RMSE Plot', color=algo_name, height=700, width= 1300)
fig.update_layout(xaxis_title="Algorithms Applied", yaxis_title="RMSE")
fig.show()

In [84]:
fig1 = px.bar(x=algo_name,y=mape_list, title = 'MAPE Plot', color=algo_name, height=700, width= 1300)
fig1.update_layout(xaxis_title="Algorithms Applied", yaxis_title="MAPE")
fig1.show()