In [1]:
#importing the necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# load dataset
data = pd.read_excel("Dataset.xlsx")

In [3]:
#shape
data.shape

(4746, 12)

In [6]:
# split into input (X) and output (y) variables
price = data['Rent']
price = pd.DataFrame(price)
print(price)

       Rent
0     10000
1     20000
2     17000
3     10000
4      7500
...     ...
4741  15000
4742  29000
4743  35000
4744  45000
4745  15000

[4746 rows x 1 columns]


In [7]:
# Feature scaling using NORMALIZATION

from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0.9,1))
scaled_data = sc.fit_transform(price)
print(scaled_data)

[[0.90025151]
 [0.90053733]
 [0.90045158]
 ...
 [0.90096605]
 [0.90125186]
 [0.90039442]]


In [9]:
# Creating a data structure with 7 timesteps and 1 output
X = []
y = []
for i in range(7,4746):
    X.append(scaled_data[i-7:i,0])
    y.append(scaled_data[i,0])
    
X,y = np.array(X),np.array(y)

In [10]:
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

# Root Mean Squared Error

In [11]:
def rmse(actual, pred):
  return np.sqrt(mean_squared_error(actual, pred))

# Mean absolute Percentage Error

In [12]:
def mape(actual, pred): 
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / actual)) * 100

# ANN

In [13]:
# define the keras model
model = Sequential()
model.add(Dense(20, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(10, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1, activation='linear'))

In [14]:
# compile the keras model
model.compile(loss='mean_absolute_percentage_error', optimizer='adam')

In [15]:
# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2)

Epoch 1/100
104/104 - 1s - loss: 21.6606 - 670ms/epoch - 6ms/step
Epoch 2/100
104/104 - 0s - loss: 0.2122 - 131ms/epoch - 1ms/step
Epoch 3/100
104/104 - 0s - loss: 0.2090 - 118ms/epoch - 1ms/step
Epoch 4/100
104/104 - 0s - loss: 0.2381 - 126ms/epoch - 1ms/step
Epoch 5/100
104/104 - 0s - loss: 0.1978 - 112ms/epoch - 1ms/step
Epoch 6/100
104/104 - 0s - loss: 0.3069 - 120ms/epoch - 1ms/step
Epoch 7/100
104/104 - 0s - loss: 0.4255 - 120ms/epoch - 1ms/step
Epoch 8/100
104/104 - 0s - loss: 0.2106 - 120ms/epoch - 1ms/step
Epoch 9/100
104/104 - 0s - loss: 0.1917 - 126ms/epoch - 1ms/step
Epoch 10/100
104/104 - 0s - loss: 0.1669 - 115ms/epoch - 1ms/step
Epoch 11/100
104/104 - 0s - loss: 0.2533 - 119ms/epoch - 1ms/step
Epoch 12/100
104/104 - 0s - loss: 0.2229 - 110ms/epoch - 1ms/step
Epoch 13/100
104/104 - 0s - loss: 0.1694 - 120ms/epoch - 1ms/step
Epoch 14/100
104/104 - 0s - loss: 0.2395 - 120ms/epoch - 1ms/step
Epoch 15/100
104/104 - 0s - loss: 0.3035 - 120ms/epoch - 1ms/step
Epoch 16/100
104/1

<keras.callbacks.History at 0x237b3c54d00>

In [16]:
ANN_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: %.3f' % ANN_rmse)

RMSE: 0.002


In [17]:
ANN_rmse = rmse(y_test, model.predict(X_test))
print('RMSE: ',ANN_rmse)

RMSE:  0.002419704206520407


In [18]:
ANN_mape = mape(y_test,model.predict(X_test))

print("MAPE =",ANN_mape)

MAPE = 0.16152517110440598


# LINEAR REGRESSION

In [19]:
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [20]:
lr_rmse = rmse(y_test, lr.predict(X_test))
print("Root Mean Square Error=",lr_rmse)

Root Mean Square Error= 0.0017365970348844698


In [21]:
lr_mape=mape(y_test,lr.predict(X_test))
print("Mean Absolute Percentage error=",lr_mape)

Mean Absolute Percentage error= 0.08589239336643324


# DECISION TREE

In [22]:
dt = DecisionTreeRegressor(min_samples_leaf=1000,min_weight_fraction_leaf=0.5)
dt.fit(X_train,y_train)

DecisionTreeRegressor(min_samples_leaf=1000, min_weight_fraction_leaf=0.5)

In [23]:
dt_rmse = rmse(y_test, dt.predict(X_test))
print("Root Mean Square Error=",dt_rmse)

Root Mean Square Error= 0.0018092405272913995


In [24]:
dt_mape=mape(y_test,dt.predict(X_test))
print("Mean Absolute Percentage error=",dt_mape)

Mean Absolute Percentage error= 0.09823217612590339


# RANDOM FOREST

In [25]:
rfr = RandomForestRegressor()
rfr.fit(X_train,y_train)

RandomForestRegressor()

In [26]:
rfr_rmse = rmse(y_test , rfr.predict(X_test))
print("Root Mean Square Error=",rfr_rmse)

Root Mean Square Error= 0.0018656674270578052


In [27]:
rfr_mape = mape(y_test , rfr.predict(X_test))
print("Mean Absolute Percentage error =" , rfr_mape)

Mean Absolute Percentage error = 0.09287632512783615


# LSTM

# Graph

In [31]:
import plotly.express as px

In [32]:
algo_name = ('ANN', 'LR', 'DT', 'RF')
rmse_list = (0.002419704206520407, 0.0017365970348844698, 0.0018092405272913995, 0.0018656674270578052)
mape_list = (0.16152517110440598, 0.08589239336643324, 0.09823217612590339, 0.09287632512783615)

In [35]:
fig = px.bar(x=algo_name,y=rmse_list, title = 'RMSE Plot', color=algo_name, height=700, width= 830)
fig.update_layout(xaxis_title="Algorithms Applied", yaxis_title="RMSE")
fig.show()

In [36]:
fig1 = px.bar(x=algo_name,y=mape_list, title = 'MAPE Plot', color=algo_name, height=700, width= 830)
fig1.update_layout(xaxis_title="Algorithms Applied", yaxis_title="MAPE")
fig1.show()