In [None]:
# Import relevant libraries
import pandas
import matplotlib.pyplot as plt
import numpy
import math

# Import Keras tools
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Import sklearn metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Read in the dataset from the hosted CSV file
dataset = pandas.read_csv('https://gist.githubusercontent.com/FelixHarvey/964b7e33754e79d46ac36a9126891907/raw/48af32bfcbb0deee3b21a4d9f1b4419fc6a53ee5/dataset.csv', engine = 'python')

# Alternative pair of car parks (uncomment to use instead of the pair below)
#c1 = "Bull Ring"
#c2 = "BHMBRCBRG02"

# Assign the carpark names: c1 is used as the model input, c2 as the output
c1 = "BHMEURBRD01"
c2 = "Broad Street"

# Create a list of the selected car parks and filter the data set.
# The explicit .copy() makes the filtered frame independent of `dataset`, so
# adding a column to it below is a plain assignment rather than a chained
# assignment on a slice (which raises pandas' SettingWithCopyWarning).
carparks = [c1, c2]
dataset_filtered = dataset[dataset.SystemCodeNumber.isin(carparks)].copy()

# Create the occupancy rate column (Occupancy divided by Capacity)
dataset_filtered["OccupancyRate"] = dataset_filtered["Occupancy"] / dataset_filtered["Capacity"]

# Check for any unusual values greater than 1 or less than 0
print(dataset_filtered[dataset_filtered.OccupancyRate > 1])
print(dataset_filtered[dataset_filtered.OccupancyRate < 0])

# Create the individual datasets: c1 is the input, c2 is the output
dataset_c1 = dataset_filtered[dataset_filtered.SystemCodeNumber.isin([c1])]
dataset_c2 = dataset_filtered[dataset_filtered.SystemCodeNumber.isin([c2])]

In [None]:
# The training/testing split ratio
split_ratio = 0.7

# Inputs (c1) and outputs (c2) are paired by row position, so only as many
# rows as BOTH car parks provide can be used. Halving len(dataset_filtered)
# is only correct when the two car parks have the same number of rows; taking
# the minimum gives the same result in that case and keeps X and Y the same
# length otherwise.
# NOTE(review): pairing by position also assumes both frames are in the same
# time order -- confirm against the dataset.
n_samples = min(len(dataset_c1), len(dataset_c2))
if len(dataset_c1) != len(dataset_c2):
    print('Warning: car parks have %d and %d rows; using the first %d of each'
          % (len(dataset_c1), len(dataset_c2), n_samples))

# Calculate the size of each of the training and test dataset
train_size = int(n_samples * split_ratio)
test_size = n_samples - train_size

# Create input datasets (first train_size rows for training, the rest for testing)
trainX = dataset_c1.iloc[0:train_size, :]
testX = dataset_c1.iloc[train_size:n_samples, :]

# Create output datasets, split at the same row positions as the inputs
trainY = dataset_c2.iloc[0:train_size, :]
testY = dataset_c2.iloc[train_size:n_samples, :]
print(len(trainX), len(testX))
print(len(trainY), len(testY))

# Reshape training and testing data.
# Inputs become 3-D arrays of shape (samples, 1, 1): Keras LSTM layers take
# (samples, timesteps, features), and here each sample is a single reading.
trainX = trainX[["OccupancyRate"]].to_numpy()
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))

testX = testX[["OccupancyRate"]].to_numpy()
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# Outputs stay 1-D: one occupancy rate per sample
trainY = trainY["OccupancyRate"].to_numpy()
testY = testY["OccupancyRate"].to_numpy()

In [None]:
# Create random seed
SEED = 30
numpy.random.seed(SEED)

# Seeding NumPy alone does not seed the Keras backend's own random number
# generator, which is what initialises the layer weights. Where available,
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the backend
# together so that repeated runs start from the same weights.
try:
    from keras.utils import set_random_seed
except ImportError:
    # Older Keras releases do not provide this helper; only NumPy is seeded.
    pass
else:
    set_random_seed(SEED)

# Create and train model: a single-unit LSTM followed by a single-unit dense
# output layer, trained to minimise mean squared error with the Adam optimiser
model = Sequential()
model.add(LSTM(1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=20, batch_size=1, verbose=2)

In [None]:
# Run the fitted model over the training inputs first, then the test inputs
trainPredict, testPredict = [model.predict(inputs) for inputs in (trainX, testX)]


In [None]:
# Score the predictions against the observed values (RMSE and MAE).
# The prediction arrays are 2-D, so take their first column to line up with
# the 1-D target arrays.
train_predicted = trainPredict[:, 0]
test_predicted = testPredict[:, 0]

# Root mean squared error
train_mse = mean_squared_error(trainY, train_predicted)
test_mse = mean_squared_error(testY, test_predicted)
trainRMSEScore = math.sqrt(train_mse)
testRMSEScore = math.sqrt(test_mse)

# Mean absolute error
trainMAEScore = mean_absolute_error(trainY, train_predicted)
testMAEScore = mean_absolute_error(testY, test_predicted)

# Report all four scores, RMSE first and then MAE
print('Train Score: %.6f RMSE' % trainRMSEScore)
print('Test Score: %.6f RMSE' % testRMSEScore)
print('Train Score: %.6f MAE' % trainMAEScore)
print('Test Score: %.6f MAE' % testMAEScore)

In [None]:
# Plot the observed test outputs against the model's predictions.
# Only the first PLOT_POINTS test samples are drawn to keep the figure readable.
PLOT_POINTS = 350

fig, ax = plt.subplots()
ax.plot(testY[:PLOT_POINTS], label="Actual")
# testPredict is 2-D; its first column holds the predicted values
ax.plot(testPredict[:PLOT_POINTS, 0], label="Predicted")
ax.set_xlabel("Test sample index")
ax.set_ylabel("Occupancy rate")
ax.set_title("Test set: actual vs. predicted occupancy rate")
ax.legend()
plt.show()