# Prediction of Carpark Availability
This notebook extracts and cleans the carpark data, and then applies a machine learning model to the data to predict carpark lot availability from the carpark number and the date and time.

In [2]:
# Essential Modules

import pandas as pd
import numpy as np
from datetime import datetime
import pickle
import glob

from sklearn.tree import DecisionTreeRegressor
regr = DecisionTreeRegressor(random_state=0)

from sklearn.preprocessing import LabelEncoder
encode = LabelEncoder()

from urllib.request import urlopen
import json
from pandas.io.json import json_normalize

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

from sklearn.externals import joblib

import os
import warnings
warnings.filterwarnings("ignore")

### Essential functions

In [2]:
# def convert_to_list(object):
#     return list(object)

def convert_to_dateTime(object):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime object."""
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    return datetime.strptime(object, timestamp_format)

def takeTimeOnly(time):
    """Return the time-of-day part of a datetime as an 'HH:MM:SS' string."""
    clock_format = '%H:%M:%S'
    return time.strftime(clock_format)

def convertTimeToSec(time):
    """Convert an 'HH:MM:SS' string into the number of seconds since midnight."""
    hours, minutes, seconds = (int(part) for part in time.split(':'))
    return (hours * 60 + minutes) * 60 + seconds

def obtainDate(ts):
    """Return the calendar date of a datetime as a 'YYYY-MM-DD' string."""
    date_format = '%Y-%m-%d'
    return ts.strftime(date_format)

def obtainHour(ts):
    """Return the hour of a datetime as a zero-padded two-character string."""
    hour_format = '%H'
    return ts.strftime(hour_format)

def obtainYear(ts):
    """Return the year of a datetime as a string."""
    year_format = '%Y'
    return ts.strftime(year_format)

def checkDayOfWeek(ts):
    """Return the day-of-week code of a datetime: 0 = Monday ... 6 = Sunday.

    The previous implementation counted from 0 to 6 until the counter matched
    ts.weekday(); that value is simply ts.weekday() itself.
    """
    return ts.weekday()
        
def obtainCarParkNumIndicator(listOfCarparks, carparkNumber):
    """Return the position of the first entry equal to carparkNumber.

    Returns None when carparkNumber does not occur in listOfCarparks.
    """
    return next(
        (position for position, entry in enumerate(listOfCarparks)
         if carparkNumber == entry),
        None,
    )

def obtainLotTypeIndicator(lotTypes, lotType):
    """Return the position of the first entry of lotTypes equal to lotType.

    Returns None when lotType does not occur in lotTypes.
    """
    for position, candidate in enumerate(lotTypes):
        if lotType == candidate:
            return position
    return None

def WeekdayOrWeekend(ts):
    """Return 0 for Monday-Friday and 1 for Saturday/Sunday.

    Falls through to None if the weekday code is outside 0..6.
    """
    day = ts.weekday()
    if 0 <= day <= 4:
        return 0
    if 5 <= day <= 6:
        return 1
    return None
    
def identifyCarpark(carpark):
    """Return the position of carpark in the notebook-level allCarparks list.

    Returns None when carpark is not in allCarparks. allCarparks must have
    been defined by an earlier cell before this function is called.
    """
    for position, knownCarpark in enumerate(allCarparks):
        if knownCarpark == carpark:
            return position
    return None

### Getting the data from the file directory
The data was previously crawled from the API and stored in JSON format. As the overall data from the Housing Development Board (HDB) is huge, we split the extraction of the data into several parts.

In [6]:
# Folder holding the crawled URA snapshots. The previous literal 'URAData\/'
# contained an invalid '\/' escape, which left a literal backslash in the path.
path_to_jsonfiles = 'URAData'

# Collect one frame per snapshot and concatenate once at the end; calling
# pd.concat inside the loop re-copies every row gathered so far on each pass.
uraFrames = []
counter = 0

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", counter)

    for snapshot in d:
        df = json_normalize(snapshot["availabilityResults"])
        # Every row of a snapshot shares the snapshot's own timestamp.
        df["timeStamp"] = snapshot["timeStamp"]
        uraFrames.append(df)
    counter += 1

if not uraFrames:
    # Fail with a clear message instead of a TypeError on None further down.
    raise ValueError("No URA availability records found in " + path_to_jsonfiles)

mainURAdf = pd.concat(uraFrames, axis=0)
del uraFrames  # release the per-snapshot frames once they are merged

# Keep lot type 'C' only, order chronologically, and renumber the rows.
mainURAdf = mainURAdf[mainURAdf['lotType'] == 'C']
mainURAdf = mainURAdf.sort_values(by = 'timeStamp')
mainURAdf = mainURAdf.reset_index(drop=True)
mainURAdf = mainURAdf.drop(columns = ['lotType'])
mainURAdf.to_csv('URAData.csv')
mainURAdf.head()

In [7]:
# ##############################  HDB 1 ##############################################################
# Folder holding part 1 of the crawled HDB snapshots. The previous literal
# 'HDBData_Json1\/' contained an invalid '\/' escape (a literal backslash).
path_to_jsonfiles = 'HDBData_Json1'
count = 0
skipped = 0
# One frame per snapshot, concatenated once after the loop (pd.concat inside
# the loop re-copies every row gathered so far on each pass).
hdbFrames = []

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", count)
    for y in range(0, len(d)):
        try:
            dn = d[y]['items'][0]['carpark_data']
            for record in dn:
                datetimeString = record['update_datetime']
                record['carparkNo'] = record['carpark_number']
                # Replace the single character at position 10 (between date and
                # time) with a space so the value matches '%Y-%m-%d %H:%M:%S'.
                record['timeStamp'] = datetimeString[:10] + " " + datetimeString[11:]
                # Only the first 'carpark_info' entry of each carpark is used.
                record['lotType'] = record['carpark_info'][0]['lot_type']
                record['lotsAvailable'] = record['carpark_info'][0]['lots_available']

            df = json_normalize(dn)
            df = df.drop(['carpark_info'], axis=1)
        except (KeyError, IndexError, TypeError):
            # Snapshot with missing/empty fields: skip it, as before, but no
            # longer swallow unrelated errors such as KeyboardInterrupt.
            skipped += 1
            continue
        hdbFrames.append(df)
    count += 1

if skipped:
    print("skipped", skipped, "malformed snapshots")
if not hdbFrames:
    raise ValueError("No HDB carpark records found in " + path_to_jsonfiles)

mainHDBdf1 = pd.concat(hdbFrames, axis=0).reset_index(drop=True)
del hdbFrames  # release the per-snapshot frames once they are merged

# Keep records from 2020 onwards with lot type 'C', then drop the raw columns
# that were copied into carparkNo / timeStamp above.
years = mainHDBdf1['timeStamp'].apply(convert_to_dateTime).apply(obtainYear)
mainHDBdf1 = mainHDBdf1[(years >= '2020') & (mainHDBdf1['lotType'] == 'C')]
mainHDBdf1 = mainHDBdf1.drop(columns = ['carpark_number', 'update_datetime'])

In [8]:
# ##############################  HDB 2 ##############################################################
# Folder holding part 2 of the crawled HDB snapshots. The previous literal
# 'HDBData_Json2\/' contained an invalid '\/' escape (a literal backslash).
path_to_jsonfiles = 'HDBData_Json2'
count = 0
skipped = 0
# One frame per snapshot, concatenated once after the loop (pd.concat inside
# the loop re-copies every row gathered so far on each pass).
hdbFrames = []

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", count)
    for y in range(0, len(d)):
        try:
            dn = d[y]['items'][0]['carpark_data']
            for record in dn:
                datetimeString = record['update_datetime']
                record['carparkNo'] = record['carpark_number']
                # Replace the single character at position 10 (between date and
                # time) with a space so the value matches '%Y-%m-%d %H:%M:%S'.
                record['timeStamp'] = datetimeString[:10] + " " + datetimeString[11:]
                # Only the first 'carpark_info' entry of each carpark is used.
                record['lotType'] = record['carpark_info'][0]['lot_type']
                record['lotsAvailable'] = record['carpark_info'][0]['lots_available']

            df = json_normalize(dn)
            df = df.drop(['carpark_info'], axis=1)
        except (KeyError, IndexError, TypeError):
            # Snapshot with missing/empty fields: skip it, as before, but no
            # longer swallow unrelated errors such as KeyboardInterrupt.
            skipped += 1
            continue
        hdbFrames.append(df)
    count += 1

if skipped:
    print("skipped", skipped, "malformed snapshots")
if not hdbFrames:
    raise ValueError("No HDB carpark records found in " + path_to_jsonfiles)

mainHDBdf2 = pd.concat(hdbFrames, axis=0).reset_index(drop=True)
del hdbFrames  # release the per-snapshot frames once they are merged

# Keep records from 2020 onwards with lot type 'C', then drop the raw columns
# that were copied into carparkNo / timeStamp above.
years = mainHDBdf2['timeStamp'].apply(convert_to_dateTime).apply(obtainYear)
mainHDBdf2 = mainHDBdf2[(years >= '2020') & (mainHDBdf2['lotType'] == 'C')]
mainHDBdf2 = mainHDBdf2.drop(columns = ['carpark_number', 'update_datetime'])

In [9]:
# ##############################  HDB 3 ##############################################################
# Folder holding part 3 of the crawled HDB snapshots. The previous literal
# 'HDBData_Json3\/' contained an invalid '\/' escape (a literal backslash).
path_to_jsonfiles = 'HDBData_Json3'
count = 0
skipped = 0
# One frame per snapshot, concatenated once after the loop (pd.concat inside
# the loop re-copies every row gathered so far on each pass).
hdbFrames = []

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", count)
    for y in range(0, len(d)):
        try:
            dn = d[y]['items'][0]['carpark_data']
            for record in dn:
                datetimeString = record['update_datetime']
                record['carparkNo'] = record['carpark_number']
                # Replace the single character at position 10 (between date and
                # time) with a space so the value matches '%Y-%m-%d %H:%M:%S'.
                record['timeStamp'] = datetimeString[:10] + " " + datetimeString[11:]
                # Only the first 'carpark_info' entry of each carpark is used.
                record['lotType'] = record['carpark_info'][0]['lot_type']
                record['lotsAvailable'] = record['carpark_info'][0]['lots_available']

            df = json_normalize(dn)
            df = df.drop(['carpark_info'], axis=1)
        except (KeyError, IndexError, TypeError):
            # Snapshot with missing/empty fields: skip it, as before, but no
            # longer swallow unrelated errors such as KeyboardInterrupt.
            skipped += 1
            continue
        hdbFrames.append(df)
    count += 1

if skipped:
    print("skipped", skipped, "malformed snapshots")
if not hdbFrames:
    raise ValueError("No HDB carpark records found in " + path_to_jsonfiles)

mainHDBdf3 = pd.concat(hdbFrames, axis=0).reset_index(drop=True)
del hdbFrames  # release the per-snapshot frames once they are merged

# Keep records from 2020 onwards with lot type 'C', then drop the raw columns
# that were copied into carparkNo / timeStamp above.
years = mainHDBdf3['timeStamp'].apply(convert_to_dateTime).apply(obtainYear)
mainHDBdf3 = mainHDBdf3[(years >= '2020') & (mainHDBdf3['lotType'] == 'C')]
mainHDBdf3 = mainHDBdf3.drop(columns = ['carpark_number', 'update_datetime'])

In [10]:
# ##############################  HDB 4 ##############################################################
# Folder holding part 4 of the crawled HDB snapshots. The previous literal
# 'HDBData_Json4\/' contained an invalid '\/' escape (a literal backslash).
path_to_jsonfiles = 'HDBData_Json4'
count = 0
skipped = 0
# One frame per snapshot, concatenated once after the loop (pd.concat inside
# the loop re-copies every row gathered so far on each pass).
hdbFrames = []

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", count)
    for y in range(0, len(d)):
        try:
            dn = d[y]['items'][0]['carpark_data']
            for record in dn:
                datetimeString = record['update_datetime']
                record['carparkNo'] = record['carpark_number']
                # Replace the single character at position 10 (between date and
                # time) with a space so the value matches '%Y-%m-%d %H:%M:%S'.
                record['timeStamp'] = datetimeString[:10] + " " + datetimeString[11:]
                # Only the first 'carpark_info' entry of each carpark is used.
                record['lotType'] = record['carpark_info'][0]['lot_type']
                record['lotsAvailable'] = record['carpark_info'][0]['lots_available']

            df = json_normalize(dn)
            df = df.drop(['carpark_info'], axis=1)
        except (KeyError, IndexError, TypeError):
            # Snapshot with missing/empty fields: skip it, as before, but no
            # longer swallow unrelated errors such as KeyboardInterrupt.
            skipped += 1
            continue
        hdbFrames.append(df)
    count += 1

if skipped:
    print("skipped", skipped, "malformed snapshots")
if not hdbFrames:
    raise ValueError("No HDB carpark records found in " + path_to_jsonfiles)

mainHDBdf4 = pd.concat(hdbFrames, axis=0).reset_index(drop=True)
del hdbFrames  # release the per-snapshot frames once they are merged

# Keep records from 2020 onwards with lot type 'C', then drop the raw columns
# that were copied into carparkNo / timeStamp above.
years = mainHDBdf4['timeStamp'].apply(convert_to_dateTime).apply(obtainYear)
mainHDBdf4 = mainHDBdf4[(years >= '2020') & (mainHDBdf4['lotType'] == 'C')]
mainHDBdf4 = mainHDBdf4.drop(columns = ['carpark_number', 'update_datetime'])

In [11]:
# ##############################  HDB 5 ##############################################################
# Folder holding part 5 of the crawled HDB snapshots. The previous literal
# 'HDBData_Json5\/' contained an invalid '\/' escape (a literal backslash).
path_to_jsonfiles = 'HDBData_Json5'
count = 0
skipped = 0
# One frame per snapshot, concatenated once after the loop (pd.concat inside
# the loop re-copies every row gathered so far on each pass).
hdbFrames = []

for file in os.listdir(path_to_jsonfiles):
    full_filename = os.path.join(path_to_jsonfiles, file)
    with open(full_filename, 'r') as fi:
        d = json.load(fi)
    print("check", count)
    for y in range(0, len(d)):
        try:
            dn = d[y]['items'][0]['carpark_data']
            for record in dn:
                datetimeString = record['update_datetime']
                record['carparkNo'] = record['carpark_number']
                # Replace the single character at position 10 (between date and
                # time) with a space so the value matches '%Y-%m-%d %H:%M:%S'.
                record['timeStamp'] = datetimeString[:10] + " " + datetimeString[11:]
                # Only the first 'carpark_info' entry of each carpark is used.
                record['lotType'] = record['carpark_info'][0]['lot_type']
                record['lotsAvailable'] = record['carpark_info'][0]['lots_available']

            df = json_normalize(dn)
            df = df.drop(['carpark_info'], axis=1)
        except (KeyError, IndexError, TypeError):
            # Snapshot with missing/empty fields: skip it, as before, but no
            # longer swallow unrelated errors such as KeyboardInterrupt.
            skipped += 1
            continue
        hdbFrames.append(df)
    count += 1

if skipped:
    print("skipped", skipped, "malformed snapshots")
if not hdbFrames:
    raise ValueError("No HDB carpark records found in " + path_to_jsonfiles)

mainHDBdf5 = pd.concat(hdbFrames, axis=0).reset_index(drop=True)
del hdbFrames  # release the per-snapshot frames once they are merged

# Keep records from 2020 onwards with lot type 'C', then drop the raw columns
# that were copied into carparkNo / timeStamp above.
years = mainHDBdf5['timeStamp'].apply(convert_to_dateTime).apply(obtainYear)
mainHDBdf5 = mainHDBdf5[(years >= '2020') & (mainHDBdf5['lotType'] == 'C')]
mainHDBdf5 = mainHDBdf5.drop(columns = ['carpark_number', 'update_datetime'])

As the extraction of data was done in several parts, we concatenate the different parts into 1 single data frame.

In [13]:
# Stack the five HDB parts and give the result one continuous row index, so
# that labels from the individual parts are not repeated.
mainHDBdf = pd.concat(
    [mainHDBdf1, mainHDBdf2, mainHDBdf3, mainHDBdf4, mainHDBdf5],
    ignore_index=True,
)

# The per-part cells already remove their 'index' column, so an unconditional
# drop raised KeyError here; only drop it if it is actually present.
mainHDBdf = mainHDBdf.drop(columns = ['index'], errors='ignore')

### Data Cleaning and Features Extraction
Cleaning of the data, and getting features from the original attributes.
The original data has the following attributes, carpark number, date and time, and the carpark lot availability. Hence, with the date and time of each data point, we generate the details such as the time in seconds ('timeInSec'), the hour ('hour') and the day of the week ('dayOfWeek').

In [20]:
# Derive the time features with vectorised pandas operations instead of five
# row-by-row .apply passes. pd.to_datetime also accepts a column that has
# already been converted, so this cell can be re-run safely.
uraTimes = pd.to_datetime(mainURAdf['timeStamp'], format='%Y-%m-%d %H:%M:%S')
mainURAdf['timeStamp'] = uraTimes
mainURAdf['dayOfWeek'] = uraTimes.dt.weekday.astype('int64')  # 0 = Monday ... 6 = Sunday
mainURAdf['hour'] = uraTimes.dt.strftime('%H')  # zero-padded string, as before
mainURAdf['timeInSec'] = (
    uraTimes.dt.hour * 3600 + uraTimes.dt.minute * 60 + uraTimes.dt.second
).astype('int64')  # seconds since midnight
mainURAdf['lotsAvailable'] = mainURAdf['lotsAvailable'].astype('int64')
lotsURA = mainURAdf.lotsAvailable

# Move the target column 'lotsAvailable' to the last position.
uraFeatureColumns = [
    column for column in mainURAdf.columns
    if column not in ('timeOnly', 'lotsAvailable')
]
mainURAdf = mainURAdf[uraFeatureColumns + ['lotsAvailable']]

In [None]:
# Derive the time features with vectorised pandas operations instead of five
# row-by-row .apply passes. pd.to_datetime also accepts a column that has
# already been converted, so this cell can be re-run safely.
hdbTimes = pd.to_datetime(mainHDBdf['timeStamp'], format='%Y-%m-%d %H:%M:%S')
mainHDBdf['timeStamp'] = hdbTimes
mainHDBdf['dayOfWeek'] = hdbTimes.dt.weekday.astype('int64')  # 0 = Monday ... 6 = Sunday
mainHDBdf['hour'] = hdbTimes.dt.strftime('%H')  # zero-padded string, as before
mainHDBdf['timeInSec'] = (
    hdbTimes.dt.hour * 3600 + hdbTimes.dt.minute * 60 + hdbTimes.dt.second
).astype('int64')  # seconds since midnight
mainHDBdf['lotsAvailable'] = mainHDBdf['lotsAvailable'].astype('int64')
lotsHDB = mainHDBdf.lotsAvailable

# 'index' and 'Unnamed: 0' only exist when the frame came from a saved CSV or
# an un-reset index; ignore them when absent instead of raising KeyError.
mainHDBdf = mainHDBdf.drop(columns = ['index', 'Unnamed: 0', 'lotType'], errors='ignore')

# Move the target column 'lotsAvailable' to the last position.
hdbFeatureColumns = [
    column for column in mainHDBdf.columns
    if column not in ('timeOnly', 'lotsAvailable')
]
mainHDBdf = mainHDBdf[hdbFeatureColumns + ['lotsAvailable']]

Thereafter, we combine the data from both the HDB and the URA.

In [15]:
# Stack the URA rows on top of the HDB rows; each frame keeps its own row labels.
sourceFrames = [mainURAdf, mainHDBdf]
combinedDF = pd.concat(sourceFrames)
combinedDF.head()

Unnamed: 0,carparkNo,timeStamp,dayOfWeek,hour,timeInSec,lotsAvailable,carparkNoIndicator
0,S0049,2020-02-05 13:34:05,2,13,48845,107,1140
1,S0049,2020-02-05 13:34:05,2,13,48845,107,1140
2,S0049,2020-02-05 13:39:07,2,13,49147,107,1140
3,S0049,2020-02-05 13:39:07,2,13,49147,107,1140
4,S0049,2020-02-05 13:44:09,2,13,49449,107,1140


The list of carparks from the data is also saved into a separate CSV file for record.

In [17]:
# Carparks seen in each source. listOfURACarparks used to be commented out
# while still being referenced below, which raised NameError on a fresh kernel.
listOfURACarparks = list(mainURAdf.carparkNo.unique())
listOfHDBCarparks = list(mainHDBdf.carparkNo.unique())

# Every carpark in the combined data, in sorted order. The earlier
# "URA list + HDB list" assignment was immediately overwritten by this one,
# so it has been removed.
allCarparks = sorted(combinedDF.carparkNo.unique())

In [18]:
# One row per carpark, labelled 0..n-1 in the order of allCarparks.
rowLabels = list(range(0, len(allCarparks)))
usedCarparks = pd.DataFrame(data=allCarparks, index=rowLabels, columns=['carparkNo'])
# Saving is left disabled:
# usedCarparks.to_csv('usedCarparks.csv')

In [19]:
# Print the legend that maps each day name to its dayOfWeek code.
print("Understanding of indicator data")

print("\nDay of week\n")
daysOfWeek = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for dayCode, dayName in enumerate(daysOfWeek):
    print(dayName, ": \t", dayCode)

Understanding of indicator data

Day of week

Monday : 	 0
Tuesday : 	 1
Wednesday : 	 2
Thursday : 	 3
Friday : 	 4
Saturday : 	 5
Sunday : 	 6


### Pre-processing of Data
The carpark number in the data frame is a string, so we encode it as an integer using the sklearn Label Encoder. As the Label Encoder assigns codes in alphabetical order, we keep the used carparks data frame as a reference for which encoded number corresponds to which carpark. The used carparks data frame has been sorted in alphabetical order as well.

In [24]:
# Reload the carpark reference table from disk and discard the 'Unnamed: 0'
# column that the CSV carries.
usedCarparks = pd.read_csv('usedCarparks.csv').drop(columns = ['Unnamed: 0'])
usedCarparks.head()

Unnamed: 0,carparkNo,modelParts
0,A0007,0
1,A0021,0
2,A0024,0
3,A0046,0
4,A10,0


In [26]:
# Integer code for each carpark number, fitted on the combined data.
carparkCodes = encode.fit_transform(combinedDF['carparkNo'])
combinedDF['carparkNoIndicator'] = carparkCodes

# Attach the usedCarparks columns to every row with a matching carparkNo.
combinedDF = combinedDF.merge(usedCarparks, on = "carparkNo")
combinedDF

Unnamed: 0,carparkNo,timeStamp,dayOfWeek,hour,timeInSec,lotsAvailable,carparkNoIndicator,modelParts
0,S0049,2020-02-05 13:34:05,2,13,48845,107,1140,19
1,S0049,2020-02-05 13:34:05,2,13,48845,107,1140,19
2,S0049,2020-02-05 13:39:07,2,13,49147,107,1140,19
3,S0049,2020-02-05 13:39:07,2,13,49147,107,1140,19
4,S0049,2020-02-05 13:44:09,2,13,49449,107,1140,19
...,...,...,...,...,...,...,...,...
26519606,SK53,2020-02-29 09:41:02,5,9,34862,400,1336,22
26519607,SK53,2020-02-29 09:43:02,5,9,34982,400,1336,22
26519608,SK53,2020-02-29 09:45:02,5,9,35102,400,1336,22
26519609,SK53,2020-02-29 09:47:02,5,9,35222,400,1336,22


### Apply with Decision Tree Regression
sklearn Decision Tree Regression has been chosen as the model to train on the data and predict the carpark availability. This represents a regression problem with categorical predictors. As a single saved model would be huge due to the large data frame, we separate the data frame based on carpark numbers and then fit a model on each group of carpark numbers.

In [27]:
# Print the source text of the per-part assignments and of the opening of the
# combinedDF_Array literal.
partNames = ["combinedDF_part" + str(part) for part in range(31)]
for part, partName in enumerate(partNames):
    print(partName, "= combinedDF[combinedDF['modelParts'] ==", part, "]")

print('combinedDF_Array = [')
print(",".join(partNames) + ",", end = '')

combinedDF_part0 = combinedDF[combinedDF['modelParts'] == 0 ]
combinedDF_part1 = combinedDF[combinedDF['modelParts'] == 1 ]
combinedDF_part2 = combinedDF[combinedDF['modelParts'] == 2 ]
combinedDF_part3 = combinedDF[combinedDF['modelParts'] == 3 ]
combinedDF_part4 = combinedDF[combinedDF['modelParts'] == 4 ]
combinedDF_part5 = combinedDF[combinedDF['modelParts'] == 5 ]
combinedDF_part6 = combinedDF[combinedDF['modelParts'] == 6 ]
combinedDF_part7 = combinedDF[combinedDF['modelParts'] == 7 ]
combinedDF_part8 = combinedDF[combinedDF['modelParts'] == 8 ]
combinedDF_part9 = combinedDF[combinedDF['modelParts'] == 9 ]
combinedDF_part10 = combinedDF[combinedDF['modelParts'] == 10 ]
combinedDF_part11 = combinedDF[combinedDF['modelParts'] == 11 ]
combinedDF_part12 = combinedDF[combinedDF['modelParts'] == 12 ]
combinedDF_part13 = combinedDF[combinedDF['modelParts'] == 13 ]
combinedDF_part14 = combinedDF[combinedDF['modelParts'] == 14 ]
combinedDF_part15 = combinedDF[combinedDF['modelParts'] == 1

In [28]:
# Split combinedDF into one frame per 'modelParts' value (0 to 30), replacing
# 31 copy-pasted assignments. The frames are still bound to the names
# combinedDF_part0 ... combinedDF_part30, because other cells refer to them.
for part in range(31):
    globals()['combinedDF_part' + str(part)] = combinedDF[combinedDF['modelParts'] == part]

This array is used to iterate over the parts and save the 31 models (parts 0 to 30).

In [31]:
# The per-part frames in part order, so that index i holds modelParts == i.
combinedDF_Array = [
    combinedDF_part0, combinedDF_part1, combinedDF_part2, combinedDF_part3,
    combinedDF_part4, combinedDF_part5, combinedDF_part6, combinedDF_part7,
    combinedDF_part8, combinedDF_part9, combinedDF_part10, combinedDF_part11,
    combinedDF_part12, combinedDF_part13, combinedDF_part14, combinedDF_part15,
    combinedDF_part16, combinedDF_part17, combinedDF_part18, combinedDF_part19,
    combinedDF_part20, combinedDF_part21, combinedDF_part22, combinedDF_part23,
    combinedDF_part24, combinedDF_part25, combinedDF_part26, combinedDF_part27,
    combinedDF_part28, combinedDF_part29, combinedDF_part30,
]

### Saving the model

In [None]:
# Fit one decision tree per part and save each to 'DT_model<i>.sav'.
# (The initial full copy of combinedDF was overwritten on the first pass and
# has been removed.)
for i in range(len(combinedDF_Array)):
    classifierDF = combinedDF_Array[i][['carparkNoIndicator', 'timeInSec', 'dayOfWeek', 'lotsAvailable']]

    X2 = classifierDF[['carparkNoIndicator', 'timeInSec', 'dayOfWeek']]
    y2 = classifierDF['lotsAvailable'].values.reshape(-1,1)
    # The same regr object is refitted on every pass, so the model must be
    # written out before the next iteration replaces it.
    DTM = regr.fit(X2, y2)

    model_file = 'DT_model' + str(i) + '.sav'
    # Use a context manager so the file is flushed and closed after each dump.
    with open(model_file, 'wb') as modelOut:
        pickle.dump(DTM, modelOut)

### Template to load the model and try the prediction

In [None]:
# Collect the query from the user.
carparkNumber = input("Enter the carpark number. ")
timeInSec = input("Enter the time in HH:MM format to check. ")
dayInTheWeek = int(input("Enter the day of week. "))

carpark = identifyCarpark(carparkNumber)
if carpark is None:
    # identifyCarpark returns None for a carpark that is not in allCarparks.
    raise ValueError("Unknown carpark number: " + carparkNumber)
convertedTime = convertTimeToSec(timeInSec + ":00")

# Each saved model was fitted on a single 'modelParts' subset, so load the
# model for this carpark's part instead of reusing whichever model was fitted
# last in the kernel.
partRows = usedCarparks.loc[usedCarparks['carparkNo'] == carparkNumber, 'modelParts']
if partRows.empty:
    raise ValueError("No model part recorded for carpark: " + carparkNumber)
model_file = 'DT_model' + str(int(partRows.iloc[0])) + '.sav'

# NOTE: pickle.load can execute code stored in the file; only load model files
# that were written by this notebook.
with open(model_file, 'rb') as modelIn:
    DTM = pickle.load(modelIn)

# predict returns one value per input row; take the single prediction.
int(DTM.predict([[carpark, convertedTime, dayInTheWeek]])[0])