In [5]:
import os
from glob import glob
import pandas as pd
import numpy as np
import keras as ke
import haversine as hs   
from haversine import Unit

In [6]:
# load datasets

dataPath = os.path.join("data/", "*.csv")
files = glob(dataPath)

In [7]:
def sortByTime(data):
    traj_raw = data.values[:,1:]
    traj = np.array(sorted(traj_raw,key = lambda d:d[2]))
    label = data.iloc[0][0]
    return [traj,label]

def reduceData(sortedData):
    data = []
    try:
        for [traj, plate] in sortedData:
            distanceUnoccupied = 0
            distanceOccupied = 0
            prevLong = traj[0][0]
            prevLat = traj[0][1]
            for row in traj:
                try:
                    if (row[-1] == 0):
                        distanceUnoccupied += hs.haversine(
                            (prevLat, prevLong), (row[1], row[0]), unit=Unit.KILOMETERS)
                    else:
                        distanceOccupied += hs.haversine((prevLat, prevLong),
                                                        (row[1], row[0]), unit=Unit.KILOMETERS)
                except Exception as e:
                    print("Skipping Invalid data point",row)
                prevLat = row[1]
                prevLong = row[0]
            data.append([distanceOccupied, distanceUnoccupied, plate])
    except:
        print("Skipping Invalid Data with len:",len(sortedData))
    return data


In [8]:
# remove warning
import warnings
warnings.filterwarnings('ignore')

combinedData = []
for file in files:
    df = pd.read_csv(file)
    groupData = df.groupby('plate')
    sortedData = groupData.apply(sortByTime)
    combinedData.extend(reduceData(sortedData))


Skipping Invalid data point [246.000793 16.295 '2016-11-26 11:00:00' 0]
Skipping Invalid data point [114.164551 22.566833 '2016-11-26 11:00:06' 0]
Skipping Invalid data point [767.0096599999999 128.028 '2016-12-09 20:26:09' 0]
Skipping Invalid data point [114.117569 22.544268 '2016-12-09 20:26:36' 0]
Skipping Invalid Data with len: 0
Skipping Invalid data point [290.5175 162.028 '2016-12-11 11:10:09' 0]
Skipping Invalid data point [114.135216 22.55825 '2016-12-11 11:10:22' 0]
Skipping Invalid data point [262.117 12102.8 '2016-11-25 18:30:41' 0]
Skipping Invalid data point [114.1464 22.555567 '2016-11-25 18:30:44' 1]
Skipping Invalid data point [100.267 28028.0 '2016-12-10 22:34:23' 1]
Skipping Invalid data point [113.961121 22.55295 '2016-12-10 22:34:36' 0]


In [9]:
newData = np.array(combinedData)
newDataFrame = pd.DataFrame(newData, columns = ['occupied','unoccupied','plate'])
newDataFrame.head(10)

Unnamed: 0,occupied,unoccupied,plate
0,245.344354,145.465781,0.0
1,261.753492,174.031607,1.0
2,323.302293,226.395005,2.0
3,386.068903,145.777275,3.0
4,266.352191,267.222992,4.0
5,219.909586,173.813741,0.0
6,308.335977,131.811883,1.0
7,323.237094,177.09257,2.0
8,222.662593,131.444884,3.0
9,296.106018,150.249425,4.0


In [10]:
# remove warning
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

x = newDataFrame[['occupied', 'unoccupied']]
y = newDataFrame[['plate']]

min_max_scaler = preprocessing.MinMaxScaler()
xScaled = min_max_scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(xScaled, y, test_size=0.2, random_state=12, shuffle=True)

yTrain = ke.utils.to_categorical(y_train)
yTest = ke.utils.to_categorical(y_test)


In [30]:
from keras import layers as lr

network = ke.models.Sequential()

network.add(lr.Dense(64, input_dim=2))
network.add(lr.Activation('tanh'))

network.add(lr.Dense(128))

network.add(lr.Dense(256))
network.add(lr.Activation('relu'))

network.add(lr.Dense(128))
network.add(lr.Activation('tanh'))

network.add(lr.Dense(64))
network.add(lr.Activation('relu'))

network.add(lr.Dense(5))
network.add(lr.Activation('softmax'))
network.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 64)                192       
                                                                 
 activation_5 (Activation)   (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 128)               8320      
                                                                 
 dense_8 (Dense)             (None, 256)               33024     
                                                                 
 activation_6 (Activation)   (None, 256)               0         
                                                                 
 dense_9 (Dense)             (None, 128)               32896     
                                                                 
 activation_7 (Activation)   (None, 128)              

In [31]:
network.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [32]:
network.fit(x_train, yTrain, epochs=100, batch_size=15)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7f03ac1052a0>

In [33]:
network.save("mymodel.keras")

In [34]:
model = ke.models.load_model('mymodel.keras')

In [35]:
loss, acc = model.evaluate(x_test, yTest, batch_size=15)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))


Test accuracy: 33.7%


In [36]:
test_data = pd.read_pickle('test.pkl')[0]

In [37]:
#process testdata
distanceUnoccupied = 0
distanceOccupied = 0
prevLong = test_data[0][0]
prevLat = test_data[0][1]
testing = []
for row in test_data:
    try:
        if (row[-1] == 0):
            distanceUnoccupied += hs.haversine(
                (prevLat, prevLong), (row[1], row[0]), unit=Unit.KILOMETERS)
        else:
            distanceOccupied += hs.haversine((prevLat, prevLong),
                                            (row[1], row[0]), unit=Unit.KILOMETERS)
    except Exception as e:
        print("Invalid data point", row)
    prevLat = row[1]
    prevLong = row[0]
testing.append([distanceOccupied, distanceUnoccupied])

min_max_scaler = preprocessing.MinMaxScaler()
testingScaled = min_max_scaler.fit_transform(testing)


In [38]:
predictions = model.predict(testingScaled)[0]
plate = predictions.argmax()+1
print(plate)

5
