Processing Data

In [103]:
import pandas as pd
import numpy as np

df = pd.read_csv('mot_labels.csv')
df['category'] = df['category'].map({'car': 1, 'pedestrian': 0, 3: 3}).fillna(3).astype(int)

ids = df[["id"]].drop_duplicates()

print(ids)

#This code gets a list of all unique ID's in the dataset and stores it in ids

  df = pd.read_csv('mot_labels.csv')


              id
0        89537.0
1        89538.0
2        89539.0
3        89540.0
4        89541.0
...          ...
2890739  70589.0
2890768  70590.0
2890792  70591.0
2890813  70592.0
2890817  70593.0

[112820 rows x 1 columns]


In [104]:
#dimentions of the video
min_value = 0
max_xValue = 1280
max_yValue = 720
#Normalization is when a value is mapped between 0 and 1
#This maps the coordinants of the screen between 1 and 0
def normalize(df, min, maxX, maxY):
  #print(df['box2d.x2'])
  
  df['box2d.x1'] = (df['box2d.x1'] - min) / (maxX - min)
  df['box2d.x2'] = (df['box2d.x2'] - min) / (maxX - min)
  df['box2d.y1'] = (df['box2d.y1'] - min) / (maxY - min)
  df['box2d.y2'] = (df['box2d.y2'] - min) / (maxY - min)
  return df

In [105]:
#Gets the Object id of the object that appears most in the video, meaning that the object will have the longest path
counts = df['id'].value_counts()

# get the value that appears the most number of times
most_common_value = counts.idxmax()

print(most_common_value)

87944.0


In [106]:
#minimum length of a desired path
minLength = 150
#The maximum number of paths generated
samples = 100 #turned down for this demonstration. Was previously 5000

list = []
i = 0
LENGTH = 0

for ind in ids.index:
  
  #break out of loop if we hit maximum samples
  if i>samples:
    break
  
  #gets the path with the specific ID
  value = ids['id'][ind]
  path = df.loc[df['id'] == value] 

  #skip if the length of the path is too small
  if len(path)<minLength:
    continue
  
  #Gets the 'category','box2d.x1','box2d.x2','box2d.y1','box2d.y2', which is the important data
  path = path[['category','box2d.x1','box2d.x2','box2d.y1','box2d.y2']]
  path['box2d.x1']= path['box2d.x1'].astype(np.float64)
  path['box2d.x2']= path['box2d.x1'].astype(np.float64)
  path['box2d.y1']= path['box2d.x1'].astype(np.float64)
  path['box2d.y2']= path['box2d.x1'].astype(np.float64)
  
  #normalizes this data
  print(i)
  path = normalize(path, min_value, max_xValue, max_yValue)
  
  #appends it to list
  list.append(path)
  i+=1


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [107]:
#list contains a list of individual object paths
#Prints the path of the first object
print("Sample")
print(list[0])

Sample
       category  box2d.x1  box2d.x2  box2d.y1  box2d.y2
9171          1  0.227488  0.227488  0.404423  0.404423
9176          1  0.227850  0.227850  0.405066  0.405066
9181          1  0.227773  0.227773  0.404929  0.404929
9187          1  0.228546  0.228546  0.406304  0.406304
9193          1  0.227722  0.227722  0.404840  0.404840
...         ...       ...       ...       ...       ...
10342         1  0.235769  0.235769  0.419144  0.419144
10352         1  0.235928  0.235928  0.419428  0.419428
10361         1  0.236247  0.236247  0.419995  0.419995
10368         1  0.234972  0.234972  0.417727  0.417727
10375         1  0.233443  0.233443  0.415010  0.415010

[202 rows x 5 columns]


In [108]:
#For each sequence we give the LTSM a small window of it with length of lookback
#Given a sequence of [1, 2, 3, 4, 5, 6] and a lookback of 3, the method would return:
"""
[[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[3, 5, 6]]
"""
#We do this because we want to give the ltsm [1, 2, 3] to predict 4, then [2, 3, 4] to predict 5 and so on
def split_data(paths, lookback):
    data_raw = paths.to_numpy() # convert to numpy array
    
    data = []
    
    # create all possible sequences of length lookback
    for index in range(len(data_raw) - lookback): 
        data.append(data_raw[index: index + lookback])
    
    data = np.array(data)
    test_set_size = int(np.round(0.2*data.shape[0]))
    train_set_size = data.shape[0] - (test_set_size)

    
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]
    
    x_test = data[train_set_size:,:-1]
    y_test = data[train_set_size:,-1,:]
    
    return [x_train, y_train, x_test, y_test]

Training

In [109]:
import torch
import torch.nn as nn

lookback = 10  # choose sequence length

In [110]:
#initialize LTSM class
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :]) 
        return out

In [111]:
input_dim = 5
hidden_dim = 32
num_layers = 2
output_dim = 5
num_epochs = 150 #turned down for this demonstration. Was 500

In [112]:
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

In [113]:
import time


hist = np.zeros(num_epochs)
start_time = time.time()
lstm = []

#Idealy, we should only train it on one sequence, but due to the limited data, we will train it accross multiple sequences
for x in range(len(list)):
    
    dataf = list[x]
    
    if dataf.empty or dataf.shape[0]<= lookback:
        continue
    
    x_train, y_train, x_test, y_test = split_data(list[x], lookback)

    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    x_test = torch.from_numpy(x_test).type(torch.Tensor)
    y_train_lstm = torch.from_numpy(y_train).type(torch.Tensor)
    y_test_lstm = torch.from_numpy(y_test).type(torch.Tensor)
    
    for t in range(num_epochs):
        y_train_pred = model(x_train)

        loss = criterion(y_train_pred, y_train_lstm)
        #print("Epoch ", t, "MSE: ", loss.item())
        hist[t] = loss.item()

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
    print("Data ", x, "MSE: ", loss.item())
    
training_time = time.time()-start_time
print("Training time: {}".format(training_time))


Data  0 MSE:  0.0002041455009020865
Data  1 MSE:  7.186168659245595e-06
Data  2 MSE:  8.661724677949678e-06
Data  3 MSE:  4.687935415859101e-06
Data  4 MSE:  6.113641575211659e-06
Data  5 MSE:  1.0578409273875877e-05
Data  6 MSE:  1.1384614481357858e-05
Data  7 MSE:  5.2960945140512194e-06
Data  8 MSE:  7.787951290083583e-06
Data  9 MSE:  2.1761268726550043e-05
Data  10 MSE:  0.0010517039336264133
Data  11 MSE:  0.0003122840716969222
Data  12 MSE:  0.00022397132124751806
Data  13 MSE:  6.762067641830072e-05
Data  14 MSE:  0.0004992866306565702
Data  15 MSE:  0.00020765162480529398
Data  16 MSE:  2.0578776457114145e-05
Data  17 MSE:  1.0877552995225415e-05
Data  18 MSE:  2.1493595340871252e-05
Data  19 MSE:  1.7314569049631245e-05
Data  20 MSE:  1.5929665096336976e-05
Data  21 MSE:  1.739465369610116e-05
Data  22 MSE:  1.1309490218991414e-05
Data  23 MSE:  3.306853977846913e-05
Data  24 MSE:  1.6327032426488586e-05
Data  25 MSE:  0.004059011582285166
Data  26 MSE:  0.0001360336609650403

In [114]:
import pickle
# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))

In [115]:

print(x_test)

print(model(x_test))

tensor([[[1.0000, 0.1038, 0.1038, 0.1845, 0.1845],
         [1.0000, 0.1136, 0.1136, 0.2020, 0.2020],
         [1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         ...,
         [1.0000, 0.1295, 0.1295, 0.2301, 0.2301],
         [1.0000, 0.1393, 0.1393, 0.2477, 0.2477],
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512]],

        [[1.0000, 0.1136, 0.1136, 0.2020, 0.2020],
         [1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         ...,
         [1.0000, 0.1393, 0.1393, 0.2477, 0.2477],
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576]],

        [[1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         ...,
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576]],

        ...,

        [[1.0000, 0.

In [116]:
def unNormalize(df, min, maxX, maxY):
    if(df.ndim ==3):
        for i in range(df.shape[0]):
        
        
            #for j in range(df.shape[1]):
            df[i][1] = (df[i][1]  + min) * (maxX - min)
            df[i][2] = (df[i][2]  + min) * (maxX - min)
            df[i][3] = (df[i][3]  + min) * (maxY - min)
            df[i][4] = (df[i][4]  + min) * (maxY - min)
        
    if(df.ndim ==2):
        for i in range(df.shape[0]):
            print("wow")
            
            what  = df[i][3]
            print(what)
            print((what  + min) * (maxY - min))
            df[i][1] = (df[i][1]  + min) * (maxX - min)
            df[i][2] = (df[i][2]  + min) * (maxX - min)
            df[i][3] = (what  + min) * (maxY - min)
            df[i][4] = (df[i][4]  + min) * (maxY - min)
    return df


Prediction Example

In [117]:
#We see an example of a prediction it would give
print("x_test")
print(x_test)
#unNorm = unNormalize(x_test,  min_value, max_xValue, max_yValue)

#print("Unnormalized test")
#print(unNorm)

prediction = model(x_test)
print("prediction:")
print(prediction)

#unNormPred = unNormalize(prediction,  min_value, max_xValue, max_yValue)

#print(unNormPred)




x_test
tensor([[[1.0000, 0.1038, 0.1038, 0.1845, 0.1845],
         [1.0000, 0.1136, 0.1136, 0.2020, 0.2020],
         [1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         ...,
         [1.0000, 0.1295, 0.1295, 0.2301, 0.2301],
         [1.0000, 0.1393, 0.1393, 0.2477, 0.2477],
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512]],

        [[1.0000, 0.1136, 0.1136, 0.2020, 0.2020],
         [1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         ...,
         [1.0000, 0.1393, 0.1393, 0.2477, 0.2477],
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576]],

        [[1.0000, 0.1103, 0.1103, 0.1960, 0.1960],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         [1.0000, 0.1235, 0.1235, 0.2196, 0.2196],
         ...,
         [1.0000, 0.1413, 0.1413, 0.2512, 0.2512],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576],
         [1.0000, 0.1449, 0.1449, 0.2576, 0.2576]],

        ...,

        [[1.0