In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from keras.losses import sparse_categorical_crossentropy
from keras.optimizers import Adam
import cv2
from utils.video_decoding import VideoDecoder
import time
import ast



In [2]:
video_data = VideoDecoder("data_gather/cctv_data/output_12_31_11.mp4")

In [3]:
def get_label(frame_number):
    """
    Get the label for the frame number

    Parameters:
         frame_number: int
    Return:
        in format [tuple(x, y),....]
    """
    with open("test/output_12_31_11.mp4_coordinates.txt", "r") as f:
        for line in f:
            if line.split(":")[0] == str(frame_number):
                values = ast.literal_eval(line.split(":")[1].rstrip().lstrip())
                values = [[x[0], x[1]] for x in values]
                return np.array(values)

In [4]:
def generate_next_frames(amount_of_frames):
    frames = []

    # looping to get 10 frames with label that is not empty --> meaning frame had 0 cars in it
    while True:
        frame = video_data.current_frame

        # if frame was empty, then we want to skip it
        if frame is None:
            continue

        # frame = cv2.resize(frame, (224, 224))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # getting label for current frame
        labels = np.array(get_label(video_data.current_frame_number))
        # checking here that current frame had at least 1 car
        # print(labels.shape)
        if labels.dtype == object or labels.dtype == np.float64 or labels.shape != (1, 2):
            video_data.next_frame()
            continue

        # combining frame and label data
        combined_data = np.array([labels[0], frame], dtype=object)
        # appending to frames
        frames.append(combined_data)

        if len(frames) == amount_of_frames:
            break

        video_data.next_frame()

    return np.array(frames)

In [5]:
get_label(350)

array([[195, 195],
       [ 15, 315]])

In [6]:
data = generate_next_frames(100)

In [7]:
len(data)

100

In [8]:
data_dict = {}
for i, x in enumerate(data):
    data_dict[i] = {
        "label": x[0],
        "frame": x[1]
    }
    print(x[1].shape, end=" ")
    print(x[0])

(480, 640) [285 165]
(480, 640) [285 165]
(480, 640) [285 165]
(480, 640) [255 165]
(480, 640) [285 165]
(480, 640) [285 165]
(480, 640) [285 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [165  15]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [285 165]
(480, 640) [255 165]
(480, 640) [315 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [315 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [255 165]
(480, 640) [2

In [9]:
data_dict[0]

{'label': array([285, 165]),
 'frame': array([[ 0,  0,  0, ...,  0,  0,  0],
        [ 1,  1,  4, ...,  0,  0,  0],
        [ 2,  1,  0, ...,  0,  0,  0],
        ...,
        [23, 24, 24, ..., 45, 36, 38],
        [24, 25, 25, ..., 51, 50, 52],
        [24, 25, 25, ..., 50, 50, 53]], dtype=uint8)}

In [10]:
for k, v in data_dict.items():
    data_dict[k]["label_number"] = (data_dict[k]["label"][0] - 1) * 640 + data_dict[k]["label"][1]

In [11]:
data_dict[0]

{'label': array([285, 165]),
 'frame': array([[ 0,  0,  0, ...,  0,  0,  0],
        [ 1,  1,  4, ...,  0,  0,  0],
        [ 2,  1,  0, ...,  0,  0,  0],
        ...,
        [23, 24, 24, ..., 45, 36, 38],
        [24, 25, 25, ..., 51, 50, 52],
        [24, 25, 25, ..., 50, 50, 53]], dtype=uint8),
 'label_number': 181925}

In [12]:
new_data = []
for k, v in data_dict.items():
    new_data.append([data_dict[k]["label_number"], data_dict[k]["frame"]])

In [13]:
X = [x[1] for x in new_data]
y = [x[0] for x in new_data]

In [14]:
X = np.array(X).astype(np.float32)
y = np.array(y)

In [15]:
X = X/255
y = y/307200

In [16]:
# y_list = [[data[0][0], data[0][1]] for data in y]

In [17]:
# y_list

In [18]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [19]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [20]:
validate_data = (X_test[:10], y_test[:10])

In [21]:
for x in y_train:
    print(x)
    print(type(x))

0.5922037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5298014322916667
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.46730143229166665
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5298014322916667
<class 'numpy.float64'>
0.5922037760416666
<class 'numpy.float64'>
0.5298014322916667
<class 'numpy.float64'>
0.46730143229166665
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5298014322916667
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5298014322916667
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.46730143229166665
<class 'numpy.float64'>
0.5297037760416666
<class 'numpy.float64'>
0.529703

In [22]:
X_train.shape

(80, 480, 640)

In [23]:
y_train.shape

(80,)

In [24]:
y_train

array([0.59220378, 0.52970378, 0.52970378, 0.52980143, 0.52970378,
       0.46730143, 0.52970378, 0.52980143, 0.59220378, 0.52980143,
       0.46730143, 0.52970378, 0.52980143, 0.52970378, 0.52980143,
       0.52970378, 0.52970378, 0.52970378, 0.52970378, 0.52970378,
       0.52970378, 0.46730143, 0.52970378, 0.52970378, 0.59220378,
       0.52970378, 0.59220378, 0.34171549, 0.52970378, 0.52970378,
       0.52970378, 0.52970378, 0.52980143, 0.52980143, 0.52970378,
       0.59220378, 0.59220378, 0.59220378, 0.52970378, 0.52970378,
       0.52980143, 0.52970378, 0.52970378, 0.65470378, 0.52970378,
       0.52970378, 0.59220378, 0.52970378, 0.52970378, 0.52970378,
       0.52970378, 0.52970378, 0.52970378, 0.65470378, 0.52970378,
       0.52970378, 0.52970378, 0.52970378, 0.52970378, 0.65470378,
       0.52970378, 0.59220378, 0.52970378, 0.52970378, 0.52970378,
       0.52970378, 0.46730143, 0.52980143, 0.52970378, 0.52970378,
       0.52970378, 0.52970378, 0.52970378, 0.52970378, 0.52970

# How to train the model
Somehow the problem needs to be turned into classification problem --> maybe give the model max error of 5 pixels from guessing the car location and labeling 0 or 1 if the model guessed all the cars within 5 pixels of the actual location or not.

In [25]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(480, 640, 1), padding='same'))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 480, 640, 32)      320       
                                                                 
 conv2d_1 (Conv2D)           (None, 480, 640, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 240, 320, 32)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 240, 320, 32)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 240, 320, 64)      18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 240, 320, 64)      36928     
                                                        

In [27]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

  super().__init__(name, **kwargs)


In [28]:
print(type(X_train))
print(type(y_train))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [29]:
history = model.fit(X_train, y_train, batch_size=5, epochs=10, verbose=1, validation_data=validate_data, shuffle=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
