In [1]:
import cv2
import numpy as np
import pandas as pd
from keras.layers import Conv2D, Flatten, Dense, LSTM, TimeDistributed, \
    MaxPooling2D
from keras.models import Sequential

IMAGE_SIZE = (64, 64, 3)


def extract_training_data(filename, csv_filename, image_size=IMAGE_SIZE,
                          frame_step=12, sequence_length=4):
    """Build sliding windows of down-sampled video frames plus their labels.

    The video is read frame by frame with a 1-based frame counter. Every
    ``frame_step``-th frame is resized and pushed into a sliding window; as
    soon as the window holds ``sequence_length`` frames it is emitted as one
    training sample, and the oldest frame is dropped so consecutive samples
    overlap by ``sequence_length - 1`` frames.

    Args:
        filename: Path of the video file, passed to ``cv2.VideoCapture``.
        csv_filename: Path (or file-like object) of the tab-separated label
            file, passed to ``pandas.read_csv``.
        image_size: ``(height, width, channels)`` of the resized frames. Only
            the first two entries are used; the channel count is whatever the
            decoded frames provide.
        frame_step: Keep only frames whose 1-based counter is a multiple of
            this value.
        sequence_length: Number of kept frames per training sample.

    Returns:
        A tuple ``(training_images, training_labels)``:

        * ``training_images`` -- ``numpy`` array built from the list of
          windows, i.e. one entry of ``sequence_length`` resized frames per
          sample.
        * ``training_labels`` -- the rows of the label file selected with
          ``labels.loc`` using the frame counter of the *last* frame of each
          window as index label. NOTE(review): this assumes the CSV row index
          lines up with the 1-based frame counter -- confirm against the
          recorder that produced the files.

    Raises:
        ValueError: If ``frame_step`` or ``sequence_length`` is smaller than 1.
        IOError: If the video file cannot be opened.
    """
    if frame_step < 1 or sequence_length < 1:
        raise ValueError("frame_step and sequence_length must be >= 1")

    labels = pd.read_csv(csv_filename, sep="\t")

    # cv2.resize expects dsize as (width, height) whereas image_size follows
    # the array / Keras layout (height, width, channels).
    target_height, target_width = image_size[:2]
    dsize = (target_width, target_height)

    training_images = []
    training_label_ids = []
    window = []
    frame_counter = 0

    cap = cv2.VideoCapture(filename)
    try:
        if not cap.isOpened():
            raise IOError("Could not open video file: {}".format(filename))

        while True:
            frame_counter += 1
            result, frame = cap.read()
            if not result:
                # End of stream (or read error): stop collecting.
                break
            if frame_counter % frame_step != 0:
                continue

            window.append(cv2.resize(frame, dsize))
            if len(window) >= sequence_length:
                # Store a copy: the window list itself keeps sliding.
                training_images.append(list(window))
                training_label_ids.append(frame_counter)
                window.pop(0)
    finally:
        # Always free the decoder, even if reading or resizing failed.
        cap.release()

    training_images = np.array(training_images)
    training_labels = labels.loc[training_label_ids]

    assert training_images.shape[0] == training_labels.shape[0]
    return training_images, training_labels



Using TensorFlow backend.


In [2]:

# Recording used for training: the video and its tab-separated label file.
train_video_path = "Data/20171029-201949.h264.avi"
train_labels_path = "Data/20171029-201949.h264.csv"

images, labels = extract_training_data(
    train_video_path, train_labels_path, IMAGE_SIZE)

In [3]:
# One sample is a clip of 4 frames, each 64x64 pixels with 3 channels.
clip_shape = (4, 64, 64, 3)

# The same convolution is applied to every frame of the clip; the per-frame
# feature maps are pooled, flattened and fed to an LSTM, followed by a linear
# 4-unit regression head.
frame_conv = Conv2D(32, (3, 3), kernel_initializer="he_normal", activation='relu')
model = Sequential([
    TimeDistributed(frame_conv, input_shape=clip_shape),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Flatten()),
    LSTM(32, return_sequences=False),
    Dense(4, activation='linear'),
])

model.compile(loss="mse", optimizer="adam")
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_1 (TimeDist (None, 4, 62, 62, 32)     896       
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 31, 31, 32)     0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 4, 30752)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                3940480   
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 132       
Total params: 3,941,508
Trainable params: 3,941,508
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Peek at the first label rows to confirm the CSV was parsed as expected.
print(labels.head())

# Regression targets: every column except the first one (Timestamp_s).
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# selecting the columns and taking .values yields the same ndarray on both
# old and new pandas versions.
y_train = labels[labels.columns[1:]].values

# Sanity check before training: run the still-untrained model over the clips
# and print, for every 10th sample, the mean pixel value of each frame in the
# clip next to the model output.
prediction = model.predict(images)

for i in range(0, len(prediction), 10):
    print(np.mean(images[i], axis=(1, 2, 3)), prediction[i])

history = model.fit(images, y_train, batch_size=64, epochs=5, validation_split=0.04)

    Timestamp_s  Steering  Braking  Throttle  Gear
48     0.800033 -0.126390      0.0       1.0   2.0
60     1.000025  0.270588      0.0       1.0   2.0
72     1.200017  0.242210      0.0       1.0   2.0
84     1.400009  0.163224      0.0       1.0   1.0
96     1.600001 -0.268356      0.0       1.0   0.0
[ 104.18440755  110.47045898  101.62109375  102.18180339] [-0.42458004 -0.30166167 -0.09278001  0.42026782]
[ 94.78483073  99.2582194   99.39916992  99.93066406] [-0.40404826  0.39874944  0.53612435  0.51450998]
[  91.58040365  109.36092122  108.70768229  111.84090169] [-0.00253846  0.19317962  0.41751745  0.62058353]
[ 106.91145833  110.4991862   112.81437174  113.81966146] [-0.12738128  0.18774994 -0.34626338  0.64406598]
[ 104.86889648  100.56681315   99.45727539  100.4152832 ] [-0.32616454 -0.18462539 -0.61047947 -0.17227808]
[ 102.41105143  107.25227865  103.63614909  112.02213542] [ 0.47756708 -0.27511597  0.02464648 -0.11309636]
[ 106.71199544  102.18066406  102.61246745  106.07

[ 89.9769694   94.38248698  96.66853841  94.27791341] [ 0.57626748  0.37536746 -0.31726745  0.09904946]
[ 91.45222982  89.97949219  97.12084961  99.20125326] [ 0.00421996 -0.27704948  0.41033089  0.36036533]
[ 104.66935221  104.62915039  102.21679688  105.78125   ] [-0.20843866  0.34921122  0.50951064  0.80383688]
[ 103.62858073  102.66682943  100.45035807   98.30061849] [ 0.65331894  0.48212248  0.29119566  0.48312908]
[ 95.55777995  96.82698568  93.87076823  97.83154297] [ 0.58695716 -0.24035263 -0.30878383 -0.26891688]
[ 106.50065104  101.16943359  101.24886068  104.86499023] [-0.44353026 -0.28873795  0.72490406  0.75995463]
[ 101.81437174   97.7203776    91.89347331   95.20564779] [-0.07320574  0.18180378  0.09504177  1.01668489]
[ 93.12866211  96.75398763  95.76546224  95.12556966] [-0.13522604 -0.11790133 -0.25357682 -0.09420819]
[ 95.11027018  96.71370443  98.08072917  94.41438802] [ 0.33284265  0.01783253  0.28813875  0.3933371 ]
[ 107.6632487   106.94620768  106.27913411  106.

Train on 1671 samples, validate on 70 samples
Epoch 1/5
 128/1671 [=>............................] - ETA: 53s - loss: 0.8740

KeyboardInterrupt: 

In [None]:
# Run the model on a second recording. The results are bound to their own
# names so that the training `images` / `prediction` from the earlier cells
# stay intact (re-running the training cell afterwards would otherwise pair
# these frames with the first recording's labels). The unused labels get an
# explicit name instead of `_`, which IPython reserves for the last output.
test_images, _test_labels = extract_training_data(
    "Data/20171029-201639.h264.avi", "Data/20171029-201639.h264.csv", IMAGE_SIZE)

test_prediction = model.predict(test_images)

# Print, for every 10th clip, the per-frame mean pixel value next to the
# model output.
for i in range(0, len(test_prediction), 10):
    print(np.mean(test_images[i], axis=(1, 2, 3)), test_prediction[i])