In [1]:
import os
from os.path import join
import sys
import cv2 as cv
import numpy as np
from numpy.random import RandomState
import pickle
import matplotlib.pyplot as plt

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def load_labels(specific_video=None):
    """ Loads in image data as numpy arrays """
    sequence = []
    none_count = 0 
    filedir = join(os.getcwd(),"labels")
    for file in os.listdir(filedir):
        ## change current seq when video_id change or marker number changes
        if file.endswith(".jpg"):
            file = file.split(".")[0]
            file = file.split("_")
            if specific_video == None:
                video_id, marker_num, marker_type, frame_num, x_pos, y_pos = file[0], int(file[1]), int(file[2]), int(file[3]), int(file[4]), int(file[5])
                current_seq = [video_id, marker_num, marker_type, frame_num, x_pos, y_pos]
                sequence.append(current_seq)
            else:
                if file[0] == video_id:
                    video_id, marker_num, marker_type, frame_num, x_pos, y_pos = file[0], int(file[1]), int(file[2]), int(file[3]), int(file[4]), int(file[5])
                    current_seq = [video_id, marker_num, marker_type, frame_num, x_pos, y_pos]
                    sequence.append(current_seq)
    return sequence

In [3]:
def sort_labels(sequence):
    sequence.sort(key=lambda x: x[3]) ## Sort by frame number
    sequence.sort(key=lambda x: x[1]) ## Sort by marker_num
    sequence.sort(key=lambda x: x[0]) ## Sort by video_name
    return sequence

In [4]:
def assign_labels(sequence):
    ##  ball change or vid change
    prev_vid_id, prev_marker_num = sequence[0][0][0], sequence[0][0][1]
    for idx, seq in enumerate(sequence):
        vid_id, marker_num = seq[0], seq[1]
        if (vid_id != prev_vid_id) or (marker_num != prev_marker_num):
            prev_vid_id, prev_marker_num = vid_id, marker_num
            prev_coords, prev_frame_num = np.array([-1,-1]), -1
        frame_num, x_pos, y_pos = seq[3], seq[4], seq[5]
        current_coords = np.array([x_pos, y_pos])
        if (prev_coords[0] == -1) & (prev_coords[1] == -1):
            dist = -1
        else:
            dist = np.linalg.norm(current_coords - prev_coords)
        if (prev_frame_num == -1):
            frame_diff = -1
        else:
            frame_diff = frame_num - prev_frame_num
        prev_coords = current_coords
        prev_frame_num = frame_num
        sequence[idx].append(dist)
        sequence[idx].append(frame_diff)
    return sequence

In [5]:
def load_video(video_path, video_name, flip, display = False): ## Convert 3rd element in video name into flip
    width, height = 960, 540
    cap = cv.VideoCapture(join(video_path,video_name))
    ret, frame = cap.read()
    if (flip):
        frame = cv.flip(frame, 0)
    clone = cv.resize(frame, (width,height))
    if (display):
        cv.namedWindow("Video")
    frame_num = 0
    frames = []
    while (ret):
        if (display):
            cv.imshow("Video", clone)
        frames.append( [frame, frame_num] )
        if (display):
            key = cv.waitKey(0)
            if key == 113:
                break
        ret, frame = cap.read()
        if (ret):
            frame_num += 1
            if (flip):
                frame = cv.flip(frame, 0)
            clone = cv.resize(frame, (width, height))
    print("Frames: {}".format(frames[-1][1] + 1))
    cap.release()
    if (display):
        cv.destroyAllWindows()
    return frames

In [16]:
def load_data_and_labels(sequence, vid_format=".avi"):
    print("Loading videos")
    filedir = join(os.getcwd(),"resources")
    video_recorded = []
    for file in os.listdir(filedir):
        video_id = file.split(".")[0]
        video_recorded.append(video_id)
    video_annotated = list(sorted(set([i[0] for i in sequence])))
    video_data = []
    """ Checking the videos annotated is in the video recorded """
    for rec in video_recorded:
        if rec in video_annotated:
            data = []
            print("#####################################")
            print("Found: {}".format(rec))
            labels = [i for i in sequence if i[0] == rec]
            labels.sort(key=lambda x: x[3]) ## Sort by frame number
            labels = np.asarray(labels)[:,1:].astype('float32')
            print(labels)
            print("#####################################")
            frames = load_video(filedir, rec + vid_format, False)
            frames = np.asarray(frames)
            print(frames.shape)
            prev_frame = 0
            frame_labels = []
            print(len(labels))
            for idx, label in enumerate(labels):
                print("idx: {}, curr_frame = {}".format(idx, int(label[2])))
                curr_frame = int(label[2])
                if (curr_frame != prev_frame):
                    data.append([frames[prev_frame][0], frame_labels])                    
                    frame_labels = [label]
                elif idx == len(labels) - 1:
                    frame_labels.append(label)
                    data.append([frames[curr_frame][0],frame_labels])
                else:
                    frame_labels.append(label)
                prev_frame = int(curr_frame)
            video_data.append(data)
    print("Search Complete")
    return video_data    

In [17]:
def data_to_np(data):
    data_np = np.asarray(data)
    x_values = []
    y_values = []
    
    for i in range(len(data_np)):
        print("Frames in video {}: {}".format(i,len(data_np[i])))            
    
    for vid_pos in range(len(data_np)):
        ##print("vid: {}".format(vid_pos))
        
        ### loop through the number of frames not the points capturedd
        
        for frame_pos in range(len(data_np[vid_pos])):
            ##print("frame num: {}".format(frame_pos))
            x_np = data_np[vid_pos][frame_pos][0]
            x_shape = list(x_np[0].shape)
            x_shape[:0] = [len(x_np)]
            x_np = np.concatenate(x_np).reshape(x_shape)
            x_values.append(x_np)

            y_np = data_np[vid_pos][frame_pos][1]
            y_np = np.asarray(y_np)
            zero_np = np.zeros((16,7))
            if y_np.shape != (0,):
                zero_np[:y_np.shape[0],:y_np.shape[1]] = y_np
            y_values.append(zero_np)
        
    x_img = np.asarray(x_values)
    y_values = np.asarray(y_values)
    x_diff = y_values[:, :, 5:]
    y_cords = y_values[:, :, 3:5]
    print("x_img shape: {}, x_diff shape: {}, y_values shape: {}".format(x_img.shape, x_diff.shape, y_cords.shape))
    return x_img, x_diff, y_cords

In [18]:
def load_data():
    seq = load_labels()
    seq = sort_labels(seq)
    seq = assign_labels(seq)
    data = load_data_and_labels(seq)
    x_img, x_diff, y = data_to_np(data)
    return x_img, x_diff, y

In [19]:
x_img, x_diff, y = load_data()

Loading videos
#####################################
Found: v0-0
[[  0.          1.          0.        ... 531.         -1.
   -1.       ]
 [  1.          2.          0.        ... 719.         -1.
   -1.       ]
 [  2.          1.          0.        ... 883.         -1.
   -1.       ]
 ...
 [  0.          1.        149.        ... 528.          4.
    1.       ]
 [  1.          2.        149.        ... 721.          5.3851647
    1.       ]
 [  2.          1.        149.        ... 878.          2.
    1.       ]]
#####################################
Frames: 150
(150, 2)
450
idx: 0, curr_frame = 0
idx: 1, curr_frame = 0
idx: 2, curr_frame = 0
idx: 3, curr_frame = 1
idx: 4, curr_frame = 1
idx: 5, curr_frame = 1
idx: 6, curr_frame = 2
idx: 7, curr_frame = 2
idx: 8, curr_frame = 2
idx: 9, curr_frame = 3
idx: 10, curr_frame = 3
idx: 11, curr_frame = 3
idx: 12, curr_frame = 4
idx: 13, curr_frame = 4
idx: 14, curr_frame = 4
idx: 15, curr_frame = 5
idx: 16, curr_frame = 5
idx: 17, curr_fra

Frames: 150
(150, 2)
450
idx: 0, curr_frame = 0
idx: 1, curr_frame = 0
idx: 2, curr_frame = 0
idx: 3, curr_frame = 1
idx: 4, curr_frame = 1
idx: 5, curr_frame = 1
idx: 6, curr_frame = 2
idx: 7, curr_frame = 2
idx: 8, curr_frame = 2
idx: 9, curr_frame = 3
idx: 10, curr_frame = 3
idx: 11, curr_frame = 3
idx: 12, curr_frame = 4
idx: 13, curr_frame = 4
idx: 14, curr_frame = 4
idx: 15, curr_frame = 5
idx: 16, curr_frame = 5
idx: 17, curr_frame = 5
idx: 18, curr_frame = 6
idx: 19, curr_frame = 6
idx: 20, curr_frame = 6
idx: 21, curr_frame = 7
idx: 22, curr_frame = 7
idx: 23, curr_frame = 7
idx: 24, curr_frame = 8
idx: 25, curr_frame = 8
idx: 26, curr_frame = 8
idx: 27, curr_frame = 9
idx: 28, curr_frame = 9
idx: 29, curr_frame = 9
idx: 30, curr_frame = 10
idx: 31, curr_frame = 10
idx: 32, curr_frame = 10
idx: 33, curr_frame = 11
idx: 34, curr_frame = 11
idx: 35, curr_frame = 11
idx: 36, curr_frame = 12
idx: 37, curr_frame = 12
idx: 38, curr_frame = 12
idx: 39, curr_frame = 13
idx: 40, curr_f

Frames: 150
(150, 2)
343
idx: 0, curr_frame = 33
idx: 1, curr_frame = 34
idx: 2, curr_frame = 35
idx: 3, curr_frame = 36
idx: 4, curr_frame = 36
idx: 5, curr_frame = 37
idx: 6, curr_frame = 37
idx: 7, curr_frame = 38
idx: 8, curr_frame = 38
idx: 9, curr_frame = 38
idx: 10, curr_frame = 39
idx: 11, curr_frame = 39
idx: 12, curr_frame = 39
idx: 13, curr_frame = 40
idx: 14, curr_frame = 40
idx: 15, curr_frame = 40
idx: 16, curr_frame = 41
idx: 17, curr_frame = 41
idx: 18, curr_frame = 41
idx: 19, curr_frame = 42
idx: 20, curr_frame = 42
idx: 21, curr_frame = 42
idx: 22, curr_frame = 43
idx: 23, curr_frame = 43
idx: 24, curr_frame = 43
idx: 25, curr_frame = 44
idx: 26, curr_frame = 44
idx: 27, curr_frame = 44
idx: 28, curr_frame = 45
idx: 29, curr_frame = 45
idx: 30, curr_frame = 45
idx: 31, curr_frame = 46
idx: 32, curr_frame = 46
idx: 33, curr_frame = 46
idx: 34, curr_frame = 47
idx: 35, curr_frame = 47
idx: 36, curr_frame = 47
idx: 37, curr_frame = 48
idx: 38, curr_frame = 48
idx: 39, c

Frames: 150
(150, 2)
415
idx: 0, curr_frame = 0
idx: 1, curr_frame = 0
idx: 2, curr_frame = 1
idx: 3, curr_frame = 1
idx: 4, curr_frame = 2
idx: 5, curr_frame = 2
idx: 6, curr_frame = 3
idx: 7, curr_frame = 3
idx: 8, curr_frame = 4
idx: 9, curr_frame = 4
idx: 10, curr_frame = 5
idx: 11, curr_frame = 5
idx: 12, curr_frame = 6
idx: 13, curr_frame = 6
idx: 14, curr_frame = 7
idx: 15, curr_frame = 7
idx: 16, curr_frame = 8
idx: 17, curr_frame = 8
idx: 18, curr_frame = 9
idx: 19, curr_frame = 9
idx: 20, curr_frame = 10
idx: 21, curr_frame = 10
idx: 22, curr_frame = 11
idx: 23, curr_frame = 11
idx: 24, curr_frame = 12
idx: 25, curr_frame = 12
idx: 26, curr_frame = 13
idx: 27, curr_frame = 13
idx: 28, curr_frame = 14
idx: 29, curr_frame = 14
idx: 30, curr_frame = 15
idx: 31, curr_frame = 15
idx: 32, curr_frame = 16
idx: 33, curr_frame = 16
idx: 34, curr_frame = 17
idx: 35, curr_frame = 17
idx: 36, curr_frame = 18
idx: 37, curr_frame = 18
idx: 38, curr_frame = 19
idx: 39, curr_frame = 19
idx: 

Frames: 150
(150, 2)
176
idx: 0, curr_frame = 0
idx: 1, curr_frame = 64
idx: 2, curr_frame = 64
idx: 3, curr_frame = 65
idx: 4, curr_frame = 65
idx: 5, curr_frame = 66
idx: 6, curr_frame = 66
idx: 7, curr_frame = 67
idx: 8, curr_frame = 67
idx: 9, curr_frame = 68
idx: 10, curr_frame = 68
idx: 11, curr_frame = 69
idx: 12, curr_frame = 69
idx: 13, curr_frame = 70
idx: 14, curr_frame = 70
idx: 15, curr_frame = 70
idx: 16, curr_frame = 71
idx: 17, curr_frame = 71
idx: 18, curr_frame = 71
idx: 19, curr_frame = 72
idx: 20, curr_frame = 72
idx: 21, curr_frame = 73
idx: 22, curr_frame = 73
idx: 23, curr_frame = 74
idx: 24, curr_frame = 74
idx: 25, curr_frame = 75
idx: 26, curr_frame = 75
idx: 27, curr_frame = 76
idx: 28, curr_frame = 76
idx: 29, curr_frame = 76
idx: 30, curr_frame = 77
idx: 31, curr_frame = 77
idx: 32, curr_frame = 77
idx: 33, curr_frame = 78
idx: 34, curr_frame = 78
idx: 35, curr_frame = 78
idx: 36, curr_frame = 79
idx: 37, curr_frame = 79
idx: 38, curr_frame = 79
idx: 39, cu

Frames: 150
(150, 2)
248
idx: 0, curr_frame = 56
idx: 1, curr_frame = 57
idx: 2, curr_frame = 67
idx: 3, curr_frame = 68
idx: 4, curr_frame = 68
idx: 5, curr_frame = 69
idx: 6, curr_frame = 69
idx: 7, curr_frame = 70
idx: 8, curr_frame = 70
idx: 9, curr_frame = 71
idx: 10, curr_frame = 71
idx: 11, curr_frame = 72
idx: 12, curr_frame = 72
idx: 13, curr_frame = 73
idx: 14, curr_frame = 73
idx: 15, curr_frame = 73
idx: 16, curr_frame = 74
idx: 17, curr_frame = 74
idx: 18, curr_frame = 74
idx: 19, curr_frame = 75
idx: 20, curr_frame = 75
idx: 21, curr_frame = 75
idx: 22, curr_frame = 76
idx: 23, curr_frame = 76
idx: 24, curr_frame = 76
idx: 25, curr_frame = 77
idx: 26, curr_frame = 77
idx: 27, curr_frame = 77
idx: 28, curr_frame = 77
idx: 29, curr_frame = 78
idx: 30, curr_frame = 78
idx: 31, curr_frame = 78
idx: 32, curr_frame = 78
idx: 33, curr_frame = 79
idx: 34, curr_frame = 79
idx: 35, curr_frame = 79
idx: 36, curr_frame = 79
idx: 37, curr_frame = 80
idx: 38, curr_frame = 80
idx: 39, c

x_img shape: (962, 720, 1280, 3), x_diff shape: (962, 16, 2), y_values shape: (962, 16, 2)


In [None]:
# need to normalise y values
# need to build a model that includes distance and frame diff
# model predicting x and y values

# going to build model that predicts coords from image

In [None]:
def normalise_img(x_values):
    return x_values / 255

In [None]:
def cnn_prepare(x_value, y_value):
    #x_val = np.reshape(x_value,(x_value.shape[0] * x_value.shape[1], 
    #                            x_value.shape[2], 
    #                            x_value.shape[3], 
    #                            x_value.shape[4]))
    x_val = normalise_img(x_value)
    y_val = np.reshape(y_value,(y_value.shape[0],
                                y_value.shape[1] * y_value.shape[2]))
    return x_val, y_val

In [None]:
def random_np(x_np, y_np):
    prng = RandomState(0)
    randomise = prng.permutation(x_np.shape[0])
    x_np = x_np[randomise]
    y_np = y_np[randomise]
    return x_np, y_np

In [None]:
def split_np(x_data, y_data, percent):
    """ splits a numpy array into testing and training """
    position = int(len(x_data) * (1-percent))
    x_train, x_test = x_data[:position], x_data[position:]
    y_train, y_test = y_data[:position], y_data[position:]
    print('x_train shape: {}, x_test shape: {}'.format(x_train.shape,x_test.shape))
    print('y_train shape: {}, y_test shape: {}'.format(y_train.shape,y_test.shape))
    return x_train, y_train, x_test, y_test

In [None]:

x_img, y = random_np(x_img, y)

x_cnn, y_cnn = cnn_prepare(x_img, y)
#x_cnn, y_cnn = x_img, y
x_train, y_train, x_test, y_test = split_np(x_cnn, y_cnn, 0.2)

## remove useless variables
x_img, x_diff, y, x_cnn, y_cnn = None, None, None, None, None

In [None]:
## prepare data to pickle
import pickle

data = [x_train, y_train, x_test, y_test]
pickle.dump(data, open("./aws/data_rand.p","wb"))
print("Finished")

In [None]:
input_shape = x_train.shape[1:]
output_shape = y_train.shape[1]

model = Sequential()
model.add(Conv2D(32, kernel_size=(3,3),
                activation='relu',
                input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, (3, 3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, (3, 3),activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(output_shape))
model.compile(loss='mean_squared_error', 
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="dnn/tmp_best_weights.hdf5", verbose=0, save_best_only=True) # save best model

batch_size = 4
epochs = 1000
import time
start_time = time.time()

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          callbacks=[monitor,checkpointer])
model.load_weights('dnn/tmp_best_weights.hdf5') # load weights from best model


save_dir = join(os.getcwd(),"dnn")
save_path = join(save_dir,str(int(start_time)) + "_cnn.h5")
model.save(save_path)

score = model.evaluate(x_test, y_test, verbose=2)
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

elapsed_time = time.time() - start_time
print("Elapsed time: {}".format(hms_string(elapsed_time)))