# Grand Theft Auto V Driving learning with Deep Learning (CNN and YOLO)
---
Self driving car in Grand Theft Auto V with Deep Learning and Object Detection. Adapted from
https://github.com/eritzyg/GTAV-Self-driving-car
by Iker Garcia and Eritz Yerga.



### Authors: Evan Miller, Allan Bourke



---
## Index:

0. <a href="#0.-Object-Detection">Object Detection</a>
1. <a href="#1.-Generate-dataset">Generate dataset</a>
    * <a href="#Frame-capture-functions">Frame capture functions</a>
    * <a href="#Image-preprocessing-functions">Image preprocess functions</a>
    * <a href="#Game-control-and-input-reading-functions">Game control and input reading functions</a>
    * <b><a href="#Generate-dataset">Generate dataset</a></b>
2. <a href="#2.-Dataset-processing-utilities">Dataset processing utilities</a>
3. <a href="#3.-Define-the-model">Define our model</a>
4. <a href="#4.-Train">Train</a>
5. <a href="#5.-Run-our-model-in-the-game">Run our model in the game</a>

<a href=""></a>

---
First we import the libraries we are going to need to run this notebook.

In [2]:
import numpy as np
from PIL import ImageGrab
import cv2 
import time
from sys import stdout
from IPython.display import clear_output
import os
from grabber import Grabber
import threading
import matplotlib.pyplot as plt
from collections import Counter
from random import shuffle
import glob
import tensorflow.keras
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import TimeDistributed, LSTM, Flatten, Dense, InputLayer, MaxPooling2D, Dropout, Activation, Embedding, GRU, ConvLSTM2D, concatenate
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras import optimizers
from tensorflow.keras.models import load_model
from tensorflow.keras import initializers
import h5py
import log
from heapq import nlargest

# 0. Object Detection
By Evan Miller

This section utilizes the Yolov3 object detection model with pretrained weights for driving created by Lavanya Shukla on Kaggle. https://www.kaggle.com/lavanyashukla01/yolov3-lyft-dataset/?select=model.h5 

In [3]:
# Loads the model from the file
obj_model = load_model('yolo.h5', compile=False)

In [4]:
#By Lavanya Shukla on Kaggle.

#Custom class for functionality needed to extract bounding boxes from the YOLO model

class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)

        return self.label

    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
 
        return self.score
 
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def _softmax(x, axis=-1):
    x = x - np.amax(x, axis, keepdims=True)
    e_x = np.exp(x)
    
    return e_x / e_x.sum(axis, keepdims=True)
 
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5

    boxes = []

    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4]   = _sigmoid(netout[..., 4])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        row = i // grid_w
        col = i % grid_w
        
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[row, col, b, 4]
            
            if(objectness <= obj_thresh): continue
            
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[row,col,b,:4]

            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height  
            
            # last elements are class probabilities
            classes = netout[row,col,b,5:]
            
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)

            boxes.append(box)

    return boxes

#TODO: Minimize implementation for images that don't need resizing
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        new_h = net_w
        new_w = (image_w*net_h)/image_h
        
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
        

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h
    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    union = w1*h1 + w2*h2 - intersect
    return float(intersect) / union

def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
        
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])

        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]

            if boxes[index_i].classes[c] == 0: continue

            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]

                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

# load and prepare an image
def load_image_pixels(filename, shape):
    # load the image to get its shape
    image = load_img(filename)
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, target_size=shape)
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    # add a dimension so that we have one sample
    image = expand_dims(image, 0)
    return image, width, height

# get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores

# draw all results
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    # load the image
    data = pyplot.imread(filename)
    # plot the image
    pyplot.imshow(data)
    # get the context for drawing boxes
    ax = pyplot.gca()
    # plot each box
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # calculate width and height of the box
        width, height = x2 - x1, y2 - y1
        # create the shape
        rect = Rectangle((x1, y1), width, height, fill=False, color='white')
        # draw the box
        ax.add_patch(rect)
        # draw text and score in top left corner
        label = "%s (%.3f)" % (v_labels[i], v_scores[i])
        pyplot.text(x1, y1, label, color='white')
    # show the plot
    pyplot.show()

In [5]:
#Takes a normalized rgb image and returns a tuple of two image arrays. The first has the objects and the second has everything else.
#Adapted from Lavanya Shukla on Kaggle. Written by Evan Miller.

def get_sub_images(IMAGE):
    # declare labels for the model
    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat"]
    
    # get image size and model paramaters
    WIDTH, HEIGHT = 416,416
    
    # Set starting anchor positions and threshold for detecting an object
    anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
    class_threshold = 0.3
    
    # normalize image and add a dimension
    IMAGE = IMAGE.astype('float32')
    IMAGE /= 255.0
    inpt = np.expand_dims(IMAGE, 0)
    
    # Use YOLO to find bounding boxes
    yhat = obj_model.predict(inpt)
    
    # Create boxes
    boxes = list()
    for i in range(len(yhat)):
        # decode the output of the network
        boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, HEIGHT, WIDTH)
    
    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, HEIGHT, WIDTH, HEIGHT, WIDTH)
    
    # suppress non-maximal boxes
    do_nms(boxes, 0.5)
    
    # get the details of the detected objects
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
    
    # values are True where there is an object
    mask_object = np.zeros((HEIGHT,WIDTH,3), dtype=bool)
    
    for x in range(len(v_boxes)):
        box = v_boxes[x]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        # Loop over area of each box
        for i in range(x1,x2):
            if i >= 416:
                continue
            for j in range(y1,y2):
                if j >= 416:
                    continue
                
                mask_object[j][i] = [True,True,True]
                
    mask_background = ~mask_object
    
    
    objects = np.zeros_like(IMAGE)
    background = np.zeros_like(IMAGE)
    
    # Apply masks to the image to get objwct and background arrays
    objects[mask_object] = IMAGE[mask_object]
    background[mask_background] = IMAGE[mask_background]
   
    
    return (objects, background)

# 1. Generate dataset

We will generate the dataset for training the model later on, it is important that we set the game on the first person view and take into account certain conditions for the dataset. Check the "How To Guide" [here](http://www.waynenterprises.com/ai-ml) for more information on this.

In [6]:
from game_control import PressKey, ReleaseKey
from getkeys import key_check

### Frame capture functions
These functions capture the game's frames in 1280x1024 Windowed mode.

Screen record is the method to get one frame and img_thread is the thread we will later use to constantly capture the game's output.

In [7]:
global grb
grb = Grabber(bbox=(126,26,1152,1025))
def screen_record(method = 'ImageGrab'):
    if method == 'ImageGrab':
        printscreen =  ImageGrab.grab(bbox=(126,26,1152,1025))
        generalIMG = np.array(printscreen)
    
    elif method == 'grabber':
        global grb
        printscreen = None
        printscreen = grb.grab(printscreen)
        generalIMG = np.array(printscreen)
    
    return generalIMG          

In [8]:
global front_buffer
global back_buffer
front_buffer = np.zeros((1024, 1024), dtype=np.int8)
back_buffer = np.zeros((1024, 1024), dtype=np.int8)

global fps
fps = 0

def img_thread():
    global front_buffer
    global back_buffer
    global fps
    
    last_time = time.time()
    while True:
        front_buffer = screen_record()
        # Swap buffers
        front_buffer, back_buffer = back_buffer, front_buffer
        fps = int(1.0/(time.time()-last_time))
        last_time = time.time()
        
        if 'J' in key_check():
            break
    return
    

### Image preprocessing functions

We will define a function that will apply the preprocessing we want to the images:

In [9]:
# Code inspired by https://towardsdatascience.com/image-pre-processing-c1aec0be3edf
# Ideas written by Prince Canuma

# adds Gaussian blur to an image

def blur(res_img):

    no_noise = []
    for i in range(len(res_img)):
        blur = cv2.GaussianBlur(res_img[i], (5, 5), 0)
        no_noise.append(blur)

    return no_noise

In [10]:
def preprocess_image(image):
    proccessed_image = cv2.resize(image,(416,416))
    #proccessed_image = blur(proccessed_image)
    
    return proccessed_image

### Game control and input reading functions

These functions will read the inputs and generate a array for later use when generating the dataset:

In [11]:
def keys_to_output(keys):
    '''
    Convert keys to a ...multi-hot... array

    [A,W,D] boolean values.
    '''
    output = [0,0,0,0]
    
    if 'A' in keys:
        output[0] = 1
    if 'D' in keys:
        output[1] = 1
    if 'W' in keys:
        output[2] = 1
    if 'S' in keys:
        output[3] = 1
    
    return output

This sequencer thread will capture the sequences of 5 frames with a separation of 1/capturerate ms each.

In [12]:
global seq
global num
num = 0
seq = []

global key_out
key_out = [0, 0, 0, 0]

def image_sequencer_thread():
    global back_buffer
    global seq
    global key_out
    global num
    
    # Frames per second capture rate
    capturerate = 10.0
    while True:
        last_time = time.time()
        if len(seq) == 5:
            del seq[0]

        seq.append(preprocess_image(np.copy(back_buffer)))
        num = num + 1
        keys = key_check()
        if 'J' in keys:
            break
        key_out = keys_to_output(keys)
        waittime = (1.0/capturerate)-(time.time()-last_time)
        if waittime>0.0:
            time.sleep(waittime)

This function will be useful to check which class corresponds the input we captured to:

In [13]:
def counter_keys(key):
        if np.array_equal(key , [0,0,0,0]):
            return 0
        elif np.array_equal(key , [1,0,0,0]):
            return 1
        elif np.array_equal(key , [0,1,0,0]):
            return 2
        elif np.array_equal(key , [0,0,1,0]):
            return 3
        elif np.array_equal(key , [0,0,0,1]):
            return 4
        elif np.array_equal(key , [1,0,1,0]):
            return 5
        elif np.array_equal(key , [1,0,0,1]):
            return 6
        elif np.array_equal(key , [0,1,1,0]):
            return 7
        elif np.array_equal(key , [0,1,0,1]):
            return 8
        else:
            return -1

This is the function that the data saving threads will run to save the dataset to compressed files (change the path if needed):

In [14]:
def save_data(data,number):
    file_name = 'Training_Data\\training_data'+str(number)+'.npz'
    np.savez_compressed(file_name,data=list([x[:5] for x in data]),labels=list([x[5:] for x in data]))
    del data

## Generate dataset

We will run this function to generate the dataset:

In [15]:
def run():
    global fps
    global front_buffer
    global back_buffer
    global seq
    global key_out
    global num
    training_data = []
    threads = list()
    th_img = threading.Thread(target=img_thread)
    th_seq = threading.Thread(target=image_sequencer_thread)
    threads.append(th_img)
    threads.append(th_seq)
    th_img.start()
    time.sleep(1)
    th_seq.start()
    l = 0
    fn = 0
    time.sleep(4)
    last_num = 0
    
    number_of_keys = [0,0,0,0,0,0,0,0,0]
    
    while True:
        img_seq = seq.copy()
        output = key_out.copy()
        
        while len(img_seq) != 5 or last_num==num:
            del img_seq, output
            img_seq = seq.copy()
            output = key_out.copy()
        last_num = num
        
        clear_output(wait=True)
        stdout.write('Recording at {} FPS \n'.format(fps))
        stdout.write('Images in sequence {} \n'.format(len(img_seq)))
        stdout.write('Training data len {} secuences \n'.format(l))
        stdout.write('Number of archives {}\n'.format(fn))
        stdout.write('Keys pressed: ' + str(output) + ' \n')
        stdout.write('Keys samples in this file: ' + 'none:' + str(number_of_keys[0]) + ' A:' + str(number_of_keys[1])+ ' D:' + str(number_of_keys[2]) + ' W:' + str(number_of_keys[3])+ ' S:' + str(number_of_keys[4]) + ' AW:'  + str(number_of_keys[5]) + ' AS:' + str(number_of_keys[6]) + ' WD:' + str(number_of_keys[7]) + ' SD:' + str(number_of_keys[8]) + ' \n')
        stdout.flush()
        
        key  = counter_keys(output)
        
        if key != -1:
            larg = nlargest(9,number_of_keys)
            prop = (9. - float(larg.index(number_of_keys[key])))/10
            if(number_of_keys[key]  > np.mean(number_of_keys) * 1.25):
                prop = prop + 0.05
            if (np.random.rand() > prop):
                number_of_keys[key] += 1
                l = l+1
                training_data.append([img_seq[0],img_seq[1],img_seq[2],img_seq[3],img_seq[4], output])
            
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

        if len(training_data) % 500 == 0 and len(training_data) != 0:
        #if len(training_data) % 20 == 0 and len(training_data) != 0:
            threading.Thread(target=save_data, args=(training_data.copy(), fn,)).start()
            fn = fn + 1
            del training_data
            training_data = []
            
        if 'J' in key_check():
            threading.Thread(target=save_data, args=(training_data.copy(), fn,)).start()
            fn = fn + 1
            del training_data
            training_data = []
            break
            


Now we will run the capture and generation of the dataset, remember that the game's window must be in 1280x1024 resolution and located in the top left part of the screen ((0,0) coordinates).

Once you captured all the data you need you can press 'J' to stop the run function and archive the current sequences. Be sure to give it some time to save before shutting down the kernel, as the large files may take some time to save.

In [16]:
capturing = False

if capturing:
    time.sleep(15)
    run()

In [17]:
'''
Plots images from the data file
Change the filepath and index 'i' if necessary
'''

plotImgs = False
if plotImgs:
    with np.load('D:\\Data\\Miller_Data(2)3-11-2021\\training_data1.npz') as data:
        training_data = data['data']
    
    i= 420
    
    for j in range(5):
        plt.imshow(cv2.cvtColor(training_data[i][j], cv2.COLOR_BGR2RGB))
        plt.show()
        plt.imshow(cv2.cvtColor(training_data[i][j], cv2.COLOR_BGR2RGB))
        plt.show()
        print('---------------------------------------------------------')

In [18]:
'''
Plots images from the data file after object detection and separation
Change the filepath and index 'i' if necessary
'''

plotImgs = False
if plotImgs:
    with np.load('D:\\Data\\Miller_Data(2)3-11-2021\\training_data1.npz') as data:
        training_data = data['data']
    
    i = 420
    
    train_objects = []
    train_background = []
    for j in range(5):
        temp1,temp2 = get_sub_images(training_data[i][j])
        train_objects.append(temp1)
        train_background.append(temp2)

    for j in range(5):
        plt.imshow(cv2.cvtColor(train_objects[j], cv2.COLOR_BGR2RGB))
        plt.show()
        plt.imshow(cv2.cvtColor(train_background[j], cv2.COLOR_BGR2RGB))
        plt.show()
        print('---------------------------------------------------------')

This function is used to count the number of instances per class in a set:

In [19]:
def sort_by_class(data, labels):
    
    #[nonekey,A,D,W,S,AW,AS,DW,DS]
    imgs = [[],[],[],[],[],[],[],[],[]]
    lbls = [[],[],[],[],[],[],[],[],[]]
    
    #np.random.shuffle(data)
    
    for i in range(len(data)):
        if np.array_equal(labels[i] , [0,0,0,0]):
            imgs[0].append(data[i])
            lbls[0].append(labels[i])
        elif np.array_equal(labels[i] , [1,0,0,0]):
            imgs[1].append(data[i])
            lbls[1].append(labels[i])
        elif np.array_equal(labels[i] , [0,1,0,0]):
            imgs[2].append(data[i])
            lbls[2].append(labels[i])
        elif np.array_equal(labels[i] , [0,0,1,0]):
            imgs[3].append(data[i])
            lbls[3].append(labels[i])
        elif np.array_equal(labels[i] , [0,0,0,1]):
            imgs[4].append(data[i])
            lbls[4].append(labels[i])
        elif np.array_equal(labels[i] , [1,0,1,0]):
            imgs[5].append(data[i])
            lbls[5].append(labels[i])
        elif np.array_equal(labels[i] , [1,0,0,1]):
            imgs[6].append(data[i])
            lbls[6].append(labels[i])
        elif np.array_equal(labels[i] , [0,1,1,0]):
            imgs[7].append(data[i])
            lbls[7].append(labels[i])
        elif np.array_equal(labels[i] , [0,1,0,1]):
            imgs[8].append(data[i])
            lbls[8].append(labels[i])

    return (imgs, lbls)
    


This function will balance the number of instances of the classes in a set by deleting extra instances after shuffling:

In [20]:
def balance_data(data_in_clases, labels_in_clases):
    balanced_data = []
    balanced_labels = []
    
    max_len = 999999999999999999999
    for i in range(len(labels_in_clases)):
        ln = len(labels_in_clases[i])
        if ln < max_len:
            max_len = ln 
    
    for i in range(len(labels_in_clases)):
        for j in range(max_len):
            balanced_data.append(data_in_clases[i][j])
            balanced_labels.append(labels_in_clases[i][j])
    
    
    to_shuffle = list(zip(balanced_data, balanced_labels))
    np.random.shuffle(to_shuffle)
    balanced_data, balanced_labels = zip(*to_shuffle)
    
    return (balanced_data, balanced_labels)
    

For debug purposes:

In [31]:
'''
Tests data balancing.
Label replacement can be used to achieve an even distribution of labels among all images
'''


debug = True

if debug:
    with np.load('D:\Data\Data-Course1\training_data0.npz') as data:
        images = data['data']
        labels = data['labels']

    
    labels = np.array([x[0] for x in labels])
    
    '''
    # testing labels replacement
    new_labels = []
    for i in range(len(labels)):
        if i%9 == 0:
            new_labels.append([0,0,0,0])
        if i%9 == 1:
            new_labels.append([1,0,0,0])
        if i%9 == 2:
            new_labels.append([0,1,0,0])
        if i%9 == 3:
            new_labels.append([0,0,1,0])
        if i%9 == 4:
            new_labels.append([0,0,0,1])
        if i%9 == 5:
            new_labels.append([1,0,1,0])
        if i%9 == 6:
            new_labels.append([1,0,0,1])
        if i%9 == 7:
            new_labels.append([0,1,1,0])
        if i%9 == 8:
            new_labels.append([0,1,0,1])
    labels = new_labels
    '''
    
    
    sorted_images, sorted_labels = sort_by_class(images, labels) 
    print('none: ' + str(len(sorted_labels[0])))
    print('A: ' + str(len(sorted_labels[1])))
    print('D ' + str(len(sorted_labels[2])))
    print('W ' + str(len(sorted_labels[3])))
    print('S ' + str(len(sorted_labels[4])))
    print('AW ' + str(len(sorted_labels[5])))
    print('AS ' + str(len(sorted_labels[6])))
    print('DW ' + str(len(sorted_labels[7])))
    print('DS ' + str(len(sorted_labels[8])))


    sorted_images, sorted_labels = balance_data(sorted_images, sorted_labels)
    sorted_images, sorted_labels = sort_by_class(sorted_images, sorted_labels)
    print('none: ' + str(len(sorted_labels[0])))
    print('A: ' + str(len(sorted_labels[1])))
    print('D ' + str(len(sorted_labels[2])))
    print('W ' + str(len(sorted_labels[3])))
    print('S ' + str(len(sorted_labels[4])))
    print('AW ' + str(len(sorted_labels[5])))
    print('AS ' + str(len(sorted_labels[6])))
    print('DW ' + str(len(sorted_labels[7])))
    print('DS ' + str(len(sorted_labels[8])))


OSError: [Errno 22] Invalid argument: 'D:\\Data\\Data-Course1\training_data0.npz'

In [22]:
'''
Tests for improperly zipped data. 
This usually occurs when the saving process is interrupted or when the data is corrupted during a transfer.
Change the folder path to the location of the data you want to test
'''


debug = False

from zipfile import ZipFile

if debug:
    file_names = glob.glob("D:\\Data_Archive\\**\*.npz", recursive=True)
    
    for file in file_names:
        try:
            with ZipFile(file) as zf:
                print ("found good file: ", file)
        except BadZipfile:
            print ("found bad file: ", file)

In [23]:
'''
Tests data files for an absence of a key class, which breaks the training generator.
Change the folder path to the location of the data you want to test
'''


debug = False

if debug:
    
    file_names = glob.glob("D:\\Data_Archive\\**\*.npz", recursive=True)
    
    for file in file_names:
        with np.load(file) as data:
            images = data['data']
            labels = data['labels']

        labels = np.array([x[0] for x in labels])
    
        sorted_images, sorted_labels = sort_by_class(images, labels) 
        if any(len(sorted_labels[x]) == 0 for x in range(len(sorted_labels))):
            print ("found bad file: ", file)
        else:
            print ("found good file: ", file)

# 3. Define the model

We will define the model to use in our training, please select the model you want to use in the cell bellow (the first two models are described in the "Definition of the DNN" of this [documentation](https://github.com/eritzyg/GTAV-Self-driving-car#documentation). They are kept for archiving purposes.):

In [24]:
# Select the model to use:
# 'CNN+MPL' : Convolutional neural network with multi layer perceptron.
# 'CNN+RNN' : Convolutional neural network with recurrent neural network.
# 'YOLO'    : Processes the objects and the background seperately 
selected_model = 'YOLO'

And now we define the corresponding model:

In [25]:
if selected_model == 'CNN+RNN':
    model = Sequential()

    model.add(InputLayer(input_shape=(5, 270, 480, 3)))

    model.add(TimeDistributed(Convolution2D(32, (4,4), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)

    model.add(TimeDistributed(Convolution2D(32, (4,4), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)

    model.add(TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last')))
    model.add(TimeDistributed(Dropout(0.25)))
    print(model.output_shape)

    model.add(TimeDistributed(Convolution2D(16, (3,3), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)


    model.add(TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last')))
    model.add(TimeDistributed(Dropout(0.25)))
    print(model.output_shape)

    model.add(TimeDistributed(Flatten()))
    print(model.output_shape)

    model.add(GRU(256, kernel_initializer=initializers.RandomNormal(stddev=0.001))) #128
    model.add(Dropout(0.25))
    print(model.output_shape)

    model.add(Dense(100))
    print(model.output_shape)

    model.add(Dense(80))
    print(model.output_shape)

    model.add(Dense(40))
    print(model.output_shape)

    model.add(Dense(9, activation='sigmoid'))
    print(model.output_shape)

    opt = optimizers.RMSprop(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy']) 

In [26]:
if selected_model == 'CNN+MLP':
    model = Sequential()

    model.add(InputLayer(input_shape=(5, 270, 480, 3)))

    model.add(TimeDistributed(Convolution2D(16, (4,8), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)

    model.add(TimeDistributed(Convolution2D(16, (4,4), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)

    model.add(TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last')))
    model.add(TimeDistributed(Dropout(0.25)))
    print(model.output_shape)

    model.add(TimeDistributed(Convolution2D(12, (3,3), data_format='channels_last')))
    model.add(TimeDistributed(Activation('relu')))
    print(model.output_shape)

    model.add(TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last')))
    model.add(TimeDistributed(Dropout(0.25)))
    print(model.output_shape)

    model.add(Flatten())
    print(model.output_shape)

    model.add(Dense(300))
    print(model.output_shape)
    model.add(Dense(100))
    print(model.output_shape)
    print(model.output_shape)
    model.add(Dense(9, activation='sigmoid'))
    print(model.output_shape)

    opt = optimizers.rmsprop(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy']) 

In [27]:
# Model by Evan Miller
#
# Takes in two inputs corresponding to the images after they have been split into objects and background
# by the YOLO object detection algorithm. These images are processed with a CNN and an RNN+CNN respectively, then they are
# concatenated and fed through some Dense layers for a final prediction.

if selected_model == 'YOLO':
    input1 = tf.keras.Input(shape=(5, 416, 416, 3))
    x1 = TimeDistributed(Convolution2D(16, (4,8), data_format='channels_last'))(input1)
    x1 = TimeDistributed(Activation('relu'))(x1)
    x1 = TimeDistributed(Convolution2D(16, (4,4), data_format='channels_last'))(x1)
    x1 = TimeDistributed(Activation('relu'))(x1)
    x1 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x1)
    x1 = TimeDistributed(Dropout(0.25))(x1)
    x1 = TimeDistributed(Convolution2D(12, (3,3), data_format='channels_last'))(x1)
    x1 = TimeDistributed(Activation('relu'))(x1)
    x1 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x1)
    #x1 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x1)
    x1 = TimeDistributed(Dropout(0.25))(x1)
    x1 = Flatten()(x1)
    x1 = Dense(300)(x1)
    x1 = Dense(100)(x1)
    #x1 = Dense(9, activation='sigmoid')(x1)
    x1 = Model(inputs=input1,outputs=x1)
    
    input2 = tf.keras.Input(shape=(5, 416, 416, 3))
    x2 = TimeDistributed(Convolution2D(32, (4,4), data_format='channels_last'))(input2)
    x2 = TimeDistributed(Activation('relu'))(x2)
    x2 = TimeDistributed(Convolution2D(32, (4,4), data_format='channels_last'))(x2)
    x2 = TimeDistributed(Activation('relu'))(x2)
    x2 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x2)
    x2 = TimeDistributed(Dropout(0.25))(x2)
    x2 = TimeDistributed(Convolution2D(16, (3,3), data_format='channels_last'))(x2)
    x2 = TimeDistributed(Activation('relu'))(x2)
    x2 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x2)
    #x2 = TimeDistributed(MaxPooling2D(pool_size=(5, 5), data_format='channels_last'))(x2)
    x2 = TimeDistributed(Dropout(0.25))(x2)
    x2 = TimeDistributed(Flatten())(x2)
    x2 = GRU(256, kernel_initializer=initializers.RandomNormal(stddev=0.001))(x2)
    x2 = Dropout(0.25)(x2)
    x2 = Flatten()(x2)
    x2 = Dense(100)(x2)
    #x2 = Dense(80)(x2)
    #x2 = Dense(40)(x2)
    #x2 = Dense(9, activation='sigmoid')(x2)
    x2 = Model(inputs=input2,outputs=x2)
    
    combined = concatenate([x1.output,x2.output])
    out = Dense(80)(combined)
    out = Dense(40)(out)
    out = Dense(9, activation='softmax')(out)
    
    model = Model(inputs=[x1.input,x2.input], outputs=out)

In [28]:
opt = tf.keras.optimizers.RMSprop(
    learning_rate=0.0001,
    momentum=0.9,
    centered=True,
)


model.compile(optimizer=opt, loss='categorical_crossentropy', weighted_metrics=['accuracy'])
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 5, 416, 416, 0                                            
__________________________________________________________________________________________________
time_distributed_10 (TimeDistri (None, 5, 413, 413,  1568        input_2[0][0]                    
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 5, 416, 416, 0                                            
__________________________________________________________________________________________________
time_distributed_11 (TimeDistri (None, 5, 413, 413,  0           time_distributed_10[0][0]        
____________________________________________________________________________________________

# 4. Train

First, we will calculate the number of instances of each class in order to create class weights for our data

In [32]:
Totalling = False

if Totalling:
    file_names = glob.glob("D:\\Data\\**\*.npz", recursive=True)
    totals = [0,0,0,0,0,0,0,0,0]

    for fil in file_names:
        print(fil)
        with np.load(fil) as dat:
            training_data_images = dat['data']
            training_data_labels = dat['labels']
        
        training_data_labels = np.array([x[0] for x in training_data_labels])

        sorted_images, sorted_labels = sort_by_class(training_data_images, training_data_labels)
        
        print('none: ' + str(len(sorted_labels[0])))
        print('A: ' + str(len(sorted_labels[1])))
        print('D ' + str(len(sorted_labels[2])))
        print('W ' + str(len(sorted_labels[3])))
        print('S ' + str(len(sorted_labels[4])))
        print('AW ' + str(len(sorted_labels[5])))
        print('AS ' + str(len(sorted_labels[6])))
        print('DW ' + str(len(sorted_labels[7])))
        print('DS ' + str(len(sorted_labels[8])))

        for i in range(len(sorted_labels)):
            totals[i] += len(sorted_labels[i])

    weights = [x/np.min(totals) for x in totals]
    class_weights = {0:weights[0], 1:weights[1], 2:weights[2], 3:weights[3], 4:weights[4], 5:weights[5], 6:weights[6], 7:weights[7], 8:weights[8]}

D:\Data\Data-Course1\training_data0.npz
none: 90
A: 48
D 36
W 105
S 56
AW 77
AS 14
DW 54
DS 20
D:\Data\Data-Course1\training_data1.npz
none: 93
A: 50
D 30
W 90
S 46
AW 67
AS 19
DW 50
DS 17
D:\Data\Data-Course2\training_data0.npz
none: 77
A: 44
D 43
W 83
S 65
AW 78
AS 22
DW 56
DS 32
D:\Data\Data-Course2\training_data1.npz
none: 61
A: 65
D 35
W 81
S 66
AW 58
AS 39
DW 58
DS 37
D:\Data\Data-Course2\training_data2.npz
none: 67
A: 53
D 35
W 89
S 59
AW 69
AS 37
DW 61
DS 30
D:\Data\Data-Course2\training_data3.npz
none: 13
A: 8
D 9
W 23
S 28
AW 14
AS 20
DW 13
DS 8
D:\Data\John\training_data_1.npz
none: 45
A: 55
D 41
W 57
S 41
AW 66
AS 46
DW 44
DS 9
D:\Data\John\training_data_10.npz
none: 52
A: 60
D 36
W 107
S 55
AW 60
AS 48
DW 58
DS 24
D:\Data\John\training_data_100.npz
none: 64
A: 62
D 55
W 100
S 44
AW 67
AS 34
DW 67
DS 7
D:\Data\John\training_data_101.npz
none: 55
A: 76
D 36
W 95
S 56
AW 74
AS 33
DW 60
DS 15
D:\Data\John\training_data_102.npz
none: 84
A: 68
D 38
W 87
S 32
AW 75
AS 30
DW 73
DS

none: 55
A: 64
D 41
W 83
S 47
AW 86
AS 45
DW 57
DS 22
D:\Data\John\training_data_9.npz
none: 25
A: 28
D 21
W 43
S 25
AW 33
AS 24
DW 23
DS 34
D:\Data\John\training_data_90.npz
none: 14
A: 9
D 6
W 24
S 25
AW 12
AS 21
DW 15
DS 2
D:\Data\John\training_data_91.npz
none: 60
A: 64
D 30
W 91
S 47
AW 72
AS 54
DW 61
DS 21
D:\Data\John\training_data_92.npz
none: 53
A: 49
D 41
W 106
S 65
AW 56
AS 49
DW 49
DS 32
D:\Data\John\training_data_93.npz
none: 48
A: 57
D 27
W 97
S 58
AW 72
AS 39
DW 59
DS 43
D:\Data\John\training_data_94.npz
none: 66
A: 68
D 39
W 81
S 49
AW 64
AS 40
DW 70
DS 23
D:\Data\John\training_data_95.npz
none: 59
A: 62
D 51
W 81
S 43
AW 51
AS 70
DW 61
DS 22
D:\Data\John\training_data_96.npz
none: 70
A: 57
D 38
W 118
S 48
AW 62
AS 35
DW 53
DS 19
D:\Data\John\training_data_97.npz
none: 11
A: 18
D 10
W 16
S 4
AW 12
AS 12
DW 14
DS 7
D:\Data\John\training_data_98.npz
none: 60
A: 56
D 46
W 110
S 50
AW 66
AS 45
DW 55
DS 12
D:\Data\John\training_data_99.npz
none: 61
A: 67
D 52
W 92
S 54
AW 59

Now we will define some functions to reshape the data according to the input of our model:

In [34]:
def reshape_custom_X(data, verbose = 0):
    reshaped = np.zeros((data.shape[0], 5, 416, 416, 3), dtype=np.float32)
    for i in range(0, data.shape[0]):
        for j in range(0, 5):
            if (verbose == 1):
                print('Reshaped image: ' + str(i))
            reshaped[i][j] = data[i][j]
            
    return reshaped

def reshape_custom_y(data):
    reshaped = np.zeros((data.shape[0], 9), dtype=np.float32)
    for i in range(0, data.shape[0]):
        # [A,D,W,S]
        if np.array_equal(data[i] , [0,0,0,0]):
            reshaped[i][0] = 1.
        elif np.array_equal(data[i] , [1,0,0,0]):
            reshaped[i][1] = 1.
        elif np.array_equal(data[i] , [0,1,0,0]):
            reshaped[i][2] = 1.
        elif np.array_equal(data[i] , [0,0,1,0]):
            reshaped[i][3] = 1.
        elif np.array_equal(data[i] , [0,0,0,1]):
            reshaped[i][4] = 1.
        elif np.array_equal(data[i] , [1,0,1,0]):
            reshaped[i][5] = 1.
        elif np.array_equal(data[i] , [1,0,0,1]):
            reshaped[i][6] = 1.
        elif np.array_equal(data[i] , [0,1,1,0]):
            reshaped[i][7] = 1.
        elif np.array_equal(data[i] , [0,1,0,1]):
            reshaped[i][8] = 1.
    return reshaped

And now we run the training process, the accuracy can be seen after each epoch.

In [35]:
def tf_data_generator(file_list, batch_size = 5):
    i = 0 # Iterates over the files in the dataset
    data = [[],[]]
    labels = []
    while True:
        if i >= len(file_list):
            print("\nShuffling and Restarting...")
            i = 0
            np.random.shuffle(file_list)
        else:
            fil = file_list[i] 
            print("\nOpening Training File: ", fil)

            with np.load(fil) as dat:
                training_data_images = dat['data']
                training_data_labels = dat['labels']

            to_shuffle = list(zip(training_data_images, training_data_labels))
            np.random.shuffle(to_shuffle)
            training_data_images, training_data_labels = zip(*to_shuffle)

            training_data_labels = np.array([x[0] for x in training_data_labels])

            sorted_images, sorted_labels = sort_by_class(training_data_images, training_data_labels)

            print('none: ' + str(len(sorted_labels[0])))
            print('A: ' + str(len(sorted_labels[1])))
            print('D ' + str(len(sorted_labels[2])))
            print('W ' + str(len(sorted_labels[3])))
            print('S ' + str(len(sorted_labels[4])))
            print('AW ' + str(len(sorted_labels[5])))
            print('AS ' + str(len(sorted_labels[6])))
            print('DW ' + str(len(sorted_labels[7])))
            print('DS ' + str(len(sorted_labels[8])))

            # shuffling occurs here
            #training_data_images, training_data_labels = balance_data(sorted_images, sorted_labels)

            train_objects = []
            train_background = []
            for k in range(len(training_data_images)):
                train_objects.append([])
                train_background.append([])
                for j in range(5):
                    #normalization occurs here
                    temp1,temp2 = get_sub_images(training_data_images[k][j])
                    train_objects[k].append(temp1)
                    train_background[k].append(temp2)

            
            X_train = [reshape_custom_X(np.array(train_objects)), reshape_custom_X(np.array(train_background))]
            y_train = reshape_custom_y(np.array(training_data_labels))

            # Adds data from the file to the collected data
            for x in range(len(X_train[0])):
                data[0].append(X_train[0][x])
                data[1].append(X_train[1][x])
                labels.append(y_train[x])
                # Outputs a batch of data once a batch has been filled
                if len(labels) >= batch_size:
                    num_batches = len(labels) // batch_size
                    for l in range(num_batches):
                        yield (np.array(data[0][l*batch_size:(l+1)*batch_size]), np.array(data[1][l*batch_size:(l+1)*batch_size])), labels[l*batch_size:(l+1)*batch_size]
                    data = [[],[]]
                    labels = []
            
            i = i + 1

In [36]:
#Generator version of training function

training = True
epochs = 5 # 1 epoch is roughly once through each file
batch_size = 1
num_cases = 50 # estimated number of instances of the least common button press in an average file

if training:
    file_names = glob.glob("D:\\Data\\**\*.npz", recursive=True)
    np.random.shuffle(file_names)
    # Roughly 80:20 training to validation data split
    train_file_names = file_names[0:int(len(file_names)*0.8)]
    validation_file_names = file_names[int(len(file_names)*0.8):]

    print("number of training files = ", len(train_file_names), "\nnumber of validation files = ", len(validation_file_names))
    

    train_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [train_file_names, batch_size], 
                                                  output_shapes = (((None,5,416,416,3), (None,5,416,416,3)),(None,9)),
                                                  output_types = ((tf.float32, tf.float32), tf.float32))

    validation_dataset = tf.data.Dataset.from_generator(tf_data_generator, args = [validation_file_names, batch_size],
                                                  output_shapes = (((None,5,416,416,3), (None,5,416,416,3)),(None,9)),
                                                  output_types = ((tf.float32, tf.float32), tf.float32))


    steps_per_epoch = len(train_file_names)*num_cases*9//batch_size
    validation_steps = len(validation_file_names)*num_cases*9//batch_size
    print("steps_per_epoch = ", steps_per_epoch)
    print("validation_steps = ", validation_steps)
    
    # Saves the model at the point in the training where the highest validation accuracy was achieved
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath='Model\\'+selected_model+"_checkpoint.h5",
        save_weights_only=False,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True)

    if not Totalling:
        class_weights = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1, 6:1, 7:1, 8:1}
        #class_weights = {0: 2.151405912040375, 1: 2.1820475847152125, 2: 1.4978370583994232, 3: 3.39689978370584, 4: 1.7534246575342465, 5: 2.476207642393655, 6: 1.834534967555876, 7: 2.1074260994953136, 8: 1.0}

    model.fit(train_dataset, validation_data = validation_dataset, class_weight=class_weights, steps_per_epoch = steps_per_epoch,
             validation_steps = validation_steps, epochs = epochs, callbacks=[model_checkpoint_callback])

number of training files =  88 
number of validation files =  22
steps_per_epoch =  39600
validation_steps =  9900
Epoch 1/5

Opening Training File:  b'D:\\Data\\John\\training_data_73.npz'
none: 52
A: 76
D 50
W 77
S 59
AW 69
AS 48
DW 54
DS 15
  500/39600 [..............................] - ETA: 32:35 - loss: 5.3051 - accuracy: 0.1507
Opening Training File:  b'D:\\Data\\John\\training_data_88.npz'
none: 49
A: 80
D 51
W 84
S 58
AW 72
AS 45
DW 47
DS 14
  999/39600 [..............................] - ETA: 3:14:01 - loss: 5.0638 - accuracy: 0.1860
Opening Training File:  b'D:\\Data\\John\\training_data_11.npz'
none: 67
A: 62
D 37
W 79
S 46
AW 66
AS 67
DW 56
DS 20
 1500/39600 [>.............................] - ETA: 4:05:54 - loss: 4.9684 - accuracy: 0.2055
Opening Training File:  b'D:\\Data\\John\\training_data_15.npz'
none: 83
A: 45
D 42
W 83
S 65
AW 74
AS 40
DW 61
DS 7
 1999/39600 [>.............................] - ETA: 4:28:21 - loss: 4.9230 - accuracy: 0.2134
Opening Training File:  b'D:\

UnknownError: 2 root error(s) found.
  (0) Unknown:  FileNotFoundError: [Errno 2] No such file or directory: b'D:\\Data\\Miller_Data(1)3-11-2021\\training_data0(1).npz'
Traceback (most recent call last):

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
    ret = func(*args)

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\autograph\impl\api.py", line 620, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 891, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "<ipython-input-35-ae1d652ce12c>", line 14, in tf_data_generator
    with np.load(fil) as dat:

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\numpy\lib\npyio.py", line 416, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))

FileNotFoundError: [Errno 2] No such file or directory: b'D:\\Data\\Miller_Data(1)3-11-2021\\training_data0(1).npz'


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_2]]
  (1) Unknown:  FileNotFoundError: [Errno 2] No such file or directory: b'D:\\Data\\Miller_Data(1)3-11-2021\\training_data0(1).npz'
Traceback (most recent call last):

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\script_ops.py", line 249, in __call__
    ret = func(*args)

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\autograph\impl\api.py", line 620, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 891, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "<ipython-input-35-ae1d652ce12c>", line 14, in tf_data_generator
    with np.load(fil) as dat:

  File "C:\Users\mille7em\AppData\Roaming\Python\Python38\site-packages\numpy\lib\npyio.py", line 416, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))

FileNotFoundError: [Errno 2] No such file or directory: b'D:\\Data\\Miller_Data(1)3-11-2021\\training_data0(1).npz'


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_9450]

Function call stack:
train_function -> train_function


Now we save the learned network (Not reccomended if using callbacks):

In [None]:
import h5py as h5py

saving = False

if saving:
    model.save('Model\\'+selected_model+".h5")

# 5. Run our model in the game

Load the model we want to test

In [None]:
import h5py as h5py

loading = True

if loading:
    model = load_model('Model\\Weights.h5')

In [None]:
'''
Plots images from the data file after object detection and separation, then executes and prints a prediction on the sequence
Change the filepath and index 'i' if necessary
'''

plotImgs = True

if plotImgs:
    print(selected_model)
    
    with np.load('D:\\Data\\Miller_Data(1)3-11-2021\\training_data0.npz') as data:
        training_data = data['data']
    
    i= 3
    
    train_objects = []
    train_background = []
    for j in range(5):
        temp1,temp2 = get_sub_images(training_data[i][j])
        train_objects.append(temp1)
        train_background.append(temp2)
    
    for j in range(5):
        plt.imshow(cv2.cvtColor(train_objects[j], cv2.COLOR_BGR2RGB))
        plt.show()
        plt.imshow(cv2.cvtColor(train_background[j], cv2.COLOR_BGR2RGB))
        plt.show()
        print('---------------------------------------------------------')
    
    X_test = [reshape_custom_X(np.array([train_objects]), verbose=0), reshape_custom_X(np.array([train_background]), verbose=0)]

    print (np.argmax(model.predict((X_test[0], X_test[1])), axis=1))

Now it is time to test our model in the game, we will reduce the amount of VRAM tensorflow can use so that the game has some VRAM in spare. We will also import the library needed to send inputs to the game:

In [None]:
# Let the game have some VRAM (needed or the game will crash)
config = tf.compat.v1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.4
tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

# For controlling the game
from inputsHandler import select_key
from tkinter import *

In [None]:
def key_press(key):
    if key == 1:
        return'A'
    if key == 2:
        return'D'
    if key == 3:
        return'W'
    if key == 4:
        return'S'
    if key == 5:
        return'AW'
    if key == 6:
        return'AS'
    if key == 7:
        return'DW'
    if key == 8:
        return'DS'
    return 'none'

This fuction calculates the Mean Squared Error between 2 images. Is used to detect if the car is stuck somewhere. 

In [None]:
def mse(imageA, imageB):

    err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
    err /= float(imageA.shape[0] * imageA.shape[1])
    return err

We define the run function for our network:

In [None]:
#Run Configurations
show_current_control = False #It will show a windows with a message indicating if the car is currently be controlled by
                            #Network  or by a Human
    
show_whatAIsees = False #It will show the 5 images that the netowrk uses the predict the output 

enable_evasion = True #If the program detects that the car is not moving (for example because it is stuck facing a wall and
                        #the network is not able to return to the road) It will make the car move backwards for a second.

In [None]:
def run_IA():
    global fps
    global front_buffer
    global back_buffer
    global seq
    global key_out
    global num
    
    training_data = []
    threads = list()
    th_img = threading.Thread(target=img_thread)
    th_seq = threading.Thread(target=image_sequencer_thread)
    threads.append(th_img)
    threads.append(th_seq)
    th_img.start()
    time.sleep(1)
    th_seq.start()
    time.sleep(4)
    
    last_num = 0
    
    last_time = time.time()
    
    if show_current_control:
        root = Tk()
        var = StringVar()
        var.set('AI CONTROL')
        l = Label(root, textvariable = var, fg='green', font=("Courier", 44))
        l.pack()

    
    while True:
       
        img_seq = seq.copy()
        while len(img_seq) != 5 or last_num==num:
            del img_seq
            img_seq = seq.copy()
        last_num = num
        array = [img_seq[0],img_seq[1],img_seq[2],img_seq[3],img_seq[4]]
        
        objects = []
        background = []
        for j in range(5):
            temp1,temp2 = get_sub_images(array[j])
            objects.append(temp1)
            background.append(temp2)
        
        
        objects = np.expand_dims(np.array(objects), axis=0)
        background = np.expand_dims(np.array(background), axis=0)
        p = np.argmax(model.predict([objects,background]))
        
        if not 'J' in key_check():
            select_key(p)
            if show_current_control:
                var.set('AI CONTROL')
                l.config(fg='green')
                root.update()
        else:
            if show_current_control:
                var.set('MANUAL CONTROL')
                l.config(fg='red')
                root.update()

        #This is used to detect if the car is stuck somewhere (for example facing a wall) and the network does not know what to do. It will move the car
        #backward for a second.
        
        if enable_evasion:
            score = mse(img_seq[0],img_seq[4])
            if score < 1000:
                if show_current_control:
                    var.set('EXECUTING EVASION')
                    l.config(fg='blue')
                    root.update()
                select_key(4)
                time.sleep(1)
                if np.random.rand()>0.5:
                    select_key(6)
                else:
                    select_key(8)
                time.sleep(0.2)
                if show_current_control:
                    var.set('AI CONTROL')
                    l.config(fg='green')
                    root.update()

        time_act = time.time()
        clear_output(wait=True)
        stdout.write('Recording at {} FPS \n'.format(fps))
        stdout.write('Images in sequence {} \n'.format(len(img_seq)))
        stdout.write('Actions per second: ' + str(1/(time_act-last_time)) + '\n')
        if enable_evasion:
            stdout.write('Diference from img 1 to img 5: ' + str(score))
        stdout.flush()
        last_time = time.time()
        
        if show_whatAIsees:
            cv2.imshow('window1',np.array(img_seq[0])) 
            cv2.imshow('window2',np.array(img_seq[1]))
            cv2.imshow('window3',np.array(img_seq[2]))
            cv2.imshow('window4',np.array(img_seq[3]))
            cv2.imshow('window5',np.array(img_seq[4]))
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
     

And now we run the network in the game:

In [None]:
run_IA()