# Nexar Challenge 2
Re-train the YOLO model with the Nexar dataset
<img src="nb_images/logo-nexar.png" width="50%">

In [1]:
import pandas as pd
from scipy import misc
import argparse
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy.io
import scipy.misc
import os, sys
import shutil
import fnmatch
import math
import random, shutil
from PIL import Image
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from keras import optimizers
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

from yolo_utils2 import predict_any, create_model, get_batch, iou, mAP_eval 

from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from retrain_yolo import process_data,process_data_pil,get_classes,get_anchors,get_detector_mask,train,draw

# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all" # importing iPython output fucntioanlity 

#sys.path.append(os.getcwd()+'/yad2k/models' )
#sys.path.append(os.getcwd()+'/yad2k/utils' )

from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body, yolo_eval
# from yad2k.utils.draw_boxes import draw_boxes

%matplotlib inline


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Definitions and paths used throughout the notebook

# Training images (prefix of the dataset folder names) and the box annotations CSV
images_dir = '{}/../data/Training/nexet_2017_'.format(os.getcwd())
boxes_dir = '{}/../data/Training/train_boxes.csv'.format(os.getcwd())

# Where the pre-processed training chunks live and how they are named
training_chunks_path = '{}/../data/Training/training_chunks/'.format(os.getcwd())
training_chunks_name = 'training_chunk_'
# Number of samples per training chunk file; must be a multiple of the
# smallest mini-batch size that will be used (8) -> 30 * 8 = 240
training_data_chunks_size = 30 * 8

# Test images
images_test_dir = '{}/../data/Test/nexet_2017_test'.format(os.getcwd())

# Anchor boxes file
anchors_path = "model_data/yolo_anchors.txt"

# Image size as (720., 1280.) floats
image_shape = (720., 1280.)

# Default anchor boxes, one (a, b) pair per row -> array of shape (5, 2)
YOLO_ANCHORS = np.array([
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
])

# Nexar classes: class name -> integer index, in the order listed here
class_idx = {
    name: index
    for index, name in enumerate(("car", "bus", "pickup_truck", "truck", "van"))
}
classes_path = "model_data/nexar_classes.txt"

# Expose only GPU devices 0 and 1 to CUDA
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# 1 - Preparing the data


To perform training we need:
* class_names,     anchors,      image_data,      boxes,     detectors_mask,      matching_true_boxes

Put the training data into a format: Data['class_names'], etc

We will load the original data, process it and save it into several npz files (data chunks) to perform mini-batch training.

Typical mini-batch sizes are 8 and 32 samples, so we make sure each npz data chunk holds a multiple of 8 samples: 240 samples per chunk (8 × 30).
  

In [3]:
# Read the class names from `classes_path` and the anchor boxes from `anchors_path`
class_names, anchors = (
    get_classes(classes_path),
    get_anchors(anchors_path),
)

In [4]:
# Collect the path of every file found in the dataset folders
# `<images_dir>1`, `<images_dir>2` and `<images_dir>3`.
dts_images_names = []
for dts in range(1, 4):
    print("Processing Dataset folder", dts)
    dataset_folder = images_dir + str(dts)
    # Prefix each file name with its folder to get a usable path
    dts_files = [
        dataset_folder + "/" + image_sample
        for image_sample in os.listdir(dataset_folder)
    ]
    # Shuffle the files of this folder in place to avoid ordering biases
    random.shuffle(dts_files)
    dts_images_names += dts_files

Processing Dataset folder 1
Processing Dataset folder 2
Processing Dataset folder 3


In [5]:
# Build the training chunks.
#
# Every image listed in `dts_images_names` is read together with its annotated
# boxes. Once `training_data_chunks_size` samples are collected (or the image
# list is exhausted) the samples are converted into model inputs and the
# `detectors_mask` / `matching_true_boxes` training targets are precomputed.
# The buffers are then emptied so that only one chunk is held in RAM at a time.

# Set to True to write every chunk to `training_chunks_path` as an .npz file.
# Saving is off by default, which matches the behaviour of the original cell.
SAVE_CHUNKS = False

images_list = []
boxes_list = []

# Load the train boxes
Img_db = pd.read_csv(boxes_dir, header=0)

# Index the annotation rows by image file name once, instead of filtering the
# whole table for every single image. `.values` replaces `DataFrame.as_matrix()`,
# which is deprecated and was removed in pandas 1.0.
boxes_by_image = {
    filename: rows.values
    for filename, rows in Img_db.groupby('image_filename')
}

samples_to_chunk = 0
chunk_count = 1

# Shuffle the images so that chunks are not biased by the folder order
random.shuffle(dts_images_names)

last_sample_index = len(dts_images_names) - 1
for sample_index, image_sample in enumerate(dts_images_names):

    ## Get original images and boxes
    img2 = mpimg.imread(image_sample)
    images_list.append(img2)

    # One [class_index, <4 box values>] row per annotated object of this image;
    # images without any annotation row get an empty array.
    # NOTE(review): the positional indexing assumes columns 2..5 hold the box
    # coordinates and the second-to-last column holds the class label -
    # confirm against the layout of train_boxes.csv.
    labels_boxes = []
    for box_matched in boxes_by_image.get(image_sample.split("/")[-1], ()):
        labels_boxes.append([class_idx[box_matched[-2]], *box_matched[2:6]])
    boxes_list.append(np.asarray(labels_boxes))

    samples_to_chunk += 1

    # Keep accumulating until the chunk is full or this is the final image.
    # The final image is detected by position rather than by comparing paths.
    chunk_is_full = samples_to_chunk >= training_data_chunks_size
    is_last_sample = sample_index == last_sample_index
    if not (chunk_is_full or is_last_sample):
        continue

    ## Translate to the training model's inputs
    print(' Chunk number ',str(chunk_count))

    # Preprocess the data: get images and boxes
    image_data, boxes = process_data(images_list, boxes_list)
    print('     Chunked boxes data dimensions:', boxes.shape)
    print('     Chunked image data dimensions:',image_data.shape)

    # Precompute detectors_mask and matching_true_boxes for training,
    # one entry per sample of the chunk
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        sample_mask, sample_true_boxes = preprocess_true_boxes(box, anchors, [416, 416])
        detectors_mask.append(sample_mask)
        matching_true_boxes.append(sample_true_boxes)

    detectors_mask = np.array(detectors_mask)
    matching_true_boxes = np.array(matching_true_boxes)

    print("     detectors_mask shape     ",detectors_mask.shape)
    print("     matching_true_boxes shape",matching_true_boxes.shape)

    # Save the chunk (only when explicitly enabled)
    if SAVE_CHUNKS:
        np.savez(training_chunks_path + training_chunks_name + str(chunk_count),
                 class_names=class_names,
                 anchors=anchors,
                 image_data=image_data,
                 boxes=boxes,
                 detectors_mask=detectors_mask,
                 matching_true_boxes=matching_true_boxes)
    chunk_count += 1

    # Remove data from RAM before starting the next chunk: our problem is low RAM
    images_list = []
    boxes_list = []
    image_data = None
    boxes = None

    samples_to_chunk = 0

 Chunk number  1
     Chunked boxes data dimensions: (240, 10, 5)
     Chunked image data dimensions: (240, 416, 416, 3)


NameError: name 'anchors' is not defined