
**Filename:**        train_net.ipynb <br/>
**Project name:**    Real-time applications of Computer Vision on UAVs <br/>
**Author:**          Satyam Gaba (satyamgb321@gmail.com) <br/>
**Supervisor:**      [Pratik Narang](https://www.bits-pilani.ac.in/pilani/pratiknarang/profile) <br/>
**Last modified:**   25 May 2019 <br/>
**Comments:**        This file contains the Python script to train the neural network on the aiskyeye dataset for aerial image recognition. <br/>
**References:**      [Keras implementation of RetinaNet object detection](https://github.com/fizyr/keras-retinanet), <br/>
                     [ESRI Object Detection](https://github.com/kunwar31/ESRI_Object_Detection)

In [None]:
#download and install keras-retinanet library

#!git clone https://github.com/fizyr/keras-retinanet

In [None]:
#!pip3 install keras-retinanet/ --user
#!pip install jupyter_utils

In [None]:
#!rm keras-retinanet/ -R

In [None]:
import os

# Restrict TensorFlow to a single GPU: devices are ordered by PCI bus ID and
# only the device with index 1 is made visible to this process.
# NOTE(review): CUDA device indices are presumably zero-based, so "1" would be
# the second GPU rather than the "first GPU" the original comment mentioned —
# confirm which GPU is intended.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"     
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# to verify the GPU in use
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

In [None]:
import csv
import numpy as np
import keras
import math
import tensorflow as tf
import cv2
from os import listdir, walk
from os.path import join
from keras_retinanet.bin.train import create_generators,create_models,create_callbacks
from keras_retinanet.models import backbone,load_model,convert_model
from keras_retinanet.utils.config import read_config_file,parse_anchor_parameters
from keras_retinanet.utils.visualization import draw_boxes
from sklearn.model_selection import train_test_split
from imgaug import augmenters as iaa

tf.set_random_seed(31) # seeds make results more reproducible
np.random.seed(17)

from keras_retinanet.utils.model import freeze as freeze_model
import keras_retinanet

# Reading Training and Validation data

Normally we would hold out 10-30% of our images as a validation set, but since we want the best possible score and already have quite few training images, we'll use all of our images to train.

In [None]:
# Root folders of the training and validation images.
path_train_dset = '/home/pytorch/satyam/object_detection/dataset/train_final/'
path_val_dset = '/home/pytorch/satyam/object_detection/dataset/val_final/'

# walk() yields (dirpath, dirnames, filenames) tuples; the first tuple describes
# the root folder itself, so element [2] is the list of files directly inside it.
train_ids = next(walk(path_train_dset))[2]
val_ids = next(walk(path_val_dset))[2]

# Number of files found in each folder (used later to size an epoch).
print(len(train_ids), len(val_ids))

In [None]:
## use when images are randomly selected in csv file from each scenario in aiskyeye dataset

# train_csv = '/home/pytorch/satyam/object_detection/dataset/train_final/annotations/train_annotations.csv'
# val_csv = '/home/pytorch/satyam/object_detection/dataset/val_final/annotations/val_annotations.csv'

# '''function will give number of images in the csv'''
# def find_num_img(csv_file):
#     with open(csv_file, 'r') as csvfile:
#             csvreader = csv.reader(csvfile, delimiter = ',', lineterminator = '\n') 
#             img_list=[]
#             i=0
#             for row in csvreader:
#                 img_list.append(row[0])
#                 i=i+1
#             num_ids = len(list(set(img_list))) # number of unique elements in the list
#     return num_ids
        
# len_train_ids = find_num_img(train_csv)
# len_val_ids = find_num_img(val_csv)
# print(len_train_ids,len_val_ids)

# Anchor Parameters

1. Anchor parameters are used to decide how anchor boxes will be generated for the model.
1. As we're dealing mostly with small boxes, which can be highly elongated, we'll change the ratios and scales to fit our needs.
1. test_anchors.ipynb is used to visualize anchors on ground truth boxes

In [None]:
# Anchor settings, one INI line per entry; the file is read back with
# read_config_file('config.ini') when the training arguments are defined.
anchor_config_lines = [
    '[anchor_parameters]',
    'sizes   = 32 64 128 256 512',
    'strides = 8 16 32 64 128',
    'ratios  = 0.25 0.5 0.75 1 1.5 2 4 6 8 10',
    'scales  = 0.5 1 2',
]

with open('config.ini', 'w') as config_file:
    # Every line, including the last one, is terminated by a newline.
    config_file.write('\n'.join(anchor_config_lines) + '\n')

# Some Hyperparameters

We will rescale our images to 672x672 for better precision

In [None]:
class args:
    '''
    Plain attribute container passed to keras-retinanet's create_generators()
    and create_callbacks() in place of parsed command-line arguments.

    The annotation paths are derived from path_train_dset / path_val_dset and
    the backbone object `b` is built from `args.backbone`, so each setting is
    defined in exactly one place.
    '''
    # --- data ---
    dataset_type = 'csv'
    annotations = join(path_train_dset, 'annotations', 'train_annotations.csv')
    val_annotations = join(path_val_dset, 'annotations', 'val_annotations.csv')
    classes = join(path_train_dset, 'annotations', 'class_mapping.csv')

    # --- model input ---
    backbone = 'resnet50'
    config = read_config_file('config.ini')  # anchor parameters written above
    image_min_side = 672
    image_max_side = 672
    random_transform = True # Image augmentation

    # --- training schedule ---
    batch_size = 4
    epochs = 30
    # Batches per epoch: one pass over the training files. Clamped to at least 1
    # so that a dataset smaller than one batch does not produce a 0-step epoch.
    steps = max(1, len(train_ids) // batch_size)

    # --- callbacks ---
    tensorboard_dir = ''   # empty string: no TensorBoard directory configured
    evaluation = False
    snapshots = True
    snapshot_path = "saved/"
    weighted_average = True


# Backbone helper object; built from args.backbone so the two cannot diverge.
b = backbone(args.backbone)

In [None]:
# Build the training and validation generators from the settings in `args`,
# using the backbone's own image preprocessing function.
train_gen, valid_gen = create_generators(args, b.preprocess_image)

# Image Augmentation

In addition to the augmentations already done by keras-retinanet [here](https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/bin/train.py#L227), we'll use a package called imgaug to further augment the data.


In [None]:
def sometimes(aug):
    '''Wrap `aug` in iaa.Sometimes with probability 0.5.'''
    return iaa.Sometimes(0.5, aug)


# Pool of mild augmenters. Only a random subset of these is applied to each
# image (see SomeOf below); applying all of them would often be too strong.
mild_augmenters = [
    # One kind of blur per image: gaussian (sigma 0 to 0.5), or average /
    # median blur with a kernel size of 1.
    iaa.OneOf([
        iaa.GaussianBlur((0, 0.5)),
        iaa.AverageBlur(k=1),
        iaa.MedianBlur(k=1),
    ]),

    # Sharpening blended with the original image (alpha 0 to 0.25).
    iaa.Sharpen(alpha=(0, 0.25), lightness=(0.75, 1.5)),

    # Additive gaussian noise, sampled per channel for half of the images.
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.01 * 255), per_channel=0.5),

    # Either pixel-wise dropout (1-10% of the pixels) or coarse dropout
    # computed on a mask of 2-5% of the image size (larger dropped patches).
    iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
        iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
    ]),

    # Shift pixel values by -5 to 5.
    iaa.Add((-5, 5), per_channel=0.5),

    # Scale pixel values to 85-115% of their original value.
    iaa.Multiply((0.85, 1.15), per_channel=0.5),

    # Strengthen or weaken the contrast.
    iaa.ContrastNormalization((0.75, 1.25), per_channel=0.5),

    # Partially desaturate: blend a grayscale version in with alpha 0 to 0.25.
    iaa.Grayscale(alpha=(0.0, 0.25)),

    # Small local distortions, wrapped in sometimes().
    sometimes(iaa.PiecewiseAffine(scale=(0.001, 0.01))),
]

# Augmentation pipeline used by augment_train_gen: pick 1 to 9 of the
# augmenters above per image and run them in random order.
seq = iaa.Sequential(
    [iaa.SomeOf((1, 9), mild_augmenters, random_order=True)],
    random_order=True,
)

In [None]:
def augment_train_gen(train_gen,visualize=False):
    '''
    Endless batch generator that wraps a keras-retinanet generator and adds
    the imgaug pipeline `seq` on top of the generator's own random transform.

    Images are read sequentially, wrapping around at the end of the dataset,
    and grouped into batches of `args.batch_size`.

    Args
        train_gen  : keras-retinanet generator object.
        visualize  : Boolean; True yields the augmented images together with
                     their bounding boxes, False converts the annotations to
                     anchor box targets for the model.

    Yields
        (images, boxes) when visualize is True, otherwise (inputs, targets).
    '''
    dataset_size = train_gen.size()
    next_index = 0

    while True:
        # Fresh accumulators for every batch.
        images, box_groups, annotation_groups = [], [], []

        for sample in range(next_index, next_index + args.batch_size):
            position = sample % dataset_size  # wrap around the dataset
            image, annotations = train_gen.random_transform_group_entry(
                train_gen.load_image(position),
                train_gen.load_annotations(position),
            )
            images.append(image)
            box_groups.append(annotations['bboxes'])
            annotation_groups.append(annotations)
        next_index += len(images)

        # imgaug augmentation is applied in both modes.
        images = seq.augment_images(images)

        if visualize:
            yield np.array(images), np.array(box_groups)
            continue

        # Training mode: let the wrapped generator preprocess the group and
        # build the network inputs and targets.
        images, annotation_groups = train_gen.preprocess_group(images, annotation_groups)
        batch_inputs = train_gen.compute_inputs(images)
        batch_targets = train_gen.compute_targets(batch_inputs, annotation_groups)
        yield np.array(batch_inputs), batch_targets
        

# Visualize augmentations

In [None]:
# Shell commands that change the active environment: upgrade pip, upgrade
# matplotlib through pip, then uninstall and reinstall matplotlib through conda.
# NOTE(review): presumably a one-off repair of a broken matplotlib install —
# confirm whether it is still needed; the versions are not pinned and the
# kernel may need a restart before the new package is picked up.
!python -m pip install -U pip
!python -m pip install -U matplotlib
!conda uninstall matplotlib --yes
!conda install matplotlib --yes

In [None]:
import matplotlib.pyplot as plt

skip_batches = 4   # number of leading batches to discard before plotting one
columns = 2        # images per row in the figure

# Show a single augmented batch with its ground-truth boxes drawn in green.
for batch_no, (imgs, boxes) in enumerate(augment_train_gen(train_gen, visualize=True)):
    if batch_no < skip_batches:
        continue

    # Size the grid from the batch actually received instead of assuming a
    # fixed number of images, so every image (including the first) is shown
    # and no index runs past the end of the batch.
    rows = math.ceil(len(imgs) / columns)
    fig = plt.figure(figsize=(24, 12 * rows))

    for slot, (img, img_boxes) in enumerate(zip(imgs, boxes), start=1):
        draw_boxes(img, img_boxes, (0, 255, 0), thickness=1)
        ax = fig.add_subplot(rows, columns, slot)
        # Images are converted from BGR to RGB for matplotlib.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.show()
    break


# More Hyperparameters

We'll use a learning rate of 0.001 and freeze the weights of the backbone.

In [None]:
# Build the base model, the model used for training and the model used for
# prediction. No weights are loaded here (weights=None); the pretrained
# weights are loaded into training_model in a later cell.
model, training_model, prediction_model = create_models(
    backbone_retinanet=b.retinanet,
    num_classes=train_gen.num_classes(),
    weights=None,
    multi_gpu=False,
    freeze_backbone=True,   # backbone weights stay fixed
    lr=1e-3,
    config=args.config,     # custom anchor parameters
)

In [None]:
# Training callbacks configured from `args`; the validation generator is
# handed over as well.
callbacks = create_callbacks(model, training_model, prediction_model, valid_gen, args)

# Download pretrained model

We download a model pretrained on the COCO dataset and load its weights; we'll skip loading the weights for the last few layers.

In [None]:
#!wget https://github.com/fizyr/keras-retinanet/releases/download/0.5.0/resnet50_coco_best_v2.1.0.h5

In [None]:
# Load the COCO-pretrained weights layer by layer (by_name); layers whose
# weights do not match the checkpoint are skipped (skip_mismatch).
training_model.load_weights(
    './resnet50_coco_best_v2.1.0.h5',
    by_name=True,
    skip_mismatch=True,
)

In [None]:
# model.summary()

# Train the model

We will train for a total of 30 epochs. If, after a few epochs, there is no significant improvement in the model, we'll unfreeze the model by re-running the cells above and resume the training. <br/>
Note: Remember to change `initial_epoch` when resuming the training.

In [None]:
# Train on the augmented batches.
# `shuffle` is not passed: Keras only honours it for keras.utils.Sequence
# inputs, and augment_train_gen is a plain Python generator.
training_model.fit_generator(
    generator=augment_train_gen(train_gen),
    steps_per_epoch=args.steps,
    epochs=args.epochs,
    verbose=1,
    callbacks=callbacks,
    # A plain generator is neither process- nor thread-safe: with
    # use_multiprocessing=True every worker process gets its own copy and
    # yields the same batches, duplicating the data. One background worker
    # thread avoids that.
    workers=1,
    use_multiprocessing=False,
    # 0 for a fresh run; when resuming, set this to the number of epochs
    # that have already been completed.
    initial_epoch=0,
)

In [None]:
## training without augmentation to the dataset

# model.fit_generator(generator=train_gen,
#         steps_per_epoch=args.steps,
#         epochs=args.epochs,
#         verbose=1,
#         callbacks=callbacks,
#         workers=2,
#         use_multiprocessing=True,
#         shuffle=True,
#         initial_epoch=0)

# Debugging Script
 To check if model is running on GPU

In [None]:
# import tensorflow as tf
# import timeit

# config = tf.ConfigProto()
# config.gpu_options.allow_growth = True

# with tf.device('/cpu:0'):
#     random_image_cpu = tf.random_normal((100, 100, 100, 3))
#     net_cpu = tf.layers.conv2d(random_image_cpu, 32, 7)
#     net_cpu = tf.reduce_sum(net_cpu)

# with tf.device('/device:GPU:0'):
#     random_image_gpu = tf.random_normal((100, 100, 100, 3))
#     net_gpu = tf.layers.conv2d(random_image_gpu, 32, 7)
#     net_gpu = tf.reduce_sum(net_gpu)

# sess = tf.Session(config=config)

# # Test execution once to detect errors early.
# try:
#     sess.run(tf.global_variables_initializer())
# except tf.errors.InvalidArgumentError:
#     print(
#       '\n\nThis error most likely means that this system is not '
#       'configured to use a GPU. \n\n')
#     raise

# def cpu():
#     sess.run(net_cpu)

# def gpu():
#     sess.run(net_gpu)

# # Runs the op several times.
# print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
#       '(batch x height x width x channel). Sum of ten runs.')
# print('CPU (s):')
# cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
# print(cpu_time)
# print('GPU (s):')
# gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
# print(gpu_time)
# print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

# sess.close()


In [None]:
# from tensorflow.python.client import device_lib 
# print(device_lib.list_local_devices())

In [None]:
# import tensorflow as tf
# #print(dir(tf.feature_column))
# print(tf.__version__)