In [None]:
# Fetch the COCO 2017 train/val annotation archive into the current working directory.
# `-c` lets wget resume a partially downloaded file instead of starting over.
!wget -c http://images.cocodataset.org/annotations/annotations_trainval2017.zip   # downloading annotations for train and validation 2017 data

In [None]:
# Fetch a second archive named annotations.zip (resumable thanks to `-c`).
# NOTE(review): no later cell unzips or reads annotations.zip, and this URL lacks the
# `annotations/` path segment used by the trainval download -- confirm that the URL is
# valid and that this download is actually needed.
!wget -c http://images.cocodataset.org/annotations.zip  # downloading annotations

In [None]:
# Extract the train/val annotation archive into the current working directory.
# `-o` overwrites existing files without prompting, so the cell can be re-run safely.
!unzip -o annotations_trainval2017.zip  # unzipping annotations for train and validation 2017 data

In [None]:
# Display the current working directory, i.e. where the archive above was extracted.
# The bare `pwd` is not Python; it relies on IPython's automagic (equivalent to `%pwd`).
pwd   # checking the directory

In [None]:
# This cell executes before the notebook's main import cell, so `tf` must be
# imported here as well; otherwise Restart Kernel -> Run All fails with
# NameError on the line below.
import tensorflow as tf

# Drop any Keras state left over from a previous run in this kernel.
tf.keras.backend.clear_session()

In [None]:
# importing libraries
import os
import json
import math
import tensorflow as tf

import cv2
import numpy as np
import pandas as pd
from keras import layers, optimizers
#from keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import load_model
import time

In [None]:
# Locations of the input data on the training machine.
# NOTE(review): both are absolute, machine-specific paths, while the unzip cell extracts
# into the current working directory -- confirm these directories match where the data
# actually lives before running.
image_files = '/home/image_training/val2017'    # directory containing the val2017 image files
annotation_json_files = '/home/image_training/annotations'   # directory containing the annotation JSON files

In [None]:
# Read the person-keypoint annotation file for val2017 into `j`.
# Later cells index the parsed result via j["annotations"] and j["images"].
keypoints_json_path = os.path.join(annotation_json_files, "person_keypoints_val2017.json")
with open(keypoints_json_path) as f:
    j = json.load(f)

In [None]:
def bound(low, high, value):
    """Clamp ``value`` into the closed interval [``low``, ``high``].

    Returns ``low`` when ``value`` is not above it, ``high`` when the result
    is not below it, and ``value`` itself otherwise.
    """
    # Raise to the lower limit first, then cap at the upper limit.
    lifted = value if value > low else low
    return lifted if lifted < high else high

In [None]:
# Number of training samples. This counts annotations, not unique images: the
# dataset-building loop creates one (image, box) row per annotation, so an image
# referenced by several annotations occupies several rows.
length = len(j["annotations"])

In [None]:
new_dim = 256         # side length of the square network input; images are (new_dim, new_dim, 3)

# Pre-allocate the whole dataset, one row per annotation.
# float32 is used for the image buffer because Keras computes in float32 by default,
# so the NumPy default of float64 would only double the memory of this
# (length, 256, 256, 3) array without adding usable precision.
images = np.zeros((length, new_dim, new_dim, 3), dtype=np.float32)
# Four normalized box coordinates per sample; filled by the dataset-building loop.
box = np.zeros((length, 4))

In [None]:
# Fill `images` and `box`: one normalized image and one normalized bounding box
# per annotation.

# Index the image records by id once, so each annotation is resolved with a
# dictionary lookup instead of a scan over every image record.
# setdefault keeps the first record for an id, matching a first-match search.
image_by_id = {}
for image_entry in j["images"]:
    image_by_id.setdefault(image_entry["id"], image_entry)

for i, annotation in enumerate(j["annotations"]):
    image_entry = image_by_id[annotation["image_id"]]
    image_path = os.path.join(image_files, image_entry["file_name"])

    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail here with the offending path rather than on `.shape` below.
        raise FileNotFoundError("Could not read image: " + image_path)

    h, w = im.shape[:2]
    crop = min(h, w)               # side of the largest top-left square
    imgc = im[0:crop, 0:crop]      # square crop anchored at the top-left corner
    imgr = cv2.resize(imgc, (new_dim, new_dim))

    # The annotation stores the box as [x, y, width, height]; convert it to
    # [x1, y1, x2, y2] (top-left and bottom-right corners).
    bbox = np.array(annotation["bbox"], dtype="int")
    bbox[2:4] = bbox[0:2] + bbox[2:4]

    # Rescale to the resized image, then normalize by the same side length.
    # new_dim is used (not a literal 256) so the two steps stay consistent
    # if the input size is ever changed.
    bboxr = bbox * (new_dim / crop)
    bboxr = [(e / new_dim) for e in bboxr]
    # Boxes may extend past the square crop; clamp every coordinate to [0, 1].
    bboxrn = [bound(0, 1, b) for b in bboxr]

    images[i, :, :, :] = imgr / 255   # pixel values scaled to [0, 1]
    box[i, :] = bboxrn

In [None]:
# Sanity check: expected shape is (length, new_dim, new_dim, 3), as allocated above.
images.shape

In [None]:
# Schedule for the manual training loop below.
# NOTE(review): `batch` is hard-coded; the loop trains on images[0 : batch * mini]
# (343 * 32 = 10976 rows), so this assumes the dataset holds at least that many
# samples -- confirm against `length`.
mini = 32  # number of images in each mini-batch
batch = 343  # number of mini-batches processed per epoch
epoches = 300  # number of passes over those mini-batches

In [None]:
#  human detection model training
def _build_human_detector(input_dim):
    """Return the (uncompiled) CNN that regresses four normalized box coordinates.

    input_dim -- side length of the square, 3-channel input images.
    """
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(input_dim, input_dim, 3)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        # Sigmoid keeps the four outputs in [0, 1], the range of the normalized targets.
        tf.keras.layers.Dense(4, activation='sigmoid')
        ])


tic = time.time()     # start time

# Build and compile the model once. The same in-memory model is trained for the
# whole run instead of being reloaded from disk before every mini-batch.
model_h = _build_human_detector(new_dim)
model_h.summary()        # model structure
model_h.compile(                   # setting optimizer, loss function and metrics
                optimizer='sgd',
                loss='mse',
                metrics=[tf.keras.metrics.MeanSquaredError()])

# The epoch counter is deliberately NOT called `j`: that name holds the parsed
# annotation JSON, and overwriting it would break any re-run of the earlier cells.
for epoch in range(0, epoches):
    for i in range(0, batch):
        start = i*mini           # first row of this mini-batch
        end = (i + 1)*mini       # one past the last row of this mini-batch
        images_batch = images[start:end,:,:,:]
        box_batch = box[start:end,:]
        print("No of epochs running = " + str(epoch + 1) + " and No of batch running = " + str(i + 1))

        # One single-step pass over just this slice; feeding slices keeps the
        # per-call input small even though `images` as a whole is very large.
        history = model_h.fit(x = images_batch,
                              y = box_batch,
                              steps_per_epoch=1,
                              epochs=1,
                              verbose=1)

    # Checkpoint once per epoch (model + weights) so an interrupted run loses at
    # most one epoch, and so the files the reload cell expects exist afterwards.
    model_h.save('human_detection_model')
    model_h.save_weights('human_detection_model_weights.h5')

toc = time.time()  #  end time
print(" Time elapsed = " + str((toc - tic)/60) + "minutes")   # time elapsed

In [None]:
# Restore the trained detector from the files written by the training cell, so the
# evaluation cells below can run without retraining.
# NOTE(review): load_model presumably already restores the weights stored with the
# saved model, which would make the extra load_weights call redundant -- confirm.
model_h = load_model('human_detection_model')        # loading the model
model_h.load_weights('human_detection_model_weights.h5')    # loading the weights

In [None]:
# training accuracy
# Visual spot check on one sample: draws the predicted box (red) and the
# ground-truth box (green) on the image and shows it in an OpenCV window.
# No numeric accuracy is computed here.

begin = 1000   # first dataset row passed to the model
final = 2000   # one past the last dataset row passed to the model
num = 850      # offset inside [begin, final) of the sample to visualize
preds = model_h.predict(images[begin:final])      # normalized boxes for rows begin..final-1
preds1 = preds[num]
print(preds1)
preds1 = preds1*new_dim              # normalized [0, 1] coordinates -> pixel coordinates
box_pred = preds1.astype(int)
print(box_pred)
box_true = box[begin + num] * new_dim      # ground truth in pixel coordinates
box_true = box_true.astype(int)
print(box_true)
imaging = images[begin + num] * 255    # undo the [0, 1] pixel scaling
# Round before casting: a plain cast truncates, so a value such as 254.99999
# left by floating-point error would become 254 instead of 255.
iming = np.rint(imaging).astype(np.uint8)
# Pass corners as tuples of plain Python ints; some OpenCV builds refuse numpy
# arrays / numpy integer scalars for point arguments.
pred_tl = tuple(int(v) for v in box_pred[0:2])
pred_br = tuple(int(v) for v in box_pred[2:4])
true_tl = tuple(int(v) for v in box_true[0:2])
true_br = tuple(int(v) for v in box_true[2:4])
cv2.rectangle(iming, pred_tl, pred_br, (0,0,255), 2)  # predicted box, red in BGR
cv2.rectangle(iming, true_tl, true_br, (0,255,0), 2)  # true box, green in BGR
cv2.imshow("Output", iming)
# Poll every 33 ms until any key is pressed (waitKey returns -1 while none is).
while cv2.waitKey(33) == -1:
    pass
cv2.destroyAllWindows()

In [None]:
# testing accuracy
# Visual spot check on one sample: draws the predicted box (red) and the
# ground-truth box (green) on the image and shows it in an OpenCV window.
# NOTE(review): the training loop fits on images[0 : batch * mini], so rows
# 100-199 were seen during training -- this is not a held-out test. Use rows
# the model never trained on for a real generalization check.

begin = 100             # first dataset row passed to the model
final = 200             # one past the last dataset row passed to the model
num = 50                # offset inside [begin, final) of the sample to visualize
preds = model_h.predict(images[begin:final])   # normalized boxes for rows begin..final-1
preds1 = preds[num]
print(preds1)
preds1 = preds1*new_dim     # normalized [0, 1] coordinates -> pixel coordinates
box_pred = preds1.astype(int)
print(box_pred)
print(box[begin + num] * 256)
box_true = (box[begin + num] * new_dim).astype(int)   # ground truth in pixel coordinates
imaging = images[begin + num] * 255   # undo the [0, 1] pixel scaling
# Round before casting: a plain cast truncates, so a value such as 254.99999
# left by floating-point error would become 254 instead of 255.
iming = np.rint(imaging).astype(np.uint8)
# Draw the boxes; previously box_pred was computed and printed but never drawn.
# Corners are passed as tuples of plain Python ints because some OpenCV builds
# refuse numpy arrays / numpy integer scalars for point arguments.
pred_tl = tuple(int(v) for v in box_pred[0:2])
pred_br = tuple(int(v) for v in box_pred[2:4])
true_tl = tuple(int(v) for v in box_true[0:2])
true_br = tuple(int(v) for v in box_true[2:4])
cv2.rectangle(iming, pred_tl, pred_br, (0,0,255), 2)  # predicted box, red in BGR
cv2.rectangle(iming, true_tl, true_br, (0,255,0), 2)  # true box, green in BGR
cv2.imshow("input", iming)            # showing the image along with both boxes
# Poll every 33 ms until any key is pressed (waitKey returns -1 while none is).
while cv2.waitKey(33) == -1:
    pass
cv2.destroyAllWindows()