In [None]:
# This notebook is part of a solution for face and upper-torso recognition in images.
# It runs in a Kaggle environment, and the model is trained on a GPU.
# The notebook trains the model and then tests it.
# Created by Dusan Radivojevic 26.02.2022.

In [1]:
# installation of necessary modules with appropriate versions

pip install pixellib==0.7.0

In [2]:
pip install scikit-image==0.15.0

In [3]:
pip install labelme2coco==0.1.2

In [4]:
pip install tensorflow==2.4.0

In [10]:
# get appropriate pretrained model

!wget "https://github.com/ayoolaolafenwa/PixelLib/releases/download/1.2/mask_rcnn_coco.h5"

In [11]:
# Copy the database from the input directory into the current working directory,
# which allows both reading and writing.
# NOTE(review): `distutils` is deprecated and removed in Python 3.12; on Python >= 3.8
# `shutil.copytree(..., dirs_exist_ok=True)` is the replacement. Kept here because the
# pinned TensorFlow version may run on an older interpreter.

from distutils.dir_util import copy_tree

from_dir = "../input/face-database"
to_dir = "./"
copy_tree(src=from_dir, dst=to_dir)

In [12]:
# Sanity check before training: load the dataset and display one sample image from it.

import pixellib
from pixellib.custom_train import instance_custom_training

dataset_dir = "./Face_and_torso_database"

vis_img = instance_custom_training()
vis_img.load_dataset(dataset_dir)
vis_img.visualize_sample()

In [13]:
# Prepare the model for training: configure the network, start from the
# downloaded COCO weights, then attach the dataset.

train_maskrcnn = instance_custom_training()
train_maskrcnn.modelConfig(
    network_backbone="resnet101",
    num_classes=2,  # presumably "face" and "upper_torso" (background not counted) - see the inference cell
    batch_size=2,
    gpu_count=1,
)
train_maskrcnn.load_pretrained_model("./mask_rcnn_coco.h5")
train_maskrcnn.load_dataset("./Face_and_torso_database")

In [14]:
# Train the model for 100 epochs with augmentation enabled; checkpoints are
# written to the "mask_rcnn_models" directory.

train_maskrcnn.train_model(
    num_epochs=100,
    path_trained_models="mask_rcnn_models",
    layers="all",
    augmentation=True,
)
# output of the best epoch:
# Epoch 69/100
# 100/100 [==============================] - 60s 602ms/step - batch: 49.5000 - size: 2.0000 - loss: 0.2061 - rpn_class_loss: 0.0020 - rpn_bbox_loss: 0.0306 - mrcnn_class_loss: 0.0161 - mrcnn_bbox_loss: 0.0317 - mrcnn_mask_loss: 0.1257 - val_loss: 0.3219 - val_rpn_class_loss: 0.0074 - val_rpn_bbox_loss: 0.0594 - val_mrcnn_class_loss: 0.0215 - val_mrcnn_bbox_loss: 0.0729 - val_mrcnn_mask_loss: 0.1607

In [19]:
# visualization of model prediction on one test image

import glob
import os
import re

from pixellib.instance import custom_segmentation
from PIL import Image
import matplotlib.pyplot as plt

# Checkpoint from the recorded training run (epoch 69, val_loss 0.321938).
# The file name embeds the epoch number and validation loss, so a fresh training run
# produces differently named files. If the recorded checkpoint is missing, fall back
# to the checkpoint on disk whose file name carries the lowest validation loss.
model_path = "./mask_rcnn_models/mask_rcnn_model.069-0.321938.h5"
if not os.path.isfile(model_path):
    checkpoint_losses = {}
    for candidate in glob.glob("./mask_rcnn_models/mask_rcnn_model.*.h5"):
        loss_match = re.search(r"-(\d+\.\d+)\.h5$", candidate)
        if loss_match:
            checkpoint_losses[candidate] = float(loss_match.group(1))
    if not checkpoint_losses:
        raise FileNotFoundError(
            "No trained checkpoint found in ./mask_rcnn_models; run the training cell first."
        )
    model_path = min(checkpoint_losses, key=checkpoint_losses.get)
    print("Recorded checkpoint not found; using", model_path)

segment_image = custom_segmentation()
segment_image.inferConfig(
    num_classes=2,
    class_names=["BG", "face", "upper_torso"],
    network_backbone="resnet101",
    detection_threshold=0.5,
    image_max_dim=512,
    image_min_dim=512,
)
segment_image.load_model(model_path)

im_path = '../input/face-database/Face_and_torso_database/test/Alex_Popov_0001.jpg'
out_path = "./Out_img.jpg"
segment_image.segmentImage(im_path, show_bboxes=True, output_image_name=out_path)

# Open the result inside a context manager so the file handle is released after plotting.
with Image.open(out_path) as im:
    plt.imshow(im)
plt.axis("off")
plt.show()

In [15]:
# evaluation of model prediction at an IoU threshold of 0.5

import glob
import os
import re

# Checkpoint from the recorded training run (epoch 69, val_loss 0.321938).
# The file name embeds the epoch number and validation loss, so a fresh training run
# produces differently named files. If the recorded checkpoint is missing, fall back
# to the checkpoint on disk whose file name carries the lowest validation loss.
eval_model_path = "./mask_rcnn_models/mask_rcnn_model.069-0.321938.h5"
if not os.path.isfile(eval_model_path):
    eval_checkpoint_losses = {}
    for eval_candidate in glob.glob("./mask_rcnn_models/mask_rcnn_model.*.h5"):
        eval_loss_match = re.search(r"-(\d+\.\d+)\.h5$", eval_candidate)
        if eval_loss_match:
            eval_checkpoint_losses[eval_candidate] = float(eval_loss_match.group(1))
    if not eval_checkpoint_losses:
        raise FileNotFoundError(
            "No trained checkpoint found in ./mask_rcnn_models; run the training cell first."
        )
    eval_model_path = min(eval_checkpoint_losses, key=eval_checkpoint_losses.get)
    print("Recorded checkpoint not found; evaluating", eval_model_path)

train_maskrcnn1 = instance_custom_training()
train_maskrcnn1.modelConfig(network_backbone="resnet101", num_classes=2)
train_maskrcnn1.load_dataset("./Face_and_torso_database")
train_maskrcnn1.evaluate_model(eval_model_path, 0.5)
# output (recorded run):
#        Using resnet101 as network backbone For Mask R-CNN model
#        ./mask_rcnn_models/mask_rcnn_model.069-0.321938.h5 evaluation using iou_threshold 0.5 is 0.824917 