# Colab Preparation

In [0]:
# Fetch the PersonReID project sources.
!git clone https://github.com/MikeCun/PersonReID.git
# Move the cloned files into the current working directory.
!mv PersonReID/* ./
# Install the dependencies listed in the project's requirements file.
!pip3 install -r requirements.txt
# Run the project's setup script.
!python3 setup.py install

# Person ReID Demo

A quick intro to using the pre-trained Mask RCNN and EANet models to re-identify persons.

In [0]:
import os
import sys

# The notebook's working directory doubles as the project root.
ROOT_DIR = os.path.abspath("./")

# Put the project root on the module search path *before* importing the
# bundled Mask RCNN package, so the local copy of the library is found.
sys.path.append(ROOT_DIR)
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize, coco  # coco provides the COCO config

# Where logs and trained models are saved.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path of the COCO-trained weights; they are downloaded only when
# the file is not already present.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Configurations, Create Model and Load Trained Weights

In [0]:
class InferenceConfig(coco.CocoConfig):
    """COCO configuration specialised for single-image inference.

    The batch size is GPU_COUNT * IMAGES_PER_GPU; both are set to 1 because
    inference is run on one image at a time.
    """

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

# Instantiate the inference-time configuration.
config = InferenceConfig()

# Build the Mask RCNN model in inference mode, then load the weights
# trained on MS-COCO (matched by layer name).
model = modellib.MaskRCNN(config=config, model_dir=MODEL_DIR, mode="inference")
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Class labels, indexed by class id. Index 0 is the background ('BG') and
# index 1 is 'person'.
class_names = [
    'BG',
    'person',
    'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Run Person Detection and Store the feature

We use Mask RCNN to detect the persons in the sampled video frames, then use EANet to extract their features and store them in 'feature_file.h5' by LSH.

In [0]:
import os
import cv2
import sys
import h5py
import torch
import pickle
import warnings
import progressbar
import numpy as np
from easydict import EasyDict
from functools import partial
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

%matplotlib inline

from package.eval.np_distance import compute_dist
from package.optim.eanet_trainer import EANetTrainer
from face_recognition.api import compare_faces as face_com
from face_recognition.api import face_locations as face_loc
from face_recognition.api import face_encodings as face_enc
from face_recognition.api import load_image_file as face_load

# Silence library warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Make pickle's loaders decode with 'latin1', which the EANet model files
# require when they are loaded.
# NOTE(review): unpickling can execute arbitrary code -- only load model
# files that come from a trusted source.
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
pickle.load = partial(pickle.load, encoding="latin1")

# Options handed to the EANet trainer.
args = EasyDict({
    'exp_dir': './',  # EANet model path
    'cfg_file': 'package/config/default.py',
    'ow_file': 'paper_configs/PCB.txt',
    'ow_str': "cfg.only_infer = True",
})

eanet_trainer = EANetTrainer(args=args)

# Work paths: all results go below <cwd>/reid_result/<video name>/
general_workpath = os.path.join(os.getcwd(), 'reid_result/') # General workpath
workvideo = os.path.join(os.getcwd(), "test_2.avi")
videoname = os.path.splitext(os.path.split(workvideo)[1])[0]

# Per-video output directory (creating it again on a re-run is a no-op)
newworkpath = os.path.join(general_workpath, videoname) + '/'
os.makedirs(newworkpath, exist_ok=True)

# Open the video and fail early with a clear message; otherwise an unreadable
# file reports 0 fps and only surfaces later as a ZeroDivisionError.
capture = cv2.VideoCapture(workvideo)
if not capture.isOpened():
    raise IOError("Cannot open the video file: %s" % workvideo)
total_frame = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1

# Progress bar for the feature extraction
p = progressbar.ProgressBar()
p.start(total_frame)
# Loop flag of the frame loop: it holds the success flag of the last read,
# so (despite its name) it is True while frames are still being read.
is_stopped = True
# A GPU speeds up the face feature extraction
cuda = torch.cuda.is_available()

# Dictionaries collecting the crop paths and their features
body_feature_dict = {'path':[], 'feat':[]}
face_feature_dict = {'path':[], 'feat':[]}

# Sampling frequency: by default one frame per second of video.
# Clamp to 1 so a container that reports 0 fps cannot cause a division by zero.
fps = max(int(capture.get(cv2.CAP_PROP_FPS)), 1)
frame_freq = fps
# To sample more densely, set a fixed step instead, e.g. frame_freq = 8

def frame_to_time(frame_now, fps=fps):
    """Convert a frame index into an 'HH-MM-SS-mmm' time stamp string.

    Args:
        frame_now: Index of the frame in the video.
        fps: Frames per second of the video. Defaults to the value of the
            module-level `fps` at the time this function is defined.

    Returns:
        The time stamp formatted as "%02d-%02d-%02d-%03d"
        (hours, minutes, seconds, milliseconds).

    Raises:
        ValueError: If `fps` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive, got %r" % (fps,))
    # Whole seconds elapsed, plus the frame's offset inside the current second
    secs, frame_in_sec = divmod(frame_now, fps)
    # Convert the frame offset into real milliseconds (it used to be emitted
    # as a raw frame count). Distinct frames still map to distinct values.
    ms = frame_in_sec * 1000 // fps
    m, s = divmod(secs, 60)
    h, m = divmod(m, 60)
    time_stamp = ("%02d-%02d-%02d-%03d" % (h, m, s, ms))
    return time_stamp

while is_stopped:

    # Read the next frame; `is_stopped` receives the success flag of the read
    # and therefore ends the loop once no further frame is available.
    is_stopped, frame = capture.read()
    frame_now = int(capture.get(cv2.CAP_PROP_POS_FRAMES))

    # Only every `frame_freq`-th successfully read frame is processed
    if (frame_now % frame_freq == 0 and is_stopped):

        # Detection on the sampled frame
        detection = model.detect([frame], verbose=0)[0]
        # To inspect the detections, call:
        # visualize.display_instances(frame, detection['rois'], detection['masks'],
        #                             detection['class_ids'], class_names, detection['scores'])

        for det_idx, box in enumerate(detection['rois']):
            # Skip boxes that are all zeros
            if not np.any(box):
                continue

            # Keep only detections of class id 1 (person) scoring at least 0.95
            is_person = detection['class_ids'][det_idx] == 1
            if not (is_person and detection['scores'][det_idx] >= 0.95):
                continue

            top, left, bottom, right = box  # Target area of the pedestrian

            # The crop is named after the video time stamp plus the index of
            # the detection inside the frame
            crop_name = frame_to_time(frame_now) + '_' + str(det_idx)
            crop_path = newworkpath + crop_name + '.jpg'

            # Write the pedestrian crop unless a file of that name already exists
            if not os.path.exists(crop_path):
                cv2.imwrite(crop_path, frame[top: bottom, left: right])

            # Body feature of the pedestrian, extracted from the stored crop
            body_vec = eanet_trainer.infer_one_im(im_path=crop_path, squeeze=True)['feat']
            # Paths are stored encoded (h5py only supports ASCII in strings):
            # encode() when storing, decode() when using
            body_feature_dict['path'].append(crop_path.encode())
            body_feature_dict['feat'].append(body_vec)

            # Face feature via the Face Recognition API; the 'cnn' model is
            # requested only when a GPU is available
            crop_pixels = face_load(crop_path)
            face_boxes = face_loc(crop_pixels, model="cnn" if cuda else None)
            if face_boxes:  # Sometimes no face is detected in the crop
                face_vec = face_enc(crop_pixels, known_face_locations=face_boxes)[0]
                face_feature_dict['path'].append(crop_path.encode())
                face_feature_dict['feat'].append(face_vec)

    p.update(frame_now) # Update the progress
    
# The frame loop is done: release the video and finish the progress bar first,
# so both are cleaned up even if writing the feature files below fails.
capture.release()
p.finish()

# Store the paths and features using HDF5. The `with` blocks close the files,
# so no explicit close() is needed afterwards.
with h5py.File(newworkpath + videoname + "_body_feature_data.h5", "w") as body_feature_file:
    body_feature_file.create_dataset(videoname + '_path_data', data=body_feature_dict['path'])
    body_feature_file.create_dataset(videoname + '_feat_data', data=body_feature_dict['feat'])

with h5py.File(newworkpath + videoname + "_face_feature_data.h5", "w") as face_feature_file:
    face_feature_file.create_dataset(videoname + '_path_data', data=face_feature_dict['path'])
    face_feature_file.create_dataset(videoname + '_feat_data', data=face_feature_dict['feat'])

# Person Re-Identification
We use LSH to search for the person among the features that were extracted and stored in the file.

In [0]:
# Set the ReID picture path
reidpicpath = './reid_test_1.png'
reid_image = cv2.imread(reidpicpath, cv2.IMREAD_COLOR)
# cv2.imread returns None instead of raising when the file cannot be read
if reid_image is None:
    raise FileNotFoundError("Cannot read the ReID picture: %s" % reidpicpath)

# Set the ReID workpath (only the file name of the video path is used)
reidvideo_path = '/content/test_2.avi'
videoname = os.path.splitext(os.path.split(reidvideo_path)[1])[0]
reidworkpath = os.path.join(general_workpath, videoname) + '/'
# The directory only exists once the features of this video have been
# extracted. Test for its existence: a joined path string is always truthy.
if not os.path.exists(reidworkpath):
    raise FileNotFoundError("The %s video hasn't exacted the feature!" % (videoname))

# Get the ReID picture name
reidname = os.path.splitext(os.path.split(reidpicpath)[1])[0]

# Detection on the ReID (query) picture
reid_results = model.detect([reid_image], verbose=0)
reid_r = reid_results[0]

# Dictionary of the ReID result, one entry per detected pedestrian
reid_dict = {'reid_path':[], 'reid_rank':[]}
# Stored body / face features of the video; filled from the HDF5 files below
reid_body_feature_dict = {'reid_path':[], 'reid_rank':[]}
reid_face_feature_dict = {'reid_path':[], 'reid_rank':[]}


# To visualize the detection result of the ReID picture, call:
# visualize.display_instances(reid_image, reid_r['rois'], reid_r['masks'], reid_r['class_ids'], class_names, reid_r['scores'])
warnings.filterwarnings('ignore')

# Load the stored body and face features. `dataset[()]` reads the whole
# dataset; it replaces `Dataset.value`, which was removed in h5py 3.
with h5py.File(reidworkpath + videoname + "_body_feature_data.h5", "r") as reid_body_feature_file:
    reid_body_feature_dict['reid_path'] = reid_body_feature_file[videoname + '_path_data'][()]
    reid_body_feature_dict['reid_feat'] = reid_body_feature_file[videoname + '_feat_data'][()]

with h5py.File(reidworkpath + videoname + "_face_feature_data.h5", "r") as reid_face_feature_file:
    reid_face_feature_dict['reid_path'] = reid_face_feature_file[videoname + '_path_data'][()]
    reid_face_feature_dict['reid_feat'] = reid_face_feature_file[videoname + '_feat_data'][()]
    
# Stored (gallery) paths and features of the video
gallery_body_paths = reid_body_feature_dict['reid_path']
gallery_body_feats = reid_body_feature_dict['reid_feat']
gallery_face_paths = reid_face_feature_dict['reid_path']
gallery_face_feats = reid_face_feature_dict['reid_feat']

for i in range(reid_r['rois'].shape[0]):
    # Process every pedestrian in the ReID picture; skip all-zero boxes
    if not np.any(reid_r['rois'][i]):
        continue

    # Keep only detections of class id 1 (person) scoring at least 0.95
    if not (reid_r['class_ids'][i] == 1 and reid_r['scores'][i] >= 0.95):
        continue

    y1, x1, y2, x2 = reid_r['rois'][i] # Get the target area of pedestrian

    # The crop is named after the ReID picture plus the detection index
    reid_pic_name = str(reidname) + '_' + str(i)
    reid_img_path = reidworkpath + reid_pic_name + '.jpg'
    reid_temp_list = [] # Collects the paths of all matching stored crops

    # Write the pedestrian crop unless a file of that name already exists
    if not os.path.exists(reid_img_path):
        cv2.imwrite(reid_img_path, reid_image[y1: y2, x1: x2])

    # Body matching (skipped when no body features were stored for the video)
    if len(gallery_body_feats) > 0:
        reid_body_feature = eanet_trainer.infer_one_im(im_path=reid_img_path, squeeze=False)['feat']
        # Distances between the query body feature and every stored body feature
        cosine_distance = np.asarray(compute_dist(reid_body_feature, gallery_body_feats)[0])
        # Threshold below which two crops are treated as the same pedestrian
        distinguished_distance = 2 * np.mean(cosine_distance) - np.max(cosine_distance)
        for j in np.flatnonzero(cosine_distance <= distinguished_distance):
            reid_temp_list.append(gallery_body_paths[j])

    # Face matching, only when a face is found in the query crop
    reid_face_np = face_load(reid_img_path)
    reid_face_location = face_loc(reid_face_np, model="cnn" if cuda else None)
    if reid_face_location:
        reid_face_feature = face_enc(reid_face_np, known_face_locations=reid_face_location)[0]
        # reid_face_result holds one True/False per stored face feature
        reid_face_result = face_com(gallery_face_feats, reid_face_feature, tolerance=0.58)
        reid_temp_list.extend(
            path for path, is_same in zip(gallery_face_paths, reid_face_result) if is_same
        )

    reid_dict['reid_path'].append(reid_img_path.encode()) # Path of the query crop

    if reid_temp_list:
        # De-duplicate; sorting makes the stored order reproducible across runs
        # (the iteration order of a set of bytes changes between sessions).
        # NOTE(review): pedestrians with different numbers of matches give
        # rows of different lengths here, which presumably cannot be written
        # as a single HDF5 dataset later -- confirm and adjust the storage format.
        reid_dict['reid_rank'].append(sorted(set(reid_temp_list)))
    else:
        # If ReID fails, a placeholder picture is shown instead
        reid_dict['reid_rank'].append([(os.getcwd() + '/fail.jpg').encode()]) # h5py only supports ASCII in string
            
# Store the ReID list result for visualization
reid_result_file_path = reidworkpath + videoname + "_" + reidname + "_reid_result.h5"
with h5py.File(reid_result_file_path, "w") as reid_result_file:
    reid_result_file.create_dataset(reidname + '_reid_path', data=reid_dict['reid_path'])
    reid_result_file.create_dataset(reidname + '_reid_rank', data=reid_dict['reid_rank'])

# Record where the result of this (video, picture) query is stored.
# The record file is opened in append mode, so drop a previous entry for the
# same query first; otherwise re-running this cell fails because the dataset
# name already exists.
reid_record_key = videoname + "-" + reidpicpath
with h5py.File(general_workpath + "reid_record.h5", "a") as reid_record:
    if reid_record_key in reid_record:
        del reid_record[reid_record_key]
    reid_record.create_dataset(reid_record_key, data=reid_result_file_path)

# All HDF5 files were opened in `with` blocks and are already closed here.

# Visualization ReID Result

In [0]:
import matplotlib.image as mpimg

# Set the ReID picture and video paths for visualization
visualize_reid_pic_path = './reid_test_1.png' # The path can't be changed after ReID queries
visualize_reid_video_path = os.path.join(os.getcwd(), "test_2.avi")
reidvideoname = os.path.splitext(os.path.split(visualize_reid_video_path)[1])[0]
vis_picname = os.path.splitext(os.path.split(visualize_reid_pic_path)[1])[0]

# Look up the result file of this query in the stored ReID record.
# `dataset[()]` replaces `Dataset.value`, which was removed in h5py 3.
with h5py.File(general_workpath + "reid_record.h5", "r") as reid_record:
    reid_result_file_path = reid_record[reidvideoname + "-" + visualize_reid_pic_path][()]
# Depending on the h5py version the stored string comes back as bytes or str
if isinstance(reid_result_file_path, bytes):
    reid_result_file_path = reid_result_file_path.decode()

# Load the stored ReID list result
reid_vis_dict = {'reid_path':[], 'reid_rank':[]}
with h5py.File(reid_result_file_path, "r") as reid_vis_dict_file:
    reid_vis_dict['reid_path'] = reid_vis_dict_file[vis_picname + '_reid_path'][()]
    reid_vis_dict['reid_rank'] = reid_vis_dict_file[vis_picname + '_reid_rank'][()]

for i in range(len(reid_vis_dict['reid_path'])):

    # Show the query crop first ...
    plt.title("Query Picture: " + vis_picname)
    plt.imshow(mpimg.imread(reid_vis_dict['reid_path'][i].decode()))
    plt.show()

    # ... followed by every stored crop that was matched to it
    for j in range(len(reid_vis_dict['reid_rank'][i])):
        reid_pic_path = reid_vis_dict['reid_rank'][i][j].decode()
        reid_picname = os.path.splitext(os.path.split(reid_pic_path)[1])[0]
        plt.title(reid_picname)
        plt.imshow(mpimg.imread(reid_pic_path))
        plt.show()