In [11]:
import numpy as np
import imutils
import time
import cv2
import os
import pandas as pd
from collections import OrderedDict
from scipy.spatial import distance as dist



In [12]:
# NOTE(review): neither FRAME nor IN is referenced in the visible cells of this
# notebook — confirm whether later cells use them before removing.
# Presumably the base name used for extracted frame files — TODO confirm.
FRAME = "frame"
# Absolute, machine-specific Windows directory; presumably where frames are
# read from — TODO confirm, and consider making it configurable.
IN = "C:/Users/DELL/Desktop/frames/"

In [13]:
class CentroidTracker():
    """Assign persistent integer IDs to detections by matching box centroids.

    Each call to `update` receives the bounding boxes found in one frame.
    Existing objects are matched to the nearest new centroid (Euclidean
    distance); unmatched new centroids get a fresh ID, and objects that stay
    unmatched for more than `maxDisappeared` consecutive updates are dropped.
    """

    def __init__(self, maxDisappeared=50):
        """Create an empty tracker.

        maxDisappeared: number of consecutive updates an object may go
            unmatched before it is deregistered.
        """
        # Next ID to hand out; IDs are never reused.
        self.nextObjectID = 0
        # objectID -> last known centroid (array-like of two ints).
        self.objects = OrderedDict()
        # objectID -> number of consecutive updates without a match.
        self.disappeared = OrderedDict()
        self.maxDisappeared = maxDisappeared

    def register(self, centroid):
        """Start tracking `centroid` under the next free object ID."""
        self.objects[self.nextObjectID] = centroid
        self.disappeared[self.nextObjectID] = 0
        self.nextObjectID += 1

    def deregister(self, objectID):
        """Stop tracking `objectID` and forget its disappearance counter."""
        del self.objects[objectID]
        del self.disappeared[objectID]

    def _markDisappeared(self, objectID):
        """Count one missed update for `objectID`; drop it past the limit."""
        self.disappeared[objectID] += 1
        if self.disappeared[objectID] > self.maxDisappeared:
            self.deregister(objectID)

    def update(self, rects):
        """Feed the boxes of one frame to the tracker.

        rects: sequence of (startX, startY, endX, endY) boxes.

        Returns a tuple `(objects, boxMapping)`:
          * objects    -- the tracker's objectID -> centroid mapping;
          * boxMapping -- maps each input centroid, as an (x, y) tuple, to the
                          index of its box in `rects`. If two boxes share a
                          centroid the later index wins.

        The same two-element tuple is returned when `rects` is empty (with an
        empty `boxMapping`), so callers can always unpack the result.
        """
        if len(rects) == 0:
            # Nothing detected: every tracked object missed this frame.
            # Iterate over a copy because _markDisappeared may delete entries.
            for objectID in list(self.disappeared.keys()):
                self._markDisappeared(objectID)
            return (self.objects, {})

        inputCentroids = np.zeros((len(rects), 2), dtype="int")
        boxMapping = {}
        for (i, (startX, startY, endX, endY)) in enumerate(rects):
            cX = int((startX + endX) / 2.0)
            cY = int((startY + endY) / 2.0)
            inputCentroids[i] = (cX, cY)
            boxMapping[tuple(inputCentroids[i])] = i

        if len(self.objects) == 0:
            # Nothing tracked yet: every detection becomes a new object.
            for i in range(0, len(inputCentroids)):
                self.register(inputCentroids[i])

        else:
            objectIDs = list(self.objects.keys())
            objectCentroids = list(self.objects.values())
            # D[r, c] = distance between tracked object r and input centroid c.
            D = dist.cdist(np.array(objectCentroids), inputCentroids)

            # Visit tracked objects in order of their closest-input distance,
            # pairing each with its nearest input centroid.
            rows = D.min(axis=1).argsort()
            cols = D.argmin(axis=1)[rows]

            usedRows = set()
            usedCols = set()

            for (row, col) in zip(rows, cols):
                # Each object and each input may be matched at most once.
                if row in usedRows or col in usedCols:
                    continue

                objectID = objectIDs[row]
                self.objects[objectID] = inputCentroids[col]
                self.disappeared[objectID] = 0

                usedRows.add(row)
                usedCols.add(col)

            unusedRows = set(range(0, D.shape[0])).difference(usedRows)
            unusedCols = set(range(0, D.shape[1])).difference(usedCols)

            if D.shape[0] >= D.shape[1]:
                # At least as many tracked objects as detections: the
                # unmatched objects missed this frame.
                for row in unusedRows:
                    self._markDisappeared(objectIDs[row])
            else:
                # More detections than tracked objects: the leftovers are new.
                for col in unusedCols:
                    self.register(inputCentroids[col])

        return (self.objects, boxMapping)

In [14]:
# Run configuration. The paths are relative to the notebook's working directory.
mask_rcnn = 'mask-rcnn-coco/'   # directory holding the model files
# Presumably the minimum detection score and the mask binarisation cut-off;
# confirm against the processing loop, which should be the consumer of both.
confidence = 0.5
threshold = 0.3
video = 'video.mp4'             # input video file
# out  = os.path.join('C:/Users/DELL/Desktop/PFE/seg_action3.mp4')

In [15]:
# Load the COCO class labels our Mask R-CNN was trained on: one label per
# line of the text file, in class-ID order.
labelsPath = os.path.sep.join([mask_rcnn,"object_detection_classes_coco.txt"])
# Use a context manager so the file handle is closed as soon as it is read.
with open(labelsPath) as labelsFile:
    LABELS = labelsFile.read().strip().split("\n")


In [16]:
# One reproducible random 3-component colour per class label. The seed is fixed
# so the palette is the same on every run; randint's upper bound is exclusive,
# so each component lies in 0..254.
np.random.seed(42)
COLORS = np.random.randint(
    low=0,
    high=255,
    size=(len(LABELS), 3),
    dtype="uint8",
)

In [17]:
# Locate the Mask R-CNN weights (frozen graph) and its text model configuration
# inside the model directory.
weightsPath, configPath = (
    os.path.sep.join([mask_rcnn, modelFile])
    for modelFile in (
        "frozen_inference_graph.pb",
        "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt",
    )
)

In [18]:
# Load the Mask R-CNN network from disk through OpenCV's DNN module, using the
# frozen TensorFlow graph (weightsPath) and its text configuration (configPath).
# Per the original note, the model was trained on the COCO dataset (90 classes).
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)
# Centroid tracker with its default maxDisappeared setting; it keeps state
# across frames, so re-run this cell to reset object IDs before reprocessing.
ct = CentroidTracker()

[INFO] loading Mask R-CNN from disk...


In [19]:
# Open the input video and try to read its frame count up front; the
# processing loop uses `total` for its running-time estimate.
vs = cv2.VideoCapture(video)
writer = None
# try to determine the total number of frames in the video file
try:
    # The property constant lives in a different place in OpenCV 2.x.
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
        else cv2.CAP_PROP_FRAME_COUNT
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))

# an error occurred while trying to determine the total
# number of frames in the video file; catch Exception rather than using a
# bare except so that KeyboardInterrupt/SystemExit still propagate
except Exception:
    print("[INFO] could not determine # of frames in video")
    total = -1


[INFO] 475 total frames in video


In [20]:
# Process the video frame by frame: run Mask R-CNN, track the detections with
# the centroid tracker, tint each tracked instance with a per-ID colour, label
# it with its ID, save and show the frame, and log every instance labelled
# 'person' into the `pixel_person` DataFrame.
frames = []
poly = []
j = 0  # index of the current frame; also used in the saved file name
personRows = []  # one dict per tracked 'person' per frame
PERSON_COLUMNS = ['frame_Id', 'person_id', 'pixel', 'confidence']

try:
    while True:
        (grabbed, frame) = vs.read()
        if not grabbed:
            break
        (H, W) = frame.shape[:2]

        blob = cv2.dnn.blobFromImage(frame, swapRB=True, crop=False)
        net.setInput(blob)
        start = time.time()
        (boxes, masks) = net.forward(["detection_out_final", "detection_masks"])
        end = time.time()

        # Keep, for every accepted detection, its box, class, score and its
        # index in the network output. The lists are parallel, so the index
        # returned by the tracker's boxMapping addresses all of them.
        rects = []
        classIds = []
        scores = []
        detectionIdxs = []
        for i in range(0, boxes.shape[2]):
            classID = int(boxes[0, 0, i, 1])
            score = float(boxes[0, 0, i, 2])
            # `confidence` is the configured minimum score; the per-detection
            # value gets its own name so the setting is never overwritten.
            if score > confidence:
                box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
                rects.append(box.astype("int"))
                classIds.append(classID)
                scores.append(score)
                detectionIdxs.append(i)

        # Track the objects found in this frame. Accept both a
        # (objects, boxMapping) tuple and a bare objects mapping, which the
        # tracker may return when there are no detections.
        tracked = ct.update(rects)
        if isinstance(tracked, tuple):
            objects, boxMapping = tracked
        else:
            objects, boxMapping = tracked, {}

        for (objectID, centroid) in objects.items():
            # Objects that were not detected in this frame have no box here.
            rectIdx = boxMapping.get(tuple(centroid))
            if rectIdx is None:
                continue

            (startX, startY, endX, endY) = (int(v) for v in rects[rectIdx])
            classID = classIds[rectIdx]
            score = scores[rectIdx]

            # Clamp the box to the frame so the ROI slice has exactly the
            # shape the mask is resized to (negative starts would wrap).
            startX, endX = max(0, startX), min(W, endX)
            startY, endY = max(0, startY), min(H, endY)
            boxW = endX - startX
            boxH = endY - startY
            if boxW <= 0 or boxH <= 0:
                continue

            # Use the mask of THIS detection (its index in the network
            # output), not whatever index the detection loop ended on.
            mask = masks[detectionIdxs[rectIdx], classID]
            mask = cv2.resize(mask, (boxW, boxH), interpolation=cv2.INTER_NEAREST)
            mask = (mask > threshold)
            roi = frame[startY:endY, startX:endX][mask]

            # Tracker IDs grow without bound while COLORS has one row per
            # class label, so wrap around instead of indexing out of range.
            color = COLORS[objectID % len(COLORS)]
            blended = ((0.4 * color) + (0.6 * roi)).astype("uint8")
            frame[startY:endY, startX:endX][mask] = blended

            # Draw the ID and centroid after blending so the tint does not
            # paint over them.
            cX, cY = int(centroid[0]), int(centroid[1])
            text = "ID {}".format(objectID)
            cv2.putText(frame, text, (cX - 10, cY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.circle(frame, (cX, cY), 4, (0, 255, 0), -1)

            color = [int(c) for c in color]

            if LABELS[classID] == 'person':
                personRows.append({'frame_Id': j, 'person_id': objectID,
                                   'pixel': color,
                                   'confidence': score})

        if total > 0:
            elap = (end - start)
            print("[INFO] single frame took {:.4f} seconds".format(elap))
            print("[INFO] estimated total time to finish: {:.4f}".format(elap * total))

        # cv2.imwrite expects BGR channel order, which `frame` already has;
        # converting to RGB first would swap red and blue in the saved file.
        cv2.imwrite("C:/Users/DELL/Desktop/PFE/instance_segmentation/mask-rcnn/frames/frame%d.jpg" % j, frame)

        cv2.imshow("show", frame)

        j = j + 1
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
finally:
    # Runs on normal completion, on 'q', and on interruption, so the capture
    # is released and the rows collected so far are not lost.
    print("[INFO] cleaning up...")
    vs.release()
    cv2.destroyAllWindows()
    # Build the frame once instead of appending row by row (quadratic, and
    # DataFrame.append no longer exists in pandas 2.x).
    pixel_person = pd.DataFrame(personRows, columns=PERSON_COLUMNS)

[INFO] single frame took 5.2096 seconds
[INFO] estimated total time to finish: 2474.5404
[INFO] single frame took 5.2690 seconds
[INFO] estimated total time to finish: 2502.7798
[INFO] single frame took 5.2005 seconds
[INFO] estimated total time to finish: 2470.2348
[INFO] single frame took 4.8573 seconds
[INFO] estimated total time to finish: 2307.2347


KeyboardInterrupt: 