<a href="https://colab.research.google.com/github/ChikinH/DeepLearning/blob/main/OpenCVPeopleCounter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Data mining project done by Cousin Antoine during his master's degree.
People are detected and then tracked, so that they can be counted in several ways.

In [None]:
# Mount Google Drive at /content/drive; the model files and videos used below
# are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


Creation of the trackable object class.
It has three fields:
- object id
- counted boolean
- list of centroids (center of bounding box)

In [None]:
class TrackableObject():
  """State kept for one tracked object.

  Attributes are set in the constructor:
  - id: the identifier given by the caller
  - counted: starts as False
  - centroids: list that starts with the first centroid only
  """

  def __init__(self, id, centroid):
    # the history begins with the centroid the object was created with
    history = []
    history.append(centroid)
    self.centroids = history
    self.counted = False
    self.id = id

Now we need a centroid tracker.  
It needs to know how many frames to wait before we consider that an object has disappeared.  
It also needs to store the objects present in the video stream, and those that disappeared fewer than frameBeforeDisappeared frames ago.

In [None]:
from scipy.spatial import distance as dist
from collections import OrderedDict
import numpy as np

class CentroidTracker():
  """Track objects from frame to frame by matching bounding-box centroids.

  Attributes:
    nextID: identifier that the next registered object will receive.
    objects: OrderedDict mapping object id -> last known centroid.
    disappeared: OrderedDict mapping object id -> number of consecutive
      updates in which the object was not matched.
    frameBeforeDisappeared: an object is deregistered once its counter in
      `disappeared` becomes greater than this value.
  """

  def __init__(self, frameBeforeDisappeared=100):
    self.nextID = 0
    self.objects = OrderedDict()
    self.disappeared = OrderedDict()
    self.frameBeforeDisappeared = frameBeforeDisappeared

  def register(self, centroid):
    """Start tracking `centroid` under the next free id."""
    self.objects[self.nextID] = centroid
    self.disappeared[self.nextID] = 0
    self.nextID += 1

  def deregister(self, id):
    """Forget the object `id` (raises KeyError if it is not tracked)."""
    del self.objects[id]
    del self.disappeared[id]

  def _markMissing(self, objectID):
    """Count one more missed update for `objectID`; drop it past the limit."""
    self.disappeared[objectID] += 1
    if self.disappeared[objectID] > self.frameBeforeDisappeared:
      self.deregister(objectID)

  def update(self, rects):
    """Update the tracked objects from the bounding boxes of one frame.

    Args:
      rects: sequence of (startX, startY, endX, endY) bounding boxes given
        by an object detector.

    Returns:
      The `objects` mapping itself (not a copy): object id -> centroid.
    """
    if len(rects) == 0:
      # no object on this image: every tracked object misses one update
      # (iterate over a copy of the keys because entries may be deleted)
      for objectID in list(self.disappeared.keys()):
        self._markMissing(objectID)
      return self.objects

    # compute centroids from rectangles
    inputCentroids = np.zeros((len(rects), 2), dtype="int")
    for (i, (startX, startY, endX, endY)) in enumerate(rects):
      cX = int((startX + endX) / 2)
      cY = int((startY + endY) / 2)
      inputCentroids[i] = (cX, cY)

    # if no object is currently tracked, register all centroids
    if len(self.objects) == 0:
      for centroid in inputCentroids:
        self.register(centroid)
      return self.objects

    # otherwise we match existing centroids, or register new ones
    objectIDs = list(self.objects.keys())
    objectCentroids = list(self.objects.values())

    # distance between each existing centroid (rows) and each input
    # centroid (columns); rows are visited from the closest match to the
    # farthest, each paired with its nearest column
    D = dist.cdist(np.array(objectCentroids), inputCentroids)
    rows = D.min(axis=1).argsort()
    cols = D.argmin(axis=1)[rows]

    usedRows = set()
    usedCols = set()

    for (row, col) in zip(rows, cols):
      # each object and each input centroid can be matched only once
      if row in usedRows or col in usedCols:
        continue

      # update existing centroid
      objectID = objectIDs[row]
      self.objects[objectID] = inputCentroids[col]
      self.disappeared[objectID] = 0

      usedRows.add(row)
      usedCols.add(col)

    unusedRows = set(range(0, D.shape[0])).difference(usedRows)
    unusedCols = set(range(0, D.shape[1])).difference(usedCols)

    # Both leftovers are handled whatever the shape of D: several objects
    # can share the same nearest input centroid, so unmatched objects and
    # unmatched input centroids may exist at the same time.
    for row in unusedRows:
      self._markMissing(objectIDs[row])
    for col in unusedCols:
      self.register(inputCentroids[col])

    return self.objects

Below, we implement our people counter.

In [None]:
import argparse
import cv2
import dlib
import imutils
import time
import pandas as pd
from imutils.video import VideoStream
from imutils.video import FPS
from google.colab.patches import cv2_imshow

We load a pre-trained model that is able to detect several classes of objects.

In [None]:
# code for mobilenet ssd (unused)

#CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
#	"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
#	"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
#	"sofa", "train", "tvmonitor"]

# net = cv2.dnn.readNetFromCaffe("/content/drive/MyDrive/Colab Notebooks/DataMining/mobilenet_ssd/MobileNetSSD_deploy.prototxt.txt", "/content/drive/MyDrive/Colab Notebooks/DataMining/mobilenet_ssd/MobileNetSSD_deploy.caffemodel")

# code for yolo

# class names, one per line; the context manager closes the file after reading
labelsPath = "/content/drive/MyDrive/Colab Notebooks/DataMining/yolo/coco.names"
with open(labelsPath) as labelsFile:
  LABELS = labelsFile.read().strip().split("\n")

# one colour per label; the fixed seed makes the colours reproducible
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
	dtype="uint8")

configPath = "/content/drive/MyDrive/Colab Notebooks/DataMining/yolo/yolov3.cfg"
weightsPath = "/content/drive/MyDrive/Colab Notebooks/DataMining/yolo/yolov3.weights"

net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# names of the output layers of the network. getUnconnectedOutLayers() returns
# 1-based indices, as an Nx1 array on some OpenCV versions and as a flat array
# on others; flattening makes the lookup work with both.
ln = net.getLayerNames()
ln = [ln[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

In [None]:
def my_tracker(input, output, target="person", display=False,
               minConfidence=0.8, skipFrames=30):
  """Count objects of one class crossing the horizontal middle line of a video.

  The detector runs on one frame out of `skipFrames`; in between, the
  detected boxes are followed with dlib correlation trackers. Centroids are
  associated over time with a CentroidTracker, and each object is counted
  once, as going up or going down.

  Args:
    input: video source given to cv2.VideoCapture.
    output: path of the annotated video given to cv2.VideoWriter.
    target: label (an entry of LABELS) of the class to count.
    display: if True, show one frame out of 15 with cv2_imshow.
    minConfidence: a detection is kept only if its class score is strictly
      greater than this value.
    skipFrames: number of frames between two detector runs (must be > 0).

  Returns:
    A pandas DataFrame with columns "id", "frame" and "direction", one row
    per counted object; direction is 0 for "up" and 1 for "down".
  """
  columns = ["id", "frame", "direction"]
  # rows are collected in a list and turned into a DataFrame once at the end
  # (DataFrame.append no longer exists in pandas 2.x)
  records = []

  vs = cv2.VideoCapture(input)
  writer = None

  W = None
  H = None

  ct = CentroidTracker(frameBeforeDisappeared=50)
  trackers = []
  trackableObjects = {}

  totalFrames = 0
  totalDown = 0
  totalUp = 0

  fps = FPS().start()

  try:
    while True:
      # stop as soon as no frame can be read, instead of trusting the frame
      # count reported by the container
      (grabbed, frame) = vs.read()
      if not grabbed or frame is None:
        break

      frame = imutils.resize(frame, width=500)
      rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

      if W is None or H is None:
        (H, W) = frame.shape[:2]
      if writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(output, fourcc, 30, (W, H), True)

      status = "Waiting"
      rects = []

      if totalFrames % skipFrames == 0:
        # detection frame: throw the old trackers away and start new ones
        status = "Detecting"
        trackers = []

        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutput = net.forward(ln)

        # NOTE(review): detections are not de-duplicated (no non-maximum
        # suppression), so overlapping boxes each start their own tracker;
        # confirm that this is intended.
        for layer in layerOutput:
          for detection in layer:
            # the class scores start at index 5 of a detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if LABELS[classID] != target:
              continue

            if confidence > minConfidence:
              # the box is given as relative (centerX, centerY, width,
              # height); scale it to the frame size and convert to corners
              box = detection[0:4] * np.array([W, H, W, H])
              (centerX, centerY, width, height) = box.astype("int")
              startX = int(centerX - (width / 2))
              startY = int(centerY - (height / 2))
              endX = int(centerX + (width / 2))
              endY = int(centerY + (height / 2))

              tracker = dlib.correlation_tracker()
              rect = dlib.rectangle(startX, startY, endX, endY)
              rects.append((startX, startY, endX, endY))
              tracker.start_track(rgb, rect)

              trackers.append(tracker)

      else:
        # tracking frame: move every box with its correlation tracker
        for tracker in trackers:
          status = "Tracking"
          tracker.update(rgb)
          pos = tracker.get_position()
          startX = int(pos.left())
          startY = int(pos.top())
          endX = int(pos.right())
          endY = int(pos.bottom())
          rects.append((startX, startY, endX, endY))

      # counting line in the middle of the frame
      cv2.line(frame, (0, H // 2), (W, H // 2), (0, 255, 255), 2)
      objects = ct.update(rects)
      for (objectID, centroid) in objects.items():
        to = trackableObjects.get(objectID, None)

        if to is None:
          to = TrackableObject(objectID, centroid)

        else:
          # negative when the centroid is above the mean of its previous
          # vertical positions, positive when it is below
          y = [c[1] for c in to.centroids]
          direction = centroid[1] - np.mean(y)
          to.centroids.append(centroid)

          if not to.counted:
            if direction < 0 and centroid[1] < H // 2:
              totalUp += 1
              to.counted = True
              records.append([objectID, totalFrames, 0])

            elif direction > 0 and centroid[1] > H // 2:
              totalDown += 1
              to.counted = True
              records.append([objectID, totalFrames, 1])
        trackableObjects[objectID] = to

        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

      info = [("Up", totalUp), ("Down", totalDown), ("Status", status)]
      for (i, (k, v)) in enumerate(info):
        text = "{} : {}".format(k, v)
        cv2.putText(frame, text, (10, H - ((i * 20) + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
      writer.write(frame)

      if display and totalFrames % 15 == 0:
        cv2_imshow(frame)

      totalFrames += 1
      fps.update()
  finally:
    # release the capture and the writer even if a frame raised an error
    fps.stop()
    vs.release()
    if writer is not None:
      writer.release()

  return pd.DataFrame(records, columns=columns)

In [None]:
# Count the cars (target="car") in the highway video. The annotated video is
# written to the second path, and the DataFrame returned by my_tracker (one row
# per counted object) is kept in `data`.
data = my_tracker("/content/drive/MyDrive/Colab Notebooks/DataMining/Datas/carhighway.mp4","/content/drive/MyDrive/Colab Notebooks/DataMining/Datas/outcarhighwayvideo.mp4", target="car")

In [None]:
# Show the first rows of the counting results.
data.head()

Unnamed: 0,id,frame,direction
0,0,56,1
1,1,58,1
2,4,60,0
3,6,61,1
4,3,86,1


In [None]:
# Summary statistics of the counting results.
data.describe()

Unnamed: 0,id,frame,direction
count,25,25,25
unique,25,24,2
top,25,281,1
freq,1,2,14


In [None]:
# Keep only the rows with direction == 1 (the value my_tracker records for
# objects counted as going down) and display how many there are.
# Note: `data` is overwritten with the filtered rows.
data = data.query("direction == 1")
data.shape[0]

14

It is possible to count cars, people, or any other class listed in LABELS:

In [None]:
# Print every class name read from the labels file; my_tracker compares its
# `target` argument against these names.
print(LABELS)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


The first purpose is to perform statistical analysis, for example on the attendance of shops.  
With enough data extracted from a camera, it could be possible to train a linear model to predict how many customers will come on a given day. Adding data such as the date, the weather, or anything else that affects sales is recommended.

Sources : pyimagesearch.com