In [1]:
# All we need is coco.names, v3, v3-tiny, v4, v4-tiny weights and cfgs files. So downloading them
# !git clone https://github.com/AlexeyAB/darknet.git

#Weights
!curl https://pjreddie.com/media/files/yolov3-tiny.weights --output yolov3-tiny.weights
    
!curl https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg --output yolov3-tiny.cfg
    
!curl https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/coco.names --output coco.names

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0 33.7M    0 32461    0     0  35092      0  0:16:49 --:--:--  0:16:49 35055
  3 33.7M    3 1135k    0     0   583k      0  0:00:59  0:00:01  0:00:58  583k
 20 33.7M   20 6959k    0     0  2346k      0  0:00:14  0:00:02  0:00:12 2345k
 37 33.7M   37 12.5M    0     0  3220k      0  0:00:10  0:00:03  0:00:07 3219k
 49 33.7M   49 16.6M    0     0  3480k      0  0:00:09  0:00:04  0:00:05 3479k
 64 33.7M   64 21.9M    0     0  3825k      0  0:00:09  0:00:05  0:00:04 4534k
 76 33.7M   76 25.7M    0     0  3792k      0  0:00:09  0:00:06  0:00:03 5039k
 84 33.7M   84 28.7M    0     0  3732k      0  0:00:09  0:00:07  0:00:02 4569k
 94 33.7M   94 31.7M    0     0  3654k      0  0:00:09  0:00:08  0:00:01 4007k
100 33.7M  100 33.7M    0     0  3658k      0  0:00

In [2]:
import cv2
import numpy as np
import time
import pandas as pd
import imutils
from imutils.video import FPS

In [3]:
from keras.models import load_model
# Classifier applied to every detected car crop in the sequential detection loop:
# crops are resized to 200x200x3 before predict(), and a score >= 0.5 is read as
# 'suv', anything lower as 'sedan'.
model = load_model('my_h5_model.h5')

# Using For Loop

In [4]:
## Sequential baseline: run YOLOv3-tiny on every frame, classify each detected car as
## sedan/SUV with `model`, write an annotated video and a per-frame CSV of counts.

INPUT_FILE='assignment-clip.mp4'
OUTPUT_FILE='assignment-output.mp4'
CONFIG_FILE='yolov3-tiny.cfg'
WEIGHTS_FILE='yolov3-tiny.weights'

# Capturing the Video and printing most information from the video
vs = cv2.VideoCapture(INPUT_FILE)
print(f'FPS: {vs.get(cv2.CAP_PROP_FPS)}')
print(f'No of Frames: {vs.get(cv2.CAP_PROP_FRAME_COUNT)}')
print(f'Frame Width: {vs.get(cv2.CAP_PROP_FRAME_WIDTH)}')
print(f'Frame Height: {vs.get(cv2.CAP_PROP_FRAME_HEIGHT)}')

# Frame size, filled in from the first decoded frame
H = None
W = None

fps = FPS().start()

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
writer = cv2.VideoWriter(OUTPUT_FILE, fourcc, 30, (428, 576), True)

LABELS = ['sedan', 'suv']

np.random.seed(4)

#For Yolo 3
net = cv2.dnn.readNet(CONFIG_FILE, WEIGHTS_FILE)

#For Yolo 4
#!pip install opencv-python==4.4.0.40
#net = cv2.dnn_DetectionModel(CONFIG_FILE, WEIGHTS_FILE)

# determine only the *output* layer names that we need from YOLO.
# getUnconnectedOutLayers() returns an Nx1 array on some OpenCV builds and a flat
# array on others; flattening first makes the lookup work with both.
ln = net.getLayerNames()
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]
cnt = 0

obj_count = []
frames = []
suvs = []
sedans = []

process_start_time = time.time()

while True:
  cnt += 1
  flag, image = vs.read()
  if not flag:
    break
  frames.append(cnt)

  # Per-frame counters. They are reset on every frame so that a frame without any
  # detection records 0 instead of raising NameError (first frame) or silently
  # repeating the previous frame's SUV/sedan counts.
  car = 0
  suv = 0
  sedan = 0

  if W is None or H is None:
    (H, W) = image.shape[:2]

  blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (320, 320), swapRB=True, crop=False)
  net.setInput(blob)
  layerOutputs = net.forward(ln)

  boxes = []
  confidences = []
  classIDs = []

  for output in layerOutputs:
    for detection in output:
      scores = detection[5:]
      classID = np.argmax(scores)
      confidence = scores[classID]

      # keep only sufficiently confident detections of class 2
      # (the class this cell counts as "car")
      if confidence > 0.3 and classID == 2:
        # scale the bounding box coordinates back relative to the
        # size of the image, keeping in mind that YOLO actually
        # returns the center (x, y)-coordinates of the bounding
        # box followed by the boxes' width and height
        box = detection[0:4] * np.array([W, H, W, H])
        (centerX, centerY, width, height) = box.astype("int")

        # use the center (x, y)-coordinates to derive the top
        # and left corner of the bounding box
        x = int(centerX - (width / 2))
        y = int(centerY - (height / 2))

        boxes.append([x, y, int(width), int(height)])
        confidences.append(float(confidence))
        classIDs.append(classID)

  # apply non-maxima suppression to suppress weak, overlapping bounding
  # boxes
  idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.05, 0.2)

  # ensure at least one detection exists
  if len(idxs) > 0:
    # loop over the indexes we are keeping
    for i in np.asarray(idxs).flatten():
      # extract the bounding box coordinates
      x, y, w, h = boxes[i]

      # Skip boxes that start outside the frame or have no area: they cannot be
      # cropped into a valid classifier input.
      if x < 0 or y < 0 or w <= 0 or h <= 0:
        continue

      # Images are indexed [row, column] == [y, x], so the vertical range comes first.
      cropped = image[y:y + h, x:x + w]
      if cropped.size == 0:
        continue
      car += 1

      res = cv2.resize(cropped, dsize=(200, 200), interpolation=cv2.INTER_LINEAR)
      res = res.reshape((1, 200, 200, 3))
      prediction_probability = model.predict(res)

      if prediction_probability.item() >= 0.5:
        label = 'suv'
        suv += 1
      else:
        label = 'sedan'
        sedan += 1

      cv2.rectangle(image, (x, y), (x + w, y + h), (122,  68, 143), 2)

      # caption: detection confidence plus the predicted car type, drawn on a filled
      # background; y is clamped so the caption stays inside the frame
      text = f"car: {confidences[i]:.2f} {label}"
      labelSize, baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
      y = max(y, labelSize[1])
      cv2.rectangle(image, (x, y - labelSize[1]), (x + labelSize[0], y), (122,  68, 143), cv2.FILLED)
      cv2.putText(image, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255))

  suvs.append(suv)
  sedans.append(sedan)
  obj_count.append(car)

  # running average throughput since the loop started
  process_complete_time = time.time() - process_start_time
  fps_val = cnt / process_complete_time
  cv2.putText(image, f'FPS: {round(fps_val,2)}', (10,50), cv2.FONT_HERSHEY_SIMPLEX,
              0.75, (0,0,0), 1)
  cv2.putText(image, f'Cars in Frame: {car}', (10,80), cv2.FONT_HERSHEY_SIMPLEX,
              0.75, (0,0,0), 1)

  # show the output image
  # cv2.imshow("output", cv2.resize(image,(428, 576)))
  writer.write(cv2.resize(image,(428, 576)))
  fps.update()

  key = cv2.waitKey(1) & 0xFF
  if key == ord("q"):
    break

fps.stop()

print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# one row per processed frame
answer_df = pd.DataFrame({
  'Frame Id': frames,
  'Car Count': obj_count,
  'SUV': suvs,
  'Sedan': sedans,
})
answer_df.to_csv('Acutal Truth.csv', index=False)

# do a bit of cleanup
cv2.destroyAllWindows()

# release the file pointers
print("[INFO] cleaning up...")
writer.release()
vs.release()

FPS: 30.0
No of Frames: 900.0
Frame Width: 428.0
Frame Height: 576.0
[INFO] elasped time: 220.53
[INFO] approx. FPS: 4.08
[INFO] cleaning up...


# Using Semaphore

In [5]:
from concurrent.futures import ThreadPoolExecutor
import threading

class ProducerConsumer:
    """Two-thread detection pipeline that hands over one frame at a time.

    produce() decodes a frame and runs YOLO detection on it; consume() draws the
    detections, writes the frame to the output video and records the per-frame count.
    The threads alternate strictly through two locks: the producer waits on p_lock
    and wakes the consumer by releasing c_lock, the consumer does the opposite.
    Results travel through instance attributes, which is safe because only one side
    runs between a pair of lock operations.
    """

    def __init__(self, input_file='assignment-clip.mp4',
                 output_file='assignment-output-lock-pro-con.mp4',
                 config_file='yolov3-tiny.cfg',
                 weights_file='yolov3-tiny.weights'):
        """Open the input video, the output writer and the detection network.

        Args:
            input_file: path of the video to process.
            output_file: path of the annotated video to write (mp4v, 30 fps, 428x576).
            config_file: darknet cfg file of the network.
            weights_file: darknet weights file of the network.
        """
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        self.writer = cv2.VideoWriter(output_file, fourcc, 30, (428, 576), True)

        self.net = cv2.dnn.readNet(config_file, weights_file)
        layer_names = self.net.getLayerNames()
        # getUnconnectedOutLayers() returns an Nx1 array on some OpenCV builds and a
        # flat array on others; flattening first makes the lookup work with both.
        self.ln = [layer_names[i - 1]
                   for i in np.asarray(self.net.getUnconnectedOutLayers()).flatten()]

        self.video_stream = cv2.VideoCapture(input_file)
        self.total_frames = int(self.video_stream.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f'FPS: {self.video_stream.get(cv2.CAP_PROP_FPS)}')
        print(f'No of Frames: {self.video_stream.get(cv2.CAP_PROP_FRAME_COUNT)}')
        print(f'Frame Width: {self.video_stream.get(cv2.CAP_PROP_FRAME_WIDTH)}')
        print(f'Frame Height: {self.video_stream.get(cv2.CAP_PROP_FRAME_HEIGHT)}')

        # State handed from the producer to the consumer for the frame in flight.
        self.image = None
        self.idxs = []
        self.boxes = []
        self.confidences = []
        self.classIDs = []
        # True only when the attributes above describe a freshly produced frame.
        self.frame_ok = False
        # Set by the consumer when it stops early so the producer stops as well.
        self.stop_requested = False
        # Overwritten when produce() starts; set here so it always exists.
        self.process_start_time = time.time()

        self.p_lock = threading.Lock()
        self.c_lock = threading.Lock()
        # The consumer must not run before the first frame has been produced.
        self.c_lock.acquire()

    def produce(self):
        """Decode frames in order, run detection and hand each result to consume().

        Stops early when a frame cannot be read or when the consumer asked to stop.
        In every case the consumer is woken, so it never waits on a frame that will
        not arrive.
        """
        self.process_start_time = time.time()
        for fno in range(self.total_frames):
            self.p_lock.acquire()
            # Until the frame is fully processed the consumer must not trust the
            # shared state; this is also what it sees if anything below fails.
            self.frame_ok = False
            try:
                if self.stop_requested:
                    return

                # Frames are read sequentially and only by this thread, so no seek
                # is needed to reach frame `fno`.
                grabbed, image = self.video_stream.read()
                if not grabbed:
                    return

                (H, W) = image.shape[:2]

                blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (320, 320), swapRB=True, crop=False)
                self.net.setInput(blob)
                layerOutputs = self.net.forward(self.ln)

                boxes = []
                confidences = []
                classIDs = []

                for output in layerOutputs:
                    for detection in output:
                        scores = detection[5:]
                        classID = np.argmax(scores)
                        confidence = scores[classID]

                        # filter out weak predictions by ensuring the detected
                        # probability is greater than the minimum probability
                        if confidence > 0.3:
                            # scale the bounding box coordinates back relative to the
                            # size of the image, keeping in mind that YOLO actually
                            # returns the center (x, y)-coordinates of the bounding
                            # box followed by the boxes' width and height
                            box = detection[0:4] * np.array([W, H, W, H])
                            (centerX, centerY, width, height) = box.astype("int")

                            # use the center (x, y)-coordinates to derive the top and
                            # left corner of the bounding box
                            x = int(centerX - (width / 2))
                            y = int(centerY - (height / 2))

                            boxes.append([x, y, int(width), int(height)])
                            confidences.append(float(confidence))
                            classIDs.append(classID)

                # apply non-maxima suppression to suppress weak, overlapping bounding
                # boxes
                self.idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.05, 0.2)
                self.boxes = boxes
                self.confidences = confidences
                self.classIDs = classIDs
                self.image = image
                self.frame_ok = True
            finally:
                # Always wake the consumer, with frame_ok telling it whether there is
                # a frame to process or the stream has ended.
                self.c_lock.release()

    def consume(self):
        """Annotate and write every produced frame, then save the counts and clean up."""
        obj_count = []
        frames = []

        LABELS_FILE='coco.names'
        with open(LABELS_FILE) as labels_fh:
            LABELS = labels_fh.read().strip().split("\n")

        np.random.seed(4)
        COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

        try:
            for fno in range(self.total_frames):
                self.c_lock.acquire()
                try:
                    if not self.frame_ok:
                        # The producer stopped without delivering a frame.
                        break

                    image = self.image
                    frames.append(fno+1)
                    # NOTE(review): this counts detections of every class, although
                    # the column is called 'Car Count' -- confirm that is intended.
                    obj_count.append(len(self.idxs))

                    # ensure at least one detection exists
                    if len(self.idxs) > 0:
                        # loop over the indexes we are keeping
                        for i in np.asarray(self.idxs).flatten():
                            # extract the bounding box coordinates
                            (x, y, w, h) = self.boxes[i]

                            # Crop of the detection, kept for a later classifier step.
                            # Images are indexed [row, column] == [y, x].
                            cropped = image[y:y + h, x:x + w]

                            color = [int(c) for c in COLORS[self.classIDs[i]]]

                            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

                            text = "{}: {:.4f}".format(LABELS[self.classIDs[i]], self.confidences[i])
                            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    # fno is 0-based, so fno + 1 frames have been handled so far
                    process_complete_time = time.time() - self.process_start_time
                    fps_val = (fno + 1) / process_complete_time
                    cv2.putText(image, f'FPS: {round(fps_val,2)}', (10,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

                    # show the output image
                    # cv2.imshow("output", cv2.resize(image,(428, 576)))
                    self.writer.write(cv2.resize(image,(428, 576)))

                    key = cv2.waitKey(1) & 0xFF
                    if key == ord("q"):
                        self.stop_requested = True
                        break
                except Exception:
                    # Make sure the producer does not keep waiting for a consumer
                    # that is about to die.
                    self.stop_requested = True
                    raise
                finally:
                    # Hand control back on every exit path; leaving the loop without
                    # this would block the producer forever.
                    self.p_lock.release()

            answer_df = pd.DataFrame({'Frame Id': frames, 'Car Count': obj_count})
            answer_df.to_csv('Acutal Truth.csv', index=False)
        finally:
            # do a bit of cleanup
            cv2.destroyAllWindows()

            # release the file pointers
            print("[INFO] cleaning up...")
            print(f"Processing time: {time.time() - self.process_start_time}")
            self.writer.release()
            self.video_stream.release()

if __name__ == '__main__':
    pc = ProducerConsumer()
    with ThreadPoolExecutor(max_workers=2) as executor:
        # One worker produces detections, the other consumes them.
        futures = [executor.submit(pc.produce), executor.submit(pc.consume)]
        # Future.result() re-raises whatever the worker raised; without it a failure
        # inside produce()/consume() would be discarded silently.
        for future in futures:
            future.result()

FPS: 30.0
No of Frames: 900.0
Frame Width: 428.0
Frame Height: 576.0
[INFO] cleaning up...
Processing time: 66.88559246063232


# Using MultiThreading

In [7]:
from threading import Thread
from queue import Queue

queue = Queue()

class Producer(Thread):
  """Reads frames from `video_stream`, runs YOLO detection and queues the results.

  Every queued item is a dict with the frame number, the frame and its detections.
  A final `None` is queued when the stream ends (or the thread fails) so the
  consumer knows no more frames are coming.
  """

  def run(self):
    frame = 0
    try:
      while True:
        flag, image = video_stream.read()
        if not flag:
          break
        frame += 1

        # Scale boxes with the real frame size instead of a hard-coded one.
        (H, W) = image.shape[:2]

        blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (320, 320), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        boxes = []
        confidences = []
        classIDs = []

        for output in layerOutputs:
          for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter out weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            if confidence > 0.3:
              # scale the bounding box coordinates back relative to the
              # size of the image, keeping in mind that YOLO actually
              # returns the center (x, y)-coordinates of the bounding
              # box followed by the boxes' width and height
              box = detection[0:4] * np.array([W, H, W, H])
              (centerX, centerY, width, height) = box.astype("int")

              # use the center (x, y)-coordinates to derive the top
              # and left corner of the bounding box
              x = int(centerX - (width / 2))
              y = int(centerY - (height / 2))

              boxes.append([x, y, int(width), int(height)])
              confidences.append(float(confidence))
              classIDs.append(classID)

        # apply non-maxima suppression to suppress weak, overlapping bounding
        # boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.05, 0.2)

        queue.put({"frame": frame, "image": image, "idxs": idxs, "boxes": boxes, "confidences": confidences, "classIDs": classIDs})
    finally:
      # End-of-stream marker; sent on failure too so the consumer never blocks
      # forever on queue.get().
      queue.put(None)

class Consumer(Thread):
  """Takes detection results off `queue`, annotates and writes each frame.

  Runs until the producer's `None` end-of-stream marker arrives (or "q" is pressed),
  then saves the per-frame counts and releases the video resources.
  """

  def run(self):
    while True:
      data = queue.get()
      if data is None:
        # The producer has finished; the number of frames is not assumed up front.
        break

      image = data['image']
      frame = data['frame']
      idxs = data['idxs']
      boxes = data['boxes']
      confidences = data['confidences']
      classIDs = data['classIDs']

      frames.append(frame)
      # NOTE(review): this counts detections of every class, although the column is
      # called 'Car Count' -- confirm that is intended.
      obj_count.append(len(idxs))

      # ensure at least one detection exists
      if len(idxs) > 0:
        # loop over the indexes we are keeping
        for i in np.asarray(idxs).flatten():
          # extract the bounding box coordinates
          (x, y, w, h) = boxes[i]

          # Crop of the detection, kept for a later classifier step.
          # Images are indexed [row, column] == [y, x].
          cropped = image[y:y + h, x:x + w]

          color = [int(c) for c in COLORS[classIDs[i]]]

          cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

          text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
          cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

      # running average throughput since processing started
      process_complete_time = time.time() - process_start_time
      fps_val = frame / process_complete_time
      cv2.putText(image, f'FPS: {round(fps_val,2)}', (10,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

      # show the output image
      # cv2.imshow("output", cv2.resize(image,(428, 576)))
      writer.write(cv2.resize(image,(428, 576)))

      key = cv2.waitKey(1) & 0xFF
      if key == ord("q"):
        break

    answer_df['Frame Id'] = frames
    answer_df['Car Count'] = obj_count
    answer_df.to_csv('Acutal Truth.csv', index=False)

    # do a bit of cleanup
    cv2.destroyAllWindows()

    # release the file pointers
    print("[INFO] cleaning up...")
    print(f"Processing time: {time.time() - process_start_time}")
    writer.release()
    video_stream.release()


# Shared state read by the Producer and Consumer threads defined in this cell.
obj_count = []
frames = []
video_stream = cv2.VideoCapture('assignment-clip.mp4')
total_frames = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))

print(f'FPS: {video_stream.get(cv2.CAP_PROP_FPS)}')
print(f'No of Frames: {video_stream.get(cv2.CAP_PROP_FRAME_COUNT)}')
print(f'Frame Width: {video_stream.get(cv2.CAP_PROP_FRAME_WIDTH)}')
print(f'Frame Height: {video_stream.get(cv2.CAP_PROP_FRAME_HEIGHT)}')

fourcc = cv2.VideoWriter_fourcc(*"MJPG")
OUTPUT_FILE='assignment-output-multithread-pro-con.avi'

writer = cv2.VideoWriter(OUTPUT_FILE, fourcc, 30, (428, 576), True)

CONFIG_FILE='yolov3-tiny.cfg'
WEIGHTS_FILE='yolov3-tiny.weights'

net = cv2.dnn.readNet(CONFIG_FILE, WEIGHTS_FILE)
ln = net.getLayerNames()
# getUnconnectedOutLayers() returns an Nx1 array on some OpenCV builds and a flat
# array on others; flattening first makes the lookup work with both.
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

LABELS_FILE='coco.names'
# read the class names and close the file right away
with open(LABELS_FILE) as labels_fh:
    LABELS = labels_fh.read().strip().split("\n")

# fixed seed so every class keeps the same box colour between runs
np.random.seed(4)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

answer_df = pd.DataFrame()
process_start_time = time.time()

# Detection and annotation/writing run in separate threads connected by `queue`.
p = Producer()
p.start()

c = Consumer()
c.start()

# wait for both threads so the cell ends only when the output is complete
p.join()
c.join()

FPS: 30.0
No of Frames: 900.0
Frame Width: 428.0
Frame Height: 576.0
[INFO] cleaning up...
Processing time: 32.73280906677246


# F1 Accuracy

In [8]:
from sklearn.metrics import f1_score

# 'Acutal Truth.csv' holds the per-frame counts produced by the detector. Each of the
# three pipelines above writes to this same file, so it reflects whichever ran last.
actual_truth = pd.read_csv('Acutal Truth.csv')
actual_truth_activity_1_column = actual_truth['Car Count'].values

# Hand-labelled per-frame totals to compare against.
ground_truth = pd.read_excel('Ground Truth.xlsx')
ground_truth_activity_1_column = ground_truth['Total'].values

# scikit-learn expects f1_score(y_true, y_pred): the labelled counts go first and the
# detector's counts second. Each distinct count is treated as a class; with
# average='micro' the score is the share of frames whose count matches exactly.
f1_score(ground_truth_activity_1_column, actual_truth_activity_1_column, average='micro')

0.53