In [1]:
import numpy as np
import cv2
import os
import time
from matplotlib import pyplot as plt
from tqdm import tqdm
import glob
# Notebook-wide default locations (relative to the working directory).
# Later cells rebind these names, so their value depends on which cell ran last.
frames_dir = "preprocessed-frames"    # folder the input frames are read from
output_dir = "bg-detection-frames"    # default folder name for output frames

In [8]:
# Preview of OpenCV's default HOG + linear-SVM people detector: run it on each
# frame in `frames_dir`, draw the detections, and display the last annotated
# frame inline.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

last_annotated = None  # most recent frame that was read and annotated
for i in tqdm(range(1, 290)):
    # Build the path from the loop index so that every frame is processed.
    frame_path = f"{frames_dir}/frame{i}.jpg"
    image = cv2.imread(frame_path)
    if image is None:
        # cv2.imread returns None (it does not raise) when the file is missing
        # or unreadable; skip the frame instead of failing on image.copy().
        tqdm.write(f"[WARN] could not read {frame_path}, skipping")
        continue
    orig = image.copy()

    # Detect people in the frame; rects holds (x, y, w, h) boxes.
    (rects, weights) = hog.detectMultiScale(
        image, winStride=(4, 4), padding=(8, 8), scale=1.05)
    # tqdm.write keeps the progress bar intact, unlike a bare print().
    tqdm.write(str(rects))

    # Raw detections in red (BGR) on the untouched copy.
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # Convert (x, y, w, h) to corner form (xA, yA, xB, yB).
    # Non-maxima suppression is not applied in this cell, so every detection
    # is drawn as a final box.
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])

    # Final boxes in green on the frame that will be displayed.
    for (xA, yA, xB, yB) in rects:
        cv2.rectangle(image, (int(xA), int(yA)), (int(xB), int(yB)), (0, 255, 0), 2)

    last_annotated = image

# Show the last annotated frame once, after the loop: calling imshow on every
# iteration only stacks artists on the same axes. OpenCV stores images as BGR
# while matplotlib expects RGB, so convert before plotting.
if last_annotated is not None:
    plt.imshow(cv2.cvtColor(last_annotated, cv2.COLOR_BGR2RGB))

0it [00:00, ?it/s]


In [2]:
# Video sink for the annotated frames: Motion-JPEG codec, 15 fps, and a
# (width, height) frame size of 512x480.
mjpg_fourcc = cv2.VideoWriter_fourcc(*"MJPG")
out = cv2.VideoWriter("hog.avi", mjpg_fourcc, 15, (512, 480))

In [4]:
# Run the default HOG people detector on frames 1-249 of `frames_dir`, draw
# the detections in red, and save every annotated frame both to the `out`
# video writer (opened in an earlier cell) and as a JPEG in `output_dir`.
output_dir="hog"

os.makedirs(output_dir, exist_ok=True)

# The detector does not depend on the frame, so build it once instead of
# once per iteration.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

try:
    for i in tqdm(range(1, 250)):
        frame_path = f"{frames_dir}/frame{i}.jpg"
        image = cv2.imread(frame_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; skip it rather than crash inside the detector.
            tqdm.write(f"[WARN] could not read {frame_path}, skipping")
            continue

        # Detect people in the frame; locations holds (x, y, w, h) boxes.
        locations, confidence = hog.detectMultiScale(image)

        # Draw a red (BGR) rectangle around each detection.
        for (x, y, w, h) in locations:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 5)

        # NOTE(review): VideoWriter expects frames of the size it was opened
        # with; this assumes the frames on disk already have that size - confirm.
        out.write(image)
        cv2.imwrite(f"{output_dir}/frame{i}.jpg", image)
finally:
    # Finalize the video file even if the loop is interrupted or fails.
    out.release()

100%|██████████| 249/249 [00:21<00:00, 11.76it/s]


In [2]:
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import imutils
import cv2

In [4]:
# HOG people detection with non-maxima suppression (NMS): annotate frames
# 1-249 of `frames_dir`, then save them to "hog-imutils.avi" and as JPEGs in
# `output_dir`.
output_dir="hog-imutils"
frames_dir='preprocessed-frames-no-gaussian'

os.makedirs(output_dir, exist_ok=True)

# All frames are assumed to share the dimensions of the first one.
ex_image = cv2.imread(f"{frames_dir}/frame1.jpg")
if ex_image is None:
    # cv2.imread returns None instead of raising; fail with a clear message.
    raise FileNotFoundError(f"could not read {frames_dir}/frame1.jpg")
(height, width) = ex_image.shape[:2]

# Keep a minimum working width for detection. When upscaling, scale the height
# by the same factor; the factor must be computed from the ORIGINAL width,
# before `width` is overwritten.
desired_width=400
if width<desired_width:
    ratio=desired_width/float(width)
    width=desired_width
    height=int(height*ratio)

# (width, height) of the frames written to the video and to disk.
video_size = (512, 480)
out = cv2.VideoWriter("hog-imutils.avi", cv2.VideoWriter_fourcc(*"MJPG"), 15, video_size)

# The detector does not depend on the frame, so build it once.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

try:
    for i in tqdm(range(1, 250)):
        frame_path = f"{frames_dir}/frame{i}.jpg"
        image = cv2.imread(frame_path)
        if image is None:
            tqdm.write(f"[WARN] could not read {frame_path}, skipping")
            continue

        # Resize to the working size computed above.
        image = cv2.resize(image, (width, height))

        # Detection runs on the colour frame (the grayscale/equalized copy that
        # used to be computed here was never passed to the detector).
        # winStride: step of the sliding detection window over the image.
        # padding:   extra context added around each detection window.
        # scale:     factor between levels of the image pyramid used to find
        #            people at different sizes.
        locations, confidence = hog.detectMultiScale(
            image, winStride=(4, 4), padding=(8, 8), scale=1.05)

        # Colour-code every raw detection by its SVM score (colours are BGR).
        # A single if/elif chain covers the whole range, including scores
        # exactly on a threshold. Flatten the scores so each one is a scalar.
        scores = np.ravel(confidence)
        for (x, y, w, h), score in zip(locations, scores):
            if score < 0.13:
                color = (0, 0, 0)         # black: very low confidence
            elif score < 0.3:
                color = (0, 0, 255)       # red: low confidence
            elif score < 0.7:
                color = (50, 122, 255)    # orange: medium confidence
            else:
                color = (0, 255, 0)       # green: high confidence
            cv2.rectangle(image, (x, y), (x+w, y+h), color, 2)

        # Apply non-maxima suppression with a fairly large overlap threshold
        # so that overlapping boxes that are still distinct people are kept.
        boxes = np.array([[x, y, x + w, y + h] for (x, y, w, h) in locations])
        pick = non_max_suppression(boxes, probs=None, overlapThresh=0.65)

        # Final boxes: draw only the boxes that survived NMS, in green.
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(image, (int(xA), int(yA)), (int(xB), int(yB)), (0, 255, 0), 2)

        # Force the exact frame size the writer was opened with. imutils.resize
        # ignores `height` when `width` is given, so it cannot guarantee this.
        original_dim_image = cv2.resize(image, video_size, interpolation=cv2.INTER_AREA)
        out.write(original_dim_image)
        # `i` is still the frame number here: the detection loop above no
        # longer reuses it as its own index.
        cv2.imwrite(f"{output_dir}/frame{i}.jpg", original_dim_image)
finally:
    # Finalize the video file even if the loop is interrupted or fails.
    out.release()

  0%|          | 0/249 [00:00<?, ?it/s]

100%|██████████| 249/249 [01:05<00:00,  3.79it/s]
