In [3]:
import numpy as np
import cv2
import os

In [21]:
def sample_buffer_method(capture, N=60, use_median=True):
    """Evaluate frame-buffer background subtraction against ground-truth masks.

    Every frame read from ``capture`` is converted to grayscale and written
    into a ring buffer holding the ``N`` most recent frames. The background
    estimate is the per-pixel median (or mean) of the buffered frames. Pixels
    whose absolute difference from that estimate exceeds 50 are marked as
    foreground (255); the mask is then cleaned with a 5x5 morphological close
    followed by an open and shown next to the ground truth.

    Starting with frame 300 (frames are numbered from 1) the mask is compared
    pixel-wise with ``../groundtruth/gt%06d.png`` and TP/FP/FN counts are
    accumulated. Precision, recall and F1 are printed when the video ends or
    ``q`` is pressed; each metric is reported as 0 when its denominator is 0.

    Args:
        capture: ``cv2.VideoCapture``-like object providing ``get`` and
            ``read``. It is consumed but not released by this function.
        N: Number of most recent frames kept in the buffer (must be >= 1).
        use_median: If True the background is the per-pixel median of the
            buffer, otherwise the per-pixel mean.

    Raises:
        ValueError: If ``N`` is smaller than 1.
        FileNotFoundError: If a ground-truth image cannot be read.
    """
    if N < 1:
        raise ValueError('N must be at least 1, got %r' % (N,))

    YY = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    XX = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    BUF = np.zeros((YY, XX, N), np.uint8)
    iN = 0      # slot that the next frame overwrites
    filled = 0  # number of slots that already hold a real frame

    TP = 0
    FP = 0
    FN = 0

    iterator = 1
    # The structuring element never changes, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            BUF[:, :, iN] = frame
            iN = (iN + 1) % N
            filled = min(filled + 1, N)

            truth_path = '../groundtruth/gt%06d.png' % iterator
            truth_frame = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
            if truth_frame is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError('Cannot read ground truth image: %s' % truth_path)
            truth_frame = cv2.resize(truth_frame, (XX, YY))

            # Slots are filled in order from index 0, so until the buffer has
            # wrapped only the first `filled` slots contain real frames; the
            # remaining all-zero slots must not bias the background estimate.
            samples = BUF[:, :, :filled]
            if use_median:
                avg = np.median(samples, axis=2).astype(np.uint8)
            else:
                avg = np.mean(samples, axis=2).astype(np.uint8)

            diff = cv2.absdiff(frame, avg)

            # binary threshold
            diff = np.where(diff > 50, 255, 0).astype(np.uint8)

            # morphological operations
            diff = cv2.morphologyEx(diff, cv2.MORPH_CLOSE, kernel)
            diff = cv2.morphologyEx(diff, cv2.MORPH_OPEN, kernel)

            # show diff image and truth image side by side
            cv2.imshow('diff', diff)
            cv2.imshow('truth', truth_frame)

            # Accumulate the confusion counts from frame 300 onwards. The same
            # ground-truth foreground mask is used for TP, FP and FN so that
            # the three counts are mutually consistent.
            if iterator >= 300:
                detected = diff == 255
                missed = diff == 0
                truth_fg = truth_frame > 0
                TP += np.sum(np.logical_and(detected, truth_fg))
                FP += np.sum(np.logical_and(detected, np.logical_not(truth_fg)))
                FN += np.sum(np.logical_and(missed, truth_fg))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            iterator += 1
    finally:
        # Close the preview windows even when the loop is left by an error.
        cv2.destroyAllWindows()

    # Guard the denominators: nothing is counted when the video has fewer than
    # 300 frames or the user quits early.
    precission = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.0
    print('Precission: %f' % precission)
    print('Recall: %f' % recall)
    print('F1: %f' % f1)

In [22]:
# Evaluate the buffered-median background model; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    sample_buffer_method(capture, N=60, use_median=True)
finally:
    capture.release()

Precission: 0.675166
Recall: 0.262577
F1: 0.378106


In [23]:
# Evaluate the buffered-mean background model; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    sample_buffer_method(capture, N=60, use_median=False)
finally:
    capture.release()

Precission: 0.682566
Recall: 0.251001
F1: 0.367032


In [10]:
def approx_method(capture, alpha=0.01, use_median=True):
    """Evaluate a recursively updated background model against ground truth.

    The first grayscale frame initialises the background ``BG``. For every
    later frame the background is updated either with the approximate median
    rule (move each pixel by +1/-1 towards the current frame) or with the
    running average ``alpha * frame + (1 - alpha) * BG``. Pixels whose
    absolute difference from the background exceeds 30 are marked as
    foreground (255); the mask is cleaned with a 5x5 morphological close
    followed by an open and displayed together with the background and the
    ground truth.

    Starting with frame 300 (frames are numbered from 1) the mask is compared
    pixel-wise with ``../groundtruth/gt%06d.png``. Precision, recall and F1
    are printed when the video ends or ``q`` is pressed; each metric is
    reported as 0 when its denominator is 0.

    Args:
        capture: ``cv2.VideoCapture``-like object providing ``get`` and
            ``read``. It is consumed but not released by this function.
        alpha: Weight of the current frame in the running average; only used
            when ``use_median`` is False.
        use_median: If True use the approximate median update, otherwise the
            running average.

    Raises:
        FileNotFoundError: If a ground-truth image cannot be read.
    """
    YY = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    XX = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))

    BG = None

    TP = 0
    FP = 0
    FN = 0

    iterator = 1
    # The structuring element never changes, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break

            truth_path = '../groundtruth/gt%06d.png' % iterator
            truth_frame = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
            if truth_frame is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError('Cannot read ground truth image: %s' % truth_path)
            truth_frame = cv2.resize(truth_frame, (XX, YY))

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if BG is None:
                BG = frame
            elif use_median:
                # if new pixel is brighter than median, median + 1, if darker median - 1 else median
                # (+1 is only selected where BG < frame and -1 only where
                # BG > frame, so the selected values stay inside the dtype range)
                BG = np.where(frame > BG, BG + 1, np.where(frame < BG, BG - 1, BG))
            else:
                BG = alpha * frame + (1 - alpha) * BG

            background = BG.astype(np.uint8)
            diff = cv2.absdiff(frame, background)

            # binary threshold
            diff = np.where(diff > 30, 255, 0).astype(np.uint8)

            # morphological operations
            diff = cv2.morphologyEx(diff, cv2.MORPH_CLOSE, kernel)
            diff = cv2.morphologyEx(diff, cv2.MORPH_OPEN, kernel)

            # show diff image and truth image side by side
            cv2.imshow('diff', diff)
            cv2.imshow('BG', background)
            cv2.imshow('truth', truth_frame)

            # Accumulate the confusion counts from frame 300 onwards. The same
            # ground-truth foreground mask is used for TP, FP and FN so that
            # the three counts are mutually consistent.
            if iterator >= 300:
                detected = diff == 255
                missed = diff == 0
                truth_fg = truth_frame > 0
                TP += np.sum(np.logical_and(detected, truth_fg))
                FP += np.sum(np.logical_and(detected, np.logical_not(truth_fg)))
                FN += np.sum(np.logical_and(missed, truth_fg))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            iterator += 1
    finally:
        # Close the preview windows even when the loop is left by an error.
        cv2.destroyAllWindows()

    # Guard the denominators: nothing is counted when the video has fewer than
    # 300 frames or the user quits early.
    precission = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.0
    print('Precission: %f' % precission)
    print('Recall: %f' % recall)
    print('F1: %f' % f1)

In [25]:
# Evaluate the approximate-median background model; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    approx_method(capture, use_median=True)
finally:
    capture.release()

Precission: 0.657093
Recall: 0.311287
F1: 0.422446


In [29]:
# Evaluate the running-average background model; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    approx_method(capture, alpha=0.01, use_median=False)
finally:
    capture.release()

Precission: 0.652300
Recall: 0.313493
F1: 0.423468


In [36]:
def approx_method_conservative(capture, alpha=0.01, use_median=True):
    """Evaluate a conservatively updated background model against ground truth.

    Works like a recursive background model (approximate median or running
    average), but a background pixel is only updated where the previous
    frame's foreground mask was 0. The first grayscale frame initialises the
    background. Pixels whose absolute difference from the background exceeds
    30 are marked as foreground (255); the mask is cleaned with a 5x5
    morphological close followed by an open and displayed together with the
    background and the ground truth.

    Starting with frame 300 (frames are numbered from 1) the mask is compared
    pixel-wise with ``../groundtruth/gt%06d.png``. Precision, recall and F1
    are printed when the video ends or ``q`` is pressed; each metric is
    reported as 0 when its denominator is 0.

    Args:
        capture: ``cv2.VideoCapture``-like object providing ``get`` and
            ``read``. It is consumed but not released by this function.
        alpha: Weight of the current frame in the running average; only used
            when ``use_median`` is False.
        use_median: If True use the approximate median update, otherwise the
            running average.

    Raises:
        FileNotFoundError: If a ground-truth image cannot be read.
    """
    YY = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    XX = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))

    BG = None

    TP = 0
    FP = 0
    FN = 0

    iterator = 1
    # Mask of the previous frame; it is replaced by the first real mask before
    # the first background update happens.
    prev_diff = np.ones((YY, XX), np.uint8) * 255
    # The structuring element never changes, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break

            truth_path = '../groundtruth/gt%06d.png' % iterator
            truth_frame = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
            if truth_frame is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError('Cannot read ground truth image: %s' % truth_path)
            truth_frame = cv2.resize(truth_frame, (XX, YY))

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if BG is None:
                BG = frame
            elif use_median:
                # if new pixel is brighter than median, median + 1, if darker median - 1 else median
                # however only update the pixel where prev_diff is 0
                stepped = np.where(frame > BG, BG + 1, np.where(frame < BG, BG - 1, BG))
                BG = np.where(prev_diff == 0, stepped, BG)
            else:
                # only update the pixel where prev_diff is 0
                BG = np.where(prev_diff == 0, alpha * frame + (1 - alpha) * BG, BG)

            background = BG.astype(np.uint8)
            diff = cv2.absdiff(frame, background)

            # binary threshold
            diff = np.where(diff > 30, 255, 0).astype(np.uint8)

            # morphological operations
            diff = cv2.morphologyEx(diff, cv2.MORPH_CLOSE, kernel)
            diff = cv2.morphologyEx(diff, cv2.MORPH_OPEN, kernel)

            # show diff image and truth image side by side
            cv2.imshow('diff', diff)
            cv2.imshow('BG', background)
            cv2.imshow('truth', truth_frame)

            # Accumulate the confusion counts from frame 300 onwards. The same
            # ground-truth foreground mask is used for TP, FP and FN so that
            # the three counts are mutually consistent.
            if iterator >= 300:
                detected = diff == 255
                missed = diff == 0
                truth_fg = truth_frame > 0
                TP += np.sum(np.logical_and(detected, truth_fg))
                FP += np.sum(np.logical_and(detected, np.logical_not(truth_fg)))
                FN += np.sum(np.logical_and(missed, truth_fg))

            # The next background update is gated by this frame's mask.
            prev_diff = diff

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            iterator += 1
    finally:
        # Close the preview windows even when the loop is left by an error.
        cv2.destroyAllWindows()

    # Guard the denominators: nothing is counted when the video has fewer than
    # 300 frames or the user quits early.
    precission = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.0
    print('Precission: %f' % precission)
    print('Recall: %f' % recall)
    print('F1: %f' % f1)

In [37]:
# Evaluate the conservative approximate-median model; release the video
# handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    approx_method_conservative(capture, alpha=0.01, use_median=True)
finally:
    capture.release()

Precission: 0.652753
Recall: 0.315407
F1: 0.425308


In [38]:
# Evaluate the conservative running-average model; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    approx_method_conservative(capture, alpha=0.01, use_median=False)
finally:
    capture.release()

Precission: 0.648951
Recall: 0.318844
F1: 0.427599


In [61]:
def gaussian_mixture_models(capture, history=500, threshold=16, detectShadows=True, apply_morphological_operations=False):
    """Evaluate OpenCV's MOG2 background subtractor against ground-truth masks.

    Every frame read from ``capture`` is passed to a subtractor created with
    ``cv2.createBackgroundSubtractorMOG2``. Optionally the resulting mask is
    cleaned with a 5x5 morphological close followed by an open. The mask is
    displayed for each frame.

    Starting with frame 300 (frames are numbered from 1) the mask is compared
    pixel-wise with ``../groundtruth/gt%06d.png``; any non-zero mask or
    ground-truth value counts as foreground. Precision, recall and F1 are
    printed when the video ends or ``q`` is pressed; each metric is reported
    as 0 when its denominator is 0.

    Args:
        capture: ``cv2.VideoCapture``-like object providing ``get`` and
            ``read``. It is consumed but not released by this function.
        history: Forwarded as ``history`` to the subtractor factory.
        threshold: Forwarded as ``varThreshold`` to the subtractor factory.
        detectShadows: Forwarded as ``detectShadows`` to the subtractor factory.
        apply_morphological_operations: If True, post-process the mask with a
            close followed by an open.

    Raises:
        FileNotFoundError: If a ground-truth image cannot be read.
    """
    YY = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    XX = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))

    TP = 0
    FP = 0
    FN = 0

    iterator = 1

    fgbg = cv2.createBackgroundSubtractorMOG2(history=history, varThreshold=threshold, detectShadows=detectShadows)
    # The structuring element never changes, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break

            truth_path = '../groundtruth/gt%06d.png' % iterator
            truth_frame = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
            if truth_frame is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError('Cannot read ground truth image: %s' % truth_path)
            truth_frame = cv2.resize(truth_frame, (XX, YY))

            fgmask = fgbg.apply(frame)

            if apply_morphological_operations:
                fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_CLOSE, kernel)
                fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)

            # show diff image
            cv2.imshow('diff', fgmask)

            # calculate f1 using ground truth
            # NOTE(review): with detectShadows=True the subtractor presumably
            # marks shadow pixels with a non-zero value below 255; `> 0` then
            # counts them as foreground. Confirm this is intended.
            if iterator >= 300:
                detected = fgmask > 0
                truth_fg = truth_frame > 0
                TP += np.sum(np.logical_and(detected, truth_fg))
                FP += np.sum(np.logical_and(detected, np.logical_not(truth_fg)))
                FN += np.sum(np.logical_and(np.logical_not(detected), truth_fg))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            iterator += 1
    finally:
        # Close the preview windows even when the loop is left by an error.
        cv2.destroyAllWindows()

    # Guard the denominators: nothing is counted when the video has fewer than
    # 300 frames or the user quits early.
    precission = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.0
    print('Precission: %f' % precission)
    print('Recall: %f' % recall)
    print('F1: %f' % f1)

In [52]:
# Evaluate MOG2 with its default settings; release the video handle
# afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    gaussian_mixture_models(capture)
finally:
    capture.release()

TP: 855711 | FP: 1199277 | FN: 1169950
Precission: 0.416407
Recall: 0.422435
F1: 0.419399


In [58]:
# Evaluate MOG2 with shadow detection explicitly enabled; release the video
# handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    gaussian_mixture_models(capture, detectShadows=True)
finally:
    capture.release()

Precission: 0.416407
Recall: 0.422435
F1: 0.419399


In [59]:
# Evaluate MOG2 with a higher variance threshold and no shadow detection;
# release the video handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    gaussian_mixture_models(capture, threshold=32, detectShadows=False)
finally:
    capture.release()

Precission: 0.515695
Recall: 0.375884
F1: 0.434828


In [60]:
# Evaluate MOG2 with a very short history and no shadow detection; release
# the video handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    gaussian_mixture_models(capture, history=5, detectShadows=False)
finally:
    capture.release()

Precission: 0.251850
Recall: 0.248584
F1: 0.250206


In [62]:
# Evaluate MOG2 with a higher variance threshold, no shadow detection and
# morphological clean-up; release the video handle afterwards even if the
# evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    gaussian_mixture_models(capture, threshold=32, detectShadows=False, apply_morphological_operations=True)
finally:
    capture.release()

Precission: 0.545978
Recall: 0.444394
F1: 0.489976


In [64]:
def knn(capture, history=500, distance=400.0, detectShadows=True, apply_morphological_operations=False):
    """Evaluate OpenCV's KNN background subtractor against ground-truth masks.

    Every frame read from ``capture`` is passed to a subtractor created with
    ``cv2.createBackgroundSubtractorKNN``. Optionally the resulting mask is
    cleaned with a 5x5 morphological close followed by an open. The mask is
    displayed for each frame.

    Starting with frame 300 (frames are numbered from 1) the mask is compared
    pixel-wise with ``../groundtruth/gt%06d.png``; any non-zero mask or
    ground-truth value counts as foreground. Precision, recall and F1 are
    printed when the video ends or ``q`` is pressed; each metric is reported
    as 0 when its denominator is 0.

    Args:
        capture: ``cv2.VideoCapture``-like object providing ``get`` and
            ``read``. It is consumed but not released by this function.
        history: Forwarded as ``history`` to the subtractor factory.
        distance: Forwarded as ``dist2Threshold`` to the subtractor factory.
        detectShadows: Forwarded as ``detectShadows`` to the subtractor factory.
        apply_morphological_operations: If True, post-process the mask with a
            close followed by an open.

    Raises:
        FileNotFoundError: If a ground-truth image cannot be read.
    """
    YY = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    XX = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))

    TP = 0
    FP = 0
    FN = 0

    iterator = 1

    fgbg = cv2.createBackgroundSubtractorKNN(history=history, dist2Threshold=distance, detectShadows=detectShadows)
    # The structuring element never changes, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                break

            truth_path = '../groundtruth/gt%06d.png' % iterator
            truth_frame = cv2.imread(truth_path, cv2.IMREAD_GRAYSCALE)
            if truth_frame is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError('Cannot read ground truth image: %s' % truth_path)
            truth_frame = cv2.resize(truth_frame, (XX, YY))

            fgmask = fgbg.apply(frame)

            if apply_morphological_operations:
                fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_CLOSE, kernel)
                fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)

            # show diff image
            cv2.imshow('diff', fgmask)

            # calculate f1 using ground truth
            # NOTE(review): with detectShadows=True the subtractor presumably
            # marks shadow pixels with a non-zero value below 255; `> 0` then
            # counts them as foreground. Confirm this is intended.
            if iterator >= 300:
                detected = fgmask > 0
                truth_fg = truth_frame > 0
                TP += np.sum(np.logical_and(detected, truth_fg))
                FP += np.sum(np.logical_and(detected, np.logical_not(truth_fg)))
                FN += np.sum(np.logical_and(np.logical_not(detected), truth_fg))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            iterator += 1
    finally:
        # Close the preview windows even when the loop is left by an error.
        cv2.destroyAllWindows()

    # Guard the denominators: nothing is counted when the video has fewer than
    # 300 frames or the user quits early.
    precission = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0.0
    print('Precission: %f' % precission)
    print('Recall: %f' % recall)
    print('F1: %f' % f1)

In [65]:
# Evaluate the KNN subtractor with its default settings; release the video
# handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    knn(capture)
finally:
    capture.release()

Precission: 0.483599
Recall: 0.376745
F1: 0.423536


In [67]:
# Evaluate the KNN subtractor without shadow detection; release the video
# handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    knn(capture, detectShadows=False)
finally:
    capture.release()

Precission: 0.483898
Recall: 0.376813
F1: 0.423694


In [69]:
# Evaluate the KNN subtractor with a lower squared-distance threshold;
# release the video handle afterwards even if the evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    knn(capture, distance=100.0)
finally:
    capture.release()

Precission: 0.221441
Recall: 0.457756
F1: 0.298488


In [73]:
# Evaluate the KNN subtractor with a tuned distance threshold and
# morphological clean-up; release the video handle afterwards even if the
# evaluation raises.
capture = cv2.VideoCapture('pedestrians_input.mp4')
try:
    knn(capture, distance=394.0, apply_morphological_operations=True)
finally:
    capture.release()

Precission: 0.519531
Recall: 0.430653
F1: 0.470935
