INF8770 Technologies multimédias

Polytechnique Montréal

Importation des bibliothèques

In [None]:
import csv
import glob
import os
import urllib.request as request

import cv2
import matplotlib.pyplot as plt
import numpy
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from skimage import io

Installation du package ByteTrack

In [None]:
%%bash
# Clone the ByteTrack repository unless a "ByteTrack" directory is already
# present in the current working directory.
[ -d "ByteTrack" ] || {
  echo "Importing Model"
  git clone https://github.com/ifzhang/ByteTrack.git
}

In [None]:
%cd ByteTrack
! pip install -r requirements.txt
! python setup.py develop

Importation des données

In [None]:
# glob.glob returns paths in arbitrary, filesystem-dependent order. Frames and
# ground-truth masks are later paired by position, so every list is sorted.
# NOTE(review): this assumes input and ground-truth file names sort in the same
# frame order (e.g. zero-padded frame numbers) - confirm against the dataset.
highway = sorted(glob.glob(os.path.join("./data/baseline/highway/input", "*.jpg")))
office = sorted(glob.glob(os.path.join("./data/baseline/office/input", "*.jpg")))
pedestrians = sorted(glob.glob(os.path.join("./data/baseline/pedestrians/input", "*.jpg")))
PETS2006 = sorted(glob.glob(os.path.join("./data/baseline/PETS2006/input", "*.jpg")))

highway_ref = sorted(glob.glob(os.path.join("./data/baseline/highway/groundtruth", "*.png")))
office_ref = sorted(glob.glob(os.path.join("./data/baseline/office/groundtruth", "*.png")))
pedestrians_ref = sorted(glob.glob(os.path.join("./data/baseline/pedestrians/groundtruth", "*.png")))
PETS2006_ref = sorted(glob.glob(os.path.join("./data/baseline/PETS2006/groundtruth", "*.png")))

Background Subtraction

In [None]:
def generate_background_substrated_image(image_path, background_images, *, threshold=25, min_detection_ratio=0.7):
  """Build a binary foreground mask for one frame by voting against several backgrounds.

  A pixel is marked as foreground when its absolute grey-level difference with
  a background image is at least ``threshold`` for at least
  ``min_detection_ratio * len(background_images)`` of the background images.

  Args:
    image_path: Path of the frame; it is read as greyscale with OpenCV.
    background_images: Non-empty sequence of greyscale background arrays.
      Presumably each has the same shape as the frame - verify against caller.
    threshold: Minimum absolute difference for a pixel to count as changed
      with respect to one background image.
    min_detection_ratio: Fraction of the background images that must report a
      change for the pixel to be kept as foreground.

  Returns:
    An integer array holding 1 for foreground pixels and 0 elsewhere.

  Raises:
    ValueError: If ``background_images`` is empty.
    OSError: If OpenCV cannot read ``image_path``.
  """
  if len(background_images) == 0:
    raise ValueError("background_images must contain at least one image")

  frame = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
  if frame is None:
    # cv2.imread reports a failure by returning None instead of raising.
    raise OSError(f"Unable to read image: {image_path}")
  image = frame.astype(float)

  min_detections = min_detection_ratio * len(background_images)

  # Per pixel, count how many backgrounds differ noticeably from the frame.
  votes = numpy.zeros(image.shape, dtype=int)
  for background_image in background_images:
    votes += numpy.abs(image - background_image) >= threshold

  return (votes >= min_detections).astype('int')

Instance Segmentation

In [None]:
# Class names indexed by label id: 'background' first, followed by the names
# listed (one per line) in the downloaded label file.
COCO_NAMES = ['background']
# The context manager closes the HTTP response once the labels are read.
with request.urlopen('https://raw.githubusercontent.com/gabilodeau/INF6804/master/utils/coco-labels.txt') as label_names:
  for label_name in label_names.readlines():
    COCO_NAMES.append(label_name.strip().decode('UTF-8'))

def Mask_RCNN(dataset, threshold=0.30, model=None):
  """Segment every image of ``dataset`` with a Mask R-CNN and merge the instance masks.

  For each image, the masks of all detections whose score is at least
  ``threshold`` are merged with a pixel-wise maximum, whatever the predicted
  class.

  Args:
    dataset: Sequence of image file paths, read with ``skimage.io.imread``.
    threshold: Minimum detection score for an instance mask to be kept.
    model: Optional detection model, called with a list holding one image
      tensor. When omitted, a ``maskrcnn_resnet50_fpn`` with default weights
      is created and switched to eval mode.

  Returns:
    A list with one 2-D float array per prediction (one per image here). Each
    array has the image's height and width and is zero where no kept mask
    contributes.
  """
  preprocess = transforms.Compose([transforms.ToTensor()])
  if model is None:
    model = models.detection.maskrcnn_resnet50_fpn(weights='DEFAULT').eval()
  result = []

  for image_number, img_path in enumerate(dataset):
    print('SI - Image #' + str(image_number))
    img = io.imread(img_path)

    # Inference only: disabling autograd avoids building a gradient graph.
    with torch.no_grad():
      predictions = model([preprocess(img)])

    for prediction in predictions:
      img_result = numpy.zeros(img.shape[:2])

      for score, mask in zip(prediction['scores'], prediction['masks']):
        if score >= threshold:
          # mask[0] selects the first plane of this detection's mask.
          img_result = numpy.maximum(img_result, mask[0].detach().numpy())

      result.append(img_result)

  return result

Méthode de comparaison

In [None]:
def norms1(vector_i, vector_j):
    """Return the L1 distance: the sum of the absolute element-wise differences."""
    difference = vector_i - vector_j
    return numpy.sum(numpy.absolute(difference))

Background Subtraction

In [None]:
def substract_background(dataset, background_indices):
    """Compute a foreground mask for every frame of ``dataset``.

    The frames at ``background_indices`` are loaded as greyscale float images
    and used as the background set passed to
    ``generate_background_substrated_image`` for each frame.

    Args:
        dataset: Sequence of frame file paths.
        background_indices: Indices into ``dataset`` of the frames used as
            background images.

    Returns:
        A list with one mask per frame, in the order of ``dataset``.

    Raises:
        OSError: If a background frame cannot be read by OpenCV.
    """
    background_images = []
    for index in background_indices:
        background = cv2.imread(dataset[index], cv2.IMREAD_GRAYSCALE)
        if background is None:
            # cv2.imread reports a failure by returning None instead of raising.
            raise OSError(f"Unable to read background image: {dataset[index]}")
        background_images.append(background.astype(float))

    result = []
    for num, path in enumerate(dataset):
        print('BR - Image #' + str(num))
        # The mask generator takes only the frame path and the background set;
        # passing the frame number as a third argument raised a TypeError.
        result.append(generate_background_substrated_image(path, background_images))

    return result

Comparaison des résultats

In [None]:
def _show_image(image):
    """Display ``image`` with matplotlib, without axes."""
    plt.imshow(image)
    plt.axis('off')
    plt.show()


def classify_difference(dataset, dataset_ref, show_img = -1, do_SI = True, do_BR = True):
    """Score instance segmentation (SI) and background subtraction (BR) against ground truth.

    Every ground-truth image is read as greyscale and divided by 255. The score
    of a method for a frame is ``100 * (1 - L1 distance / pixel count)``
    between that reference and the mask produced by the method.

    NOTE(review): the reference is only rescaled, so any intermediate grey
    level in the ground truth stays fractional - confirm this is intended.

    Args:
        dataset: Sequence of input frame paths.
        dataset_ref: Sequence of ground-truth image paths, paired with
            ``dataset`` by position. It may not be longer than ``dataset``.
        show_img: Index of the frame whose images are displayed; the default
            ``-1`` matches no frame, so nothing is displayed.
        do_SI: Whether to run and score ``Mask_RCNN``.
        do_BR: Whether to run and score ``substract_background`` (every 50th
            frame is used as a background image).

    Returns:
        A list with one ``(SI score, BR score)`` tuple per ground-truth image;
        the score of a disabled method is ``None``.

    Raises:
        ValueError: If ``dataset_ref`` has more entries than ``dataset``.
        OSError: If a ground-truth image cannot be read by OpenCV.
    """
    # Fail before the expensive passes rather than with an IndexError after them.
    if len(dataset_ref) > len(dataset):
        raise ValueError(
            f"dataset_ref has {len(dataset_ref)} images but dataset only has {len(dataset)}"
        )

    video_SI = Mask_RCNN(dataset) if do_SI else None
    video_BR = substract_background(dataset, list(range(0, len(dataset), 50))) if do_BR else None

    result = []
    for img_index, ref_path in enumerate(dataset_ref):
        ref_raw = cv2.imread(ref_path, cv2.IMREAD_GRAYSCALE)
        if ref_raw is None:
            # cv2.imread reports a failure by returning None instead of raising.
            raise OSError(f"Unable to read ground truth image: {ref_path}")
        img_ref = ref_raw.astype(float) / 255

        img_SI = video_SI[img_index] if do_SI else None
        img_BR = video_BR[img_index] if do_BR else None

        factor = img_ref.size
        result_SI = (1 - (norms1(img_ref, img_SI) / factor)) * 100 if do_SI else None
        result_BR = (1 - (norms1(img_ref, img_BR) / factor)) * 100 if do_BR else None

        # Frame indices are never negative, so show_img == -1 matches nothing.
        if img_index == show_img:
            # OpenCV loads colour images as BGR whereas matplotlib expects RGB.
            _show_image(cv2.cvtColor(cv2.imread(dataset[img_index]), cv2.COLOR_BGR2RGB))
            _show_image(img_ref)

            if do_SI:
                _show_image(img_SI)
                _show_image(numpy.abs(img_SI - img_ref))

            if do_BR:
                _show_image(img_BR)
                _show_image(numpy.abs(img_BR - img_ref))

        # Scores are kept even when a single method is enabled; they used to be
        # discarded unless both methods ran.
        print(f"{img_index}: (SI:{result_SI}, BR:{result_BR})")
        result.append((result_SI, result_BR))

    return result

In [None]:
data = [highway, office, pedestrians, PETS2006]
data_ref = [highway_ref, office_ref, pedestrians_ref, PETS2006_ref]
data_name = ["highway", "office", "pedestrians", "PETS2006"]

# Evaluate both methods on every sequence and write the per-frame scores to
# one CSV file per dataset, named after the dataset.
for data_index, (name, current_data, current_data_ref) in enumerate(zip(data_name, data, data_ref)):
    print(f"Dataset - {name}")

    current_result = classify_difference(current_data, current_data_ref, show_img=-1, do_SI=True, do_BR=True)

    with open(f"{name}.csv", 'w+', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["index", "SI", "BR"])
        # One row per scored frame: position in the result list, SI score, BR score.
        writer.writerows(
            [i, scores[0], scores[1]] for i, scores in enumerate(current_result)
        )