INF8770 Technologies multimédias

Polytechnique Montréal

Importation des librairies

In [None]:
import csv
import glob
import os
import time
import urllib.request as request

import cv2
import matplotlib.pyplot as plt
import numpy
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from skimage import io
from skimage.feature.texture import local_binary_pattern

Importation des données

In [None]:
# glob.glob returns paths in arbitrary, OS-dependent order. The input frames and
# their ground-truth masks are paired by list position further down (and sliced
# by position), so every list is sorted to make that pairing deterministic.
# NOTE(review): this assumes file names sort in frame order (e.g. zero-padded
# frame numbers) and that inputs and ground truths share that numbering — confirm.
highway = sorted(glob.glob(os.path.join("./data/baseline/highway/input", "*.jpg")))
office = sorted(glob.glob(os.path.join("./data/baseline/office/input", "*.jpg")))
pedestrians = sorted(glob.glob(os.path.join("./data/baseline/pedestrians/input", "*.jpg")))
PETS2006 = sorted(glob.glob(os.path.join("./data/baseline/PETS2006/input", "*.jpg")))

highway_ref = sorted(glob.glob(os.path.join("./data/baseline/highway/groundtruth", "*.png")))
office_ref = sorted(glob.glob(os.path.join("./data/baseline/office/groundtruth", "*.png")))
pedestrians_ref = sorted(glob.glob(os.path.join("./data/baseline/pedestrians/groundtruth", "*.png")))
PETS2006_ref = sorted(glob.glob(os.path.join("./data/baseline/PETS2006/groundtruth", "*.png")))


# small_test_office = glob.glob(os.path.join("./data/baseline/small_test_office/input", "*.jpg"))
# small_test_office_ref = glob.glob(os.path.join("./data/baseline/small_test_office/groundtruth", "*.png"))


Background Subtraction

In [None]:
def generate_background_substrated_image(image_path, background_image, threshold=40):
  """Build a binary foreground mask by thresholding the difference to a background frame.

  Args:
    image_path: Path of the frame to process; it is loaded as a grayscale image.
    background_image: Array holding the background frame. It must be
      broadcastable against the loaded frame.
    threshold: Minimum absolute intensity difference for a pixel to be marked
      as foreground. Defaults to 40, the value that used to be hard-coded.

  Returns:
    Integer numpy array holding 1 where ``|frame - background| >= threshold``
    and 0 elsewhere.

  Raises:
    OSError: If the frame cannot be read.
  """
  image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
  if image is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, which previously surfaced as an AttributeError.
    raise OSError(f"Could not read image: {image_path}")

  # Work in float so the subtraction cannot wrap around on unsigned pixels.
  diff = numpy.abs(image.astype(float) - background_image) >= threshold

  return diff.astype(int)

Instance Segmentation

In [None]:
# Label table downloaded at cell execution time: index 0 is 'background',
# followed by one entry per line of the remote label file.
COCO_NAMES = ['background']
# The `with` block closes the HTTP response once the labels are read; it was
# previously left open for the lifetime of the kernel.
with request.urlopen('https://raw.githubusercontent.com/gabilodeau/INF6804/master/utils/coco-labels.txt') as label_names:
  for label_name in label_names.readlines():
    COCO_NAMES.append(label_name.strip().decode('UTF-8'))

def Mask_RCNN(dataset, batch_size=4):
  """Segment every image of ``dataset`` with a pretrained Mask R-CNN.

  For each image, the masks of all detections whose score is at least 0.30
  are merged with a pixel-wise maximum. Mask values are kept as returned by
  the model (they are not binarised).

  Args:
    dataset: Iterable of image paths.
    batch_size: Number of images sent through the model at once. Images are
      processed in chunks so that memory use does not grow with the size of
      the dataset.

  Returns:
    List with one 2-D float numpy array per image, shaped like the first two
    dimensions of the image; all zeros when no detection passes the score
    threshold. An empty dataset yields an empty list.
  """
  dataset = list(dataset)
  if not dataset:
    # Nothing to segment: skip loading the model entirely.
    return []

  THRESHOLD = 0.30
  preprocess = transforms.Compose([transforms.ToTensor()])
  model = models.detection.maskrcnn_resnet50_fpn(weights='DEFAULT').eval()

  result = []
  for start in range(0, len(dataset), batch_size):
    images = [io.imread(img_path) for img_path in dataset[start:start + batch_size]]
    tensors = [preprocess(img) for img in images]

    # Inference only: without no_grad the autograd graph of every image would
    # be kept alive for no benefit.
    with torch.no_grad():
      predictions = model(tensors)

    for img, prediction in zip(images, predictions):
      img_result = numpy.zeros(img.shape[:2])

      keep = prediction['scores'] >= THRESHOLD
      if keep.any():
        # masks has one single-channel mask per detection; drop the channel
        # axis, then collapse the kept detections with a pixel-wise maximum.
        kept_masks = prediction['masks'][keep][:, 0]
        merged = kept_masks.max(dim=0).values.cpu().numpy()
        img_result = numpy.maximum(img_result, merged)

      result.append(img_result)

  return result

Méthode de comparaison

In [None]:
def intersection(vector_i, vector_j):
    """Return the sum of the element-wise minima of both vectors, divided by the sum of ``vector_j``."""
    overlap = numpy.minimum(vector_i, vector_j).sum()
    reference_total = numpy.sum(vector_j)
    return overlap / reference_total

def norms1(vector_i, vector_j):
    """Return the L1 distance between the two vectors (sum of absolute differences)."""
    gap = vector_i - vector_j
    return numpy.abs(gap).sum()

def norms2(vector_i, vector_j):
    """Return the L2 (Euclidean) distance between the two vectors."""
    gap = vector_i - vector_j
    squared_total = numpy.sum(gap ** 2)
    return numpy.sqrt(squared_total)

def bhattacharyya(vector_i, vector_j):
    """Return minus the natural log of the summed square roots of the element-wise products."""
    coefficient = numpy.sqrt(numpy.multiply(vector_i, vector_j)).sum()
    return -numpy.log(coefficient)

Background Subtraction

In [None]:
def substract_background(dataset, background_image_index):
    """Compute a foreground mask for every frame of ``dataset`` against one reference frame.

    Args:
        dataset: Sequence of image paths.
        background_image_index: Position in ``dataset`` of the frame used as
            the background; it is loaded as a grayscale float image.

    Returns:
        List with one mask per path of ``dataset``, each produced by
        ``generate_background_substrated_image``. An empty dataset yields an
        empty list.

    Raises:
        OSError: If the background frame cannot be read.
    """
    if len(dataset) == 0:
        # No frame can serve as background; there is nothing to process.
        return []

    background_path = dataset[background_image_index]
    background_image = cv2.imread(background_path, cv2.IMREAD_GRAYSCALE)
    if background_image is None:
        # cv2.imread returns None for a missing or undecodable file.
        raise OSError(f"Could not read background image: {background_path}")
    background_image = background_image.astype(float)

    return [generate_background_substrated_image(path, background_image) for path in dataset]

# Foreground masks of the office sequence, using its first listed frame as background.
result = substract_background(dataset=office, background_image_index=0)

Comparaison des résultats

In [None]:
def classify_difference(dataset, dataset_ref, show_img = False, background_image_index = 0):
    """Score instance segmentation (SI) and background subtraction (BR) against ground truth.

    For every ground-truth image, each method's mask is compared to the ground
    truth (scaled by 1/255) with an L1 distance. The score is
    ``(1 - L1 / factor) * 100`` where ``factor`` is the number of non-zero
    ground-truth pixels (1 when there are none).

    Args:
        dataset: Sequence of input image paths.
        dataset_ref: Sequence of ground-truth image paths, index-aligned with
            ``dataset``.
        show_img: When true, display the ground truth, each method's mask and
            each method's absolute error.
        background_image_index: Position in ``dataset`` of the frame used as
            background by the background-subtraction method.

    Returns:
        List of ``(result_SI, result_BR)`` tuples, one per ground-truth image.

    Raises:
        ValueError: If ``dataset`` has fewer entries than ``dataset_ref``.
        OSError: If a ground-truth image cannot be read.
    """
    if len(dataset_ref) == 0:
        # Nothing to score: do not run the segmentation model for nothing.
        return []
    if len(dataset) < len(dataset_ref):
        # Fail before the expensive inference rather than with an IndexError after it.
        raise ValueError(
            f"dataset has {len(dataset)} images but dataset_ref has {len(dataset_ref)}"
        )

    video_SI = Mask_RCNN(dataset)
    # BR used to be computed with Mask_RCNN as well, which made both columns
    # identical; it is the background-subtraction result.
    video_BR = substract_background(dataset, background_image_index)

    result = []
    for img_index, ref_path in enumerate(dataset_ref):
        img_ref = cv2.imread(ref_path, cv2.IMREAD_GRAYSCALE)
        if img_ref is None:
            # cv2.imread returns None for a missing or undecodable file.
            raise OSError(f"Could not read ground-truth image: {ref_path}")
        img_ref = img_ref.astype(float) / 255

        img_SI = video_SI[img_index]
        img_BR = video_BR[img_index]

        # Normalise by the number of non-zero ground-truth pixels; fall back
        # to 1 so an all-zero ground truth does not divide by zero.
        factor = img_ref[img_ref != 0].size
        factor = 1 if (factor == 0) else factor

        result_SI = (1 - (norms1(img_SI, img_ref) / factor)) * 100
        result_BR = (1 - (norms1(img_BR, img_ref) / factor)) * 100

        if (show_img):
            pictures = (
                img_ref,
                img_SI,
                numpy.abs(img_SI - img_ref),
                img_BR,
                numpy.abs(img_BR - img_ref),
            )
            for picture in pictures:
                plt.imshow(picture)
                plt.axis('off')
                plt.show()

        print(f"{img_index}: (SI:{result_SI}, BR:{result_BR})")
        result.append((result_SI, result_BR))

    return result

# Visual sanity check on the first office frame and its ground truth.
classify_difference(office[:1], office_ref[:1], show_img=True)

In [None]:
# Frame selections, their ground truths and the CSV base names share the same index.
data_name = ["highway", "office", "pedestrians", "PETS2006"]
data = [highway[300:302], office[0:1], pedestrians, PETS2006]
data_ref = [highway_ref[300:302], office_ref[0:1], pedestrians_ref, PETS2006_ref]

# NOTE(review): range(1, len(data[:2])) is range(1, 2), so only index 1
# ("office") is evaluated and exported — confirm whether the other sequences
# were meant to be processed too.
for data_index in range(1, len(data[:2])):
    current_result = classify_difference(data[data_index], data_ref[data_index])

    # One CSV per sequence: a header row, then one row per evaluated frame.
    with open(f"{data_name[data_index]}.csv", 'w+', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["index", "SI", "BR"])
        writer.writerows(
            [row_number, scores[0], scores[1]]
            for row_number, scores in enumerate(current_result)
        )