In [54]:
%pip install mlflow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [55]:
import glob
import os
import random
import re
import shutil
import sys
from pathlib import Path
from random import shuffle

import torch
from natsort import natsorted
from torchvision.io import decode_image, write_jpeg
from torchvision.ops import box_iou
from tqdm import tqdm

sys.path.insert(1, '..')
from utils import get_model
from utils.NMS import remove_overlaping

In [56]:
# --- Dataset locations -------------------------------------------------------
# Recursive glob root of the source dataset (images and label files).
ORIGINAL_DATASET = os.path.join(os.sep, 'ml', 'LIZA_dataset', 'original_split', '**')
# Output root of the patch-classification dataset and its two class folders.
DATASET = os.path.join(os.sep, 'ml', 'LIZA_dataset', 'classifier')
HUMAN = os.path.join(DATASET, 'human')
NOT_HUMAN = os.path.join(DATASET, 'not-human')

# WARNING: destructive. Any previously generated classifier dataset is wiped so
# that every run of the notebook starts from empty class folders.
if os.path.isdir(DATASET):
    shutil.rmtree(DATASET)
for directory in (HUMAN, NOT_HUMAN):
    # makedirs also creates DATASET itself as the parent of the class folders.
    os.makedirs(directory, exist_ok=True)

# --- Extraction parameters ---------------------------------------------------
INFERENCE_SIZE = 640      # square image side in pixels — presumably the detector's working resolution
AREA_THRESHOLD = 2800     # labelled boxes with pixel area <= this are not exported as patches
THRESHOLD = 0.25          # minimum detection score kept when post-processing model output
NMS_IOU_TRESHOLD = 0.2    # IoU threshold for overlap removal and for matching detections to labels
RATIO_THRESHOLD = 10.     # forwarded to remove_overlaping(ratio_tresh=...) — NOTE(review): confirm meaning


# --- Model source ------------------------------------------------------------
DEVICE = 'cpu'
# NOTE(review): internal host name is hard-coded; consider reading it from an
# environment variable if the notebook is shared outside this network.
MLFLOW_URI = 'http://spbhws381-ubuntu.geoscan.lan:5000'
PROJECT_NAME = 'LIZA'
MODEL_NAME_VAL = 'LIZA-detector@trained_detr_visdrone'

In [57]:
# Walk the dataset tree once (it holds hundreds of thousands of files) and
# split the listing by extension. The suffix test is anchored at the end of the
# path, so names such as ``x.jpg.bak`` or ``x.jpg.txt`` are not misclassified.
dataset_files = glob.glob(os.path.join(ORIGINAL_DATASET, '*'), recursive=True)

# Natural sorting keeps both lists in the same order so they can be paired by
# position later on.
images = natsorted(file for file in dataset_files if file.endswith('.jpg'))
annotations = natsorted(file for file in dataset_files if file.endswith('.txt'))

# Positional pairing silently drops the tail of the longer list, so fail early.
assert len(images) == len(annotations), (
    f'{len(images)} images vs {len(annotations)} annotation files'
)

len(images), len(annotations)

(118717, 118717)

In [58]:
# Lower-case aliases are kept because other cells may refer to them.
mlflow_uri = MLFLOW_URI
project_name = PROJECT_NAME

# Fetch the registered detector pipeline and unpack its two components.
pipeline_ = get_model(mlflow_uri, project_name, MODEL_NAME_VAL)

model, image_processor = pipeline_.model, pipeline_.image_processor
# The model is only used for inference here: move it to the target device and
# make sure train-time layers (e.g. dropout) are switched off.
model = model.to(DEVICE).eval()

# Feed images to the detector at their native size and value range: no
# resizing, normalisation or padding by the image processor.
image_processor.do_resize = False
image_processor.do_normalize = False
image_processor.do_pad = False



In [52]:
def _crop_patch(image_tensor, xmin, ymin, xmax, ymax):
    """Crop a (C, H, W) image tensor to a pixel box clamped to the image bounds.

    Args:
        image_tensor: image tensor indexed as (channels, rows, columns).
        xmin, ymin, xmax, ymax: integer box corners in pixels; may lie outside
            the image.

    Returns:
        The cropped view, or ``None`` when the clamped box has no area (so the
        caller can skip it instead of passing an empty tensor to ``write_jpeg``).
    """
    _, img_h, img_w = image_tensor.shape
    # Clamp explicitly: a negative start would otherwise be read by Python
    # slicing as an offset from the far edge and yield a wrong or empty patch.
    xmin, ymin = max(0, xmin), max(0, ymin)
    xmax, ymax = min(img_w, xmax), min(img_h, ymax)
    if xmax <= xmin or ymax <= ymin:
        return None
    return image_tensor[:, ymin:ymax, xmin:xmax]


humans_amount = 0   # number of "human" patches written to HUMAN
not_humans = []     # paths of the "not-human" patches written to NOT_HUMAN

for image, annotation in tqdm(zip(images, annotations), total=len(images)):
    stem = Path(image).stem
    if stem != Path(annotation).stem:
        raise ValueError('Images and annotations indices do not match')

    T = decode_image(image)
    _, height, width = T.shape

    with open(annotation) as file:
        labels = file.read().splitlines()

    # --- Ground truth: export every sufficiently large labelled box ----------
    # Only boxes that are actually exported are collected. (The previous
    # torch.empty buffer left rows of skipped labels uninitialised, and those
    # garbage rows took part in the IoU test below.)
    # NOTE(review): labelled boxes below AREA_THRESHOLD are therefore not used
    # to veto detections — confirm that this is intended.
    kept_boxes = []
    for i, label in enumerate(labels):
        if not label.strip():
            continue

        # Label line: "<class> <x_center> <y_center> <width> <height>", with
        # coordinates normalised to the image size.
        _, bbox_x, bbox_y, bbox_w, bbox_h = label.split()
        bbox_x, bbox_y, bbox_w, bbox_h = map(float, (bbox_x, bbox_y, bbox_w, bbox_h))

        bbox_l = (bbox_x - bbox_w / 2) * width   # centre -> left edge, pixels
        bbox_t = (bbox_y - bbox_h / 2) * height  # centre -> top edge, pixels
        bbox_r = (bbox_x + bbox_w / 2) * width   # centre -> right edge, pixels
        bbox_b = (bbox_y + bbox_h / 2) * height  # centre -> bottom edge, pixels

        w, h = bbox_r - bbox_l, bbox_b - bbox_t
        if w <= 0 or h <= 0 or w * h <= AREA_THRESHOLD:
            continue

        bbox = torch.as_tensor((bbox_l, bbox_t, bbox_r, bbox_b), dtype=torch.float32)
        patch = _crop_patch(T, *bbox.to(torch.int32).tolist())
        if patch is None:
            continue

        kept_boxes.append(bbox)
        write_jpeg(patch, os.path.join(HUMAN, f'{stem}_{i}.jpg'))
        humans_amount += 1

    if kept_boxes:
        bboxes = torch.stack(kept_boxes)
    else:
        bboxes = torch.zeros((0, 4), dtype=torch.float32)

    # --- Detector: export detections that do not overlap any kept label ------
    inputs = image_processor(images=T, return_tensors='pt')
    pixel_values = inputs['pixel_values'].to(DEVICE)
    with torch.no_grad():  # inference only: do not build an autograd graph
        output = model(pixel_values)

    # The image is fed to the model unresized, so predictions are mapped back
    # to the real image size; this puts them in the same pixel space as the
    # label boxes and the crops taken from T.
    postprocessed_outputs = image_processor.post_process_object_detection(
        output,
        target_sizes=[(height, width)],
        threshold=THRESHOLD,
    )

    postprocessed_output = remove_overlaping(postprocessed_outputs[0], NMS_IOU_TRESHOLD, ratio_tresh=RATIO_THRESHOLD)
    for i, pred_bbox in enumerate(postprocessed_output['boxes']):
        # The label boxes live on the CPU; compare on the same device.
        pred_bbox = pred_bbox.detach().cpu()

        iou = box_iou(bboxes, pred_bbox.unsqueeze(0))
        if (iou > NMS_IOU_TRESHOLD).any():
            continue  # overlaps a labelled human -> not a negative sample

        patch = _crop_patch(T, *pred_bbox.to(torch.int32).tolist())
        if patch is None:
            continue

        path = os.path.join(NOT_HUMAN, f'{stem}_{i}.jpg')
        write_jpeg(patch, path)
        not_humans.append(path)

9601it [1:02:53,  2.19s/it]

In [53]:
ratio = 1.  # how many times there should be not-humans compared to humans? e.g. ratio of 1. means there will be 1:1
BALANCE_SEED = 0  # fixed seed so the same surplus patches are removed on every run

# Number of not-human patches to keep. (``ratio`` was previously declared but
# ignored, so the balance was always 1:1 whatever its value.)
target_not_humans = int(humans_amount * ratio)

if len(not_humans) > target_not_humans:
    # Shuffle a copy with a private, seeded generator: the selection is
    # reproducible and the global random state is left untouched.
    shuffled = list(not_humans)
    random.Random(BALANCE_SEED).shuffle(shuffled)

    files = shuffled[target_not_humans:]  # surplus patches to delete
    for file in files:
        if os.path.exists(file):
            os.remove(file)

    # Keep the bookkeeping in sync with the disk so that re-running this cell
    # is a no-op instead of deleting a further random subset.
    not_humans = shuffled[:target_not_humans]