In [1]:
import os

# Enumerate GPUs in PCI bus order and expose only device "0" to this process.
# This cell runs before the cell that imports torch.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

In [2]:
# --- Run configuration -------------------------------------------------------
# Checkpoint step to evaluate; selects the directory under logs/model2/.
CHP_ID = "3016"
# Short model tag, used only to name the prediction file.
SUBMODEL = "cond-detr-50"
MODEL_PATH = "logs/model2/checkpoint-" + CHP_ID
# NOTE: a fixed IMAGE_SHAPE (1333) and its `_Shape{IMAGE_SHAPE}` file-name
# suffix are currently disabled.
# Score threshold handed to the processor's post-processing (0.0 keeps everything).
THR = 0.0
# IoU threshold handed to torchvision's `nms` in `predicts`.
iou_threshold = 0.8
# THR is written as a percentage (THR * 100) in the file name.
FILE_NAME = "_".join(
    (
        SUBMODEL,
        f"THR{THR*100:.3f}",
        f"IOU{iou_threshold:.3f}",
        f"ID{CHP_ID}",
    )
)

In [3]:
from typing import List, Tuple, Union

import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision.ops import nms
from tqdm import tqdm
from transformers import (AutoImageProcessor, AutoModelForObjectDetection,
						  ConditionalDetrForObjectDetection,
						  ConditionalDetrImageProcessor)

from zindi_code import CLS_MAPPER
from zindi_code.dataset_class import load_and_format

In [4]:

# Directory holding the image files; `load_transform` joins it with each image_id.
image_folder = "zindi_data/images"

# Validation annotations, loaded through the project helper `load_and_format`.
test = load_and_format("zindi_data/ValDataset.csv")
# Unseeded sample: the rows displayed change on every run.
test.sample(5)

Unnamed: 0,image_id,bbox,category_id,id
223,id_qk46xf80l5.jpg,"[798, 293, 830, 333]",0,223
1284,id_kbanzvtryi.jpg,"[2059, 1744, 2138, 1829]",0,1284
1779,id_ytq3slqkjm.jpg,"[967, 8, 999, 38]",0,1779
930,id_jl93f5rtzc.jpg,"[460, 609, 486, 635]",0,930
1860,id_ezd6x40fd0.jpg,"[1500, 218, 1570, 314]",1,1860


In [5]:
# Processor and weights are both read from the same checkpoint directory.
model_pth = MODEL_PATH

# use_fast=False requests the non-fast image processor implementation.
image_processor: ConditionalDetrImageProcessor = AutoImageProcessor.from_pretrained(
	model_pth,
	use_fast=False,
)

# Load the weights, switch to eval mode and move to the GPU in one chain.
model: ConditionalDetrForObjectDetection = (
	AutoModelForObjectDetection.from_pretrained(model_pth).eval().to("cuda")
)

In [6]:
# Display the processor configuration loaded from the checkpoint
# (resize / pad / rescale / normalize settings).
image_processor

ConditionalDetrImageProcessor {
  "do_convert_annotations": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "format": "coco_detection",
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ConditionalDetrImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "pad_size": {
    "height": 800,
    "width": 1333
  },
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "max_height": 800,
    "max_width": 1333
  }
}

In [7]:
# Class-id -> class-name mapping stored in the model config; `predicts` uses it
# to label each detection.
model.config.id2label

{0: 'Trophozoite', 1: 'WBC', 2: 'MASK'}

In [8]:
@torch.inference_mode()
def make_prediction(image: Image.Image):
    """Run the detector on one image and return its post-processed detections.

    Args:
        image: PIL image to preprocess with `image_processor` and feed to `model`.

    Returns:
        The single entry produced by
        `image_processor.post_process_object_detection` for this image.
        `predicts` reads its "scores", "labels" and "boxes" entries. Detections
        are filtered with the global `THR` and rescaled to the original image
        size.
    """
    # Follow the model's current device instead of assuming "cuda", so the
    # function keeps working if the model is moved (e.g. to "cuda:1" or CPU).
    inputs = image_processor(images=[image], return_tensors="pt").to(model.device)
    outputs = model(**inputs)
    # PIL reports (width, height); target_sizes is passed as (height, width).
    width, height = image.size
    target_sizes = torch.tensor([[height, width]])
    return image_processor.post_process_object_detection(
        outputs, threshold=THR, target_sizes=target_sizes
    )[0]


@torch.inference_mode()
def _make_predictions(images: list[Image.Image]):
    """Batched variant of `make_prediction`: one forward pass for all images.

    Args:
        images: PIL images, converted to numpy arrays and preprocessed
            together by `image_processor`.

    Returns:
        The list returned by `image_processor.post_process_object_detection`,
        one entry per input image, filtered with the global `THR` and rescaled
        to each image's original size.
    """
    # Runs under inference_mode like the per-image path, so the batched forward
    # pass does not record autograd state. Inputs follow the model's device.
    inputs = image_processor(
        images=[np.array(image) for image in images], return_tensors="pt"
    ).to(model.device)
    outputs = model(**inputs)
    # PIL reports (width, height); reverse each to (height, width).
    target_sizes = torch.tensor([image.size[::-1] for image in images])
    return image_processor.post_process_object_detection(
        outputs, threshold=THR, target_sizes=target_sizes
    )


@torch.inference_mode()
def make_predictions(images: list[Image.Image]):
    """Apply `make_prediction` to every image, one at a time, preserving order.

    Returns a list with one post-processed result per input image.
    """
    return list(map(make_prediction, images))


def load_transform(path: str):
    """Open the image at `image_folder`/`path` and return it converted to RGB."""
    full_path = os.path.join(image_folder, path)
    source = Image.open(full_path)
    return source.convert("RGB")


def load_images(image_pths: list[str]):
    """Load every path in `image_pths` through `load_transform`, in order."""
    loaded = []
    for pth in image_pths:
        loaded.append(load_transform(pth))
    return loaded


def predicts(image_pths: list[str]):
    """Detect objects in the given images and return them as one flat table.

    For each image, the detections from `make_predictions` are de-duplicated
    with torchvision's `nms` (IoU threshold: global `iou_threshold`, applied
    across all classes together). Detections with label id 2 are dropped, and
    the rest are emitted as rows with the box as top-left corner plus width
    and height.

    Args:
        image_pths: image file names, resolved by `load_images`.

    Returns:
        pandas.DataFrame with columns
        ["image_id", "x", "y", "w", "h", "category_id", "score"].
        Every input image contributes at least one row: an image with no
        surviving detection gets a single placeholder row
        `[image_id, 0, 0, 0, 0, "NEG", 1.0]`.
    """
    images = load_images(image_pths)
    results = make_predictions(images)
    rows = []
    for image_pth, result in zip(image_pths, results):
        image_rows = []
        if len(result["boxes"]):
            keep = nms(result["boxes"], result["scores"], iou_threshold)
            # Convert each tensor to Python values once, instead of calling
            # .item() per detection.
            kept_scores = result["scores"][keep].tolist()
            kept_labels = result["labels"][keep].tolist()
            kept_boxes = result["boxes"][keep].tolist()
            for score, label, box in zip(kept_scores, kept_labels, kept_boxes):
                # Label id 2 is excluded from the output.
                if label == 2:
                    continue
                x1, y1, x2, y2 = (round(coord, 2) for coord in box)
                image_rows.append(
                    [
                        image_pth,
                        x1,
                        y1,
                        x2 - x1,
                        y2 - y1,
                        model.config.id2label[label],
                        round(score, 3),
                    ]
                )
        # No early `continue` above: an image whose detections were all
        # filtered out must still reach this fallback so it is not dropped
        # from the result.
        if not image_rows:
            image_rows.append([image_pth, 0, 0, 0, 0, "NEG", 1.0])
        rows.extend(image_rows)
    return pd.DataFrame(
        rows, columns=["image_id", "x", "y", "w", "h", "category_id", "score"]
    )

In [9]:
# First 16 distinct image ids: a small subset for the exploratory cells below.
image_pths = test["image_id"].unique()[:16]
image_pths

array(['id_w8xnbd5rvm.jpg', 'id_ytq3slqkjm.jpg', 'id_e20xnaq5qn.jpg',
       'id_7fc9zyfy0e.jpg', 'id_6g52lmvz2y.jpg', 'id_z0i61ad0tq.jpg',
       'id_55a6sf8hbe.jpg', 'id_dg0icorzno.jpg', 'id_zdg96srigj.jpg',
       'id_ezd6x40fd0.jpg', 'id_ch6r0g46fr.jpg', 'id_4cotsn0obm.jpg',
       'id_0fdars2kkw.jpg', 'id_4wkzpeu6or.jpg', 'id_idjqlz4ppb.jpg',
       'id_by6e6shi2z.jpg'], dtype=object)

In [10]:
# Manual single-image forward pass, so the raw processor inputs and model
# outputs can be inspected in the cells that follow.
image = load_images([image_pths[0]])[0]
inputs = image_processor(images=[image], return_tensors="pt")
with torch.inference_mode():
	outputs = model(**inputs.to("cuda"))
# PIL's `size` is (width, height); stored here as (height, width).
target_sizes = torch.tensor([[image.size[1], image.size[0]]])

In [11]:
# Inspect the processor output fed to the model.
inputs

{'pixel_values': tensor([[[[-1.7240, -1.7583, -1.7240,  ...,  0.0000,  0.0000,  0.0000],
          [-1.7583, -1.7069, -1.7069,  ...,  0.0000,  0.0000,  0.0000],
          [-1.7583, -1.6898, -1.7583,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [-1.9467, -1.9124, -1.8953,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9295, -1.9295, -1.9124,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9124, -1.9295, -1.9124,  ...,  0.0000,  0.0000,  0.0000]],

         [[-1.8256, -1.8431, -1.8081,  ...,  0.0000,  0.0000,  0.0000],
          [-1.8606, -1.7906, -1.7731,  ...,  0.0000,  0.0000,  0.0000],
          [-1.8606, -1.7731, -1.8256,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [-2.0007, -1.9657, -1.9482,  ...,  0.0000,  0.0000,  0.0000],
          [-2.0007, -2.0007, -1.9657,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9832, -2.0007, -1.9657,  ...,  0.0000,  0.0000,  0.0000]],

         [[-1.4907, -1.5081, -1.4559,  ...,  0.0000,  0.0000,  0.0000],
          [-1

In [12]:
# Shape of the pixel mask: (batch, height, width).
inputs["pixel_mask"].shape

torch.Size([1, 800, 1333])

In [13]:
# Shape of the image tensor: (batch, channels, height, width).
inputs["pixel_values"].shape

torch.Size([1, 3, 800, 1333])

In [14]:
# First channel of the first image: (height, width).
inputs["pixel_values"][0, 0].shape

torch.Size([800, 1333])

In [15]:
# Sum of the first mask row of the first image, i.e. how many columns in that
# row are flagged. NOTE(review): presumably the unpadded image width; confirm.
inputs["pixel_mask"][0, 0].sum()

tensor(1069, device='cuda:0')

In [16]:
# Original image size as reported by PIL: (width, height).
image.size

(4032, 3016)

In [17]:
# Softmax over the class dimension of the raw logits, for eyeballing only.
# NOTE(review): the scores from `post_process_object_detection` may be computed
# differently (e.g. sigmoid), so these values need not match the final
# confidences; confirm against the transformers implementation.
outputs.logits.softmax(dim=2)

tensor([[[0.5217, 0.3054, 0.1730],
         [0.5916, 0.2551, 0.1533],
         [0.3758, 0.3197, 0.3045],
         [0.8536, 0.0739, 0.0725],
         [0.4017, 0.3240, 0.2743],
         [0.4723, 0.2872, 0.2405],
         [0.4747, 0.2626, 0.2627],
         [0.6498, 0.1830, 0.1672],
         [0.7178, 0.1757, 0.1065],
         [0.4051, 0.3267, 0.2681],
         [0.7028, 0.1487, 0.1484],
         [0.6054, 0.2310, 0.1636],
         [0.4628, 0.2905, 0.2467],
         [0.3302, 0.3164, 0.3534],
         [0.4835, 0.2875, 0.2290],
         [0.6716, 0.1593, 0.1692],
         [0.6055, 0.2141, 0.1803],
         [0.4753, 0.2899, 0.2348],
         [0.5965, 0.2534, 0.1501],
         [0.5205, 0.2732, 0.2064],
         [0.5840, 0.2003, 0.2157],
         [0.4671, 0.2525, 0.2804],
         [0.6274, 0.2240, 0.1486],
         [0.7137, 0.1776, 0.1087],
         [0.7357, 0.1457, 0.1186],
         [0.6156, 0.2583, 0.1260],
         [0.7659, 0.1497, 0.0843],
         [0.5469, 0.2863, 0.1668],
         [0.5330, 0.

In [18]:
# Smoke-test the full pipeline on the 16-image subset.
results = predicts(image_pths)

In [19]:
# Number of predicted rows per class on the subset.
results["category_id"].value_counts()

category_id
Trophozoite    788
WBC            112
Name: count, dtype: int64

In [20]:
# Random (unseeded) peek at ten predicted rows.
results.sample(10)

Unnamed: 0,image_id,x,y,w,h,category_id,score
744,id_4wkzpeu6or.jpg,1636.64,2704.13,168.77,207.89,WBC,0.743
706,id_0fdars2kkw.jpg,601.31,691.36,34.88,32.49,Trophozoite,0.495
62,id_w8xnbd5rvm.jpg,2293.94,2694.9,86.66,99.5,Trophozoite,0.132
783,id_idjqlz4ppb.jpg,2471.79,1624.92,144.79,196.24,WBC,0.675
337,id_z0i61ad0tq.jpg,488.37,58.28,31.77,29.11,Trophozoite,0.168
225,id_7fc9zyfy0e.jpg,1097.79,3020.04,141.52,98.99,Trophozoite,0.046
759,id_4wkzpeu6or.jpg,2631.48,779.73,86.66,104.27,Trophozoite,0.105
311,id_z0i61ad0tq.jpg,1416.89,269.94,32.97,33.03,Trophozoite,0.293
897,id_by6e6shi2z.jpg,1161.23,948.14,34.15,34.24,Trophozoite,0.128
279,id_6g52lmvz2y.jpg,1693.48,329.81,37.82,37.19,Trophozoite,0.06


In [21]:
# Distribution of confidence scores on the subset.
results["score"].describe()

count    900.000000
mean       0.252712
std        0.200713
min        0.028000
25%        0.083750
50%        0.179500
75%        0.387000
max        0.812000
Name: score, dtype: float64

In [22]:
# Run inference over every distinct validation image, `batch_size` ids at a time.
# `predicts` still processes the images of a chunk one by one, so `batch_size`
# only controls how many images are loaded per call.
batch_size = 16
test_images = test["image_id"].unique()
# tqdm takes its total from len(range(...)), which is exact even when the image
# count is a multiple of batch_size. Every start index from the range is already
# below len(test_images), so no extra filter is needed.
results = [
	predicts(test_images[start : start + batch_size])
	for start in tqdm(range(0, len(test_images), batch_size))
]

100%|██████████| 18/18 [01:10<00:00,  3.90s/it]


In [23]:
# Stack the per-chunk frames into one table with a fresh integer index.
predictions = pd.concat(results, ignore_index=True)

In [24]:
# Random (unseeded) peek at ten rows of the full prediction table.
predictions.sample(10)

Unnamed: 0,image_id,x,y,w,h,category_id,score
11244,id_g7t9v5jebb.jpg,3127.24,931.71,74.69,103.48,Trophozoite,0.044
10599,id_le33k9f4ee.jpg,2534.12,2563.56,97.14,118.93,Trophozoite,0.305
4774,id_49n2r2g61a.jpg,2917.33,1385.29,82.33,92.47,Trophozoite,0.09
6464,id_56cm3hgim3.jpg,2881.2,1759.32,93.22,116.04,Trophozoite,0.074
10941,id_j8ri1iub1e.jpg,2575.22,1615.1,95.08,121.42,Trophozoite,0.33
11141,id_4f9wdugdot.jpg,1540.32,2237.38,140.56,177.71,WBC,0.673
4577,id_7bbze5cgwz.jpg,2425.31,578.62,114.45,141.16,Trophozoite,0.047
4940,id_t4ahof8989.jpg,1018.47,1605.78,86.27,104.45,Trophozoite,0.039
12491,id_xbmf4arzt7.jpg,1634.08,626.14,154.93,185.97,WBC,0.701
6526,id_wh6zyryvik.jpg,359.35,395.56,39.37,40.44,Trophozoite,0.027


In [25]:
# Class shares over all predictions; the positional True is `normalize`.
predictions["category_id"].value_counts(True)

category_id
Trophozoite    0.816835
WBC            0.183165
Name: proportion, dtype: float64

In [26]:
# Distribution of confidence scores over the full prediction table.
predictions["score"].describe()

count    14446.000000
mean         0.181902
std          0.195331
min          0.017000
25%          0.051000
50%          0.089000
75%          0.236000
max          0.840000
Name: score, dtype: float64

In [27]:
# Preview the path the export cell writes to.
f"zindi_data/validation/prediction_{FILE_NAME}.csv"

'zindi_data/validation/prediction_cond-detr-50_THR0.000_IOU0.800_ID3016.csv'

In [28]:
# Rename to the submission column names in a single pass. Column order is kept,
# and `rename` ignores keys that are no longer present, so re-running is harmless.
predictions = predictions.rename(
	columns={
		"image_id": "Image_ID",
		"x": "xmin",
		"y": "ymin",
		"category_id": "class",
		"score": "confidence",
	}
)

# Rebuild the bottom-right corner from corner + size. xmin/w are differences of
# values already rounded to 2 decimals, so round again to remove float noise
# (e.g. 1805.4099999999999) from the CSV.
predictions["xmax"] = (predictions["xmin"] + predictions["w"]).round(2)
predictions["ymax"] = (predictions["ymin"] + predictions["h"]).round(2)

# Create the output directory if needed so the export cannot fail after the
# whole inference run has completed.
output_path = f"zindi_data/validation/prediction_{FILE_NAME}.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
predictions.to_csv(output_path, index=False)