In [1]:
import os

# Restrict the process to GPU "0", with devices ordered by PCI bus ID.
# Both variables are set here, before torch is imported in a later cell.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

In [2]:
# Checkpoint number to evaluate; it selects the directory under logs/model2.
CHP_ID = "580"
SUBMODEL = "cond-detr-50" # short model tag embedded in FILE_NAME
MODEL_PATH = f"logs/model2/checkpoint-{CHP_ID}"
# Disabled: IMAGE_SHAPE = 1333, presumably meant to add a "_Shape{IMAGE_SHAPE}" part to FILE_NAME.
# Score threshold passed as `threshold` to post_process_object_detection.
THR = 0.0
# IoU threshold passed to torchvision's nms() inside predicts().
iou_threshold = 0.4
# Stem of the prediction CSV; encodes the settings above so different runs do not overwrite each other.
FILE_NAME = f"{SUBMODEL}_THR{THR*100:.3f}_IOU{iou_threshold:.3f}_ID{CHP_ID}"

In [3]:
from typing import List, Tuple, Union

import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision.ops import nms
from tqdm import tqdm
from transformers import (AutoImageProcessor, AutoModelForObjectDetection,
						  ConditionalDetrForObjectDetection,
						  ConditionalDetrImageProcessor)
from transformers.image_transforms import center_to_corners_format
from transformers.utils.generic import TensorType

from zindi_code import CLS_MAPPER
from zindi_code.dataset_class import load_and_format

In [4]:

# Directory that load_transform() joins image file names onto.
image_folder = "zindi_data/images"

# Validation annotations; the preview below shows image_id, bbox, category_id and id columns.
test = load_and_format("zindi_data/ValDataset.csv")
# Random 5-row preview (no random_state, so the rows shown differ between runs).
test.sample(5)

Unnamed: 0,image_id,bbox,category_id,id
585,id_6ujuefpy4m.jpg,"[619, 89, 657, 119]",0,585
513,id_i7nitsscaw.jpg,"[972, 630, 1033, 689]",1,513
1583,id_ezd6x40fd0.jpg,"[1665, 463, 1701, 495]",0,1583
1514,id_3xidcn77dw.jpg,"[1651, 166, 1721, 238]",1,1514
77,id_147gpcegp6.jpg,"[962, 842, 1082, 940]",0,77


In [5]:
model_pth = MODEL_PATH

# The image processor (slow variant) and the detector are both restored from
# the same checkpoint directory.
image_processor: ConditionalDetrImageProcessor = AutoImageProcessor.from_pretrained(
	model_pth,
	use_fast=False,
)
model: ConditionalDetrForObjectDetection = AutoModelForObjectDetection.from_pretrained(model_pth)

# Inference only: switch to eval mode, then move the weights to the GPU.
model.eval()
model = model.to("cuda")

In [6]:
# Display the preprocessing configuration restored from the checkpoint.
image_processor

ConditionalDetrImageProcessor {
  "do_convert_annotations": true,
  "do_normalize": true,
  "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "format": "coco_detection",
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ConditionalDetrImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "pad_size": {
    "height": 800,
    "width": 1333
  },
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "max_height": 800,
    "max_width": 1333
  }
}

In [7]:
# Class-id -> name mapping of the checkpoint; predicts() uses it to name labels.
model.config.id2label

{0: 'Trophozoite', 1: 'WBC', 2: 'MASK'}

In [8]:
@torch.inference_mode()
def make_prediction(image: Image.Image):
    """Run the detector on one image and return its post-processed result.

    The image is preprocessed by ``image_processor``, the batch of one is moved
    to the GPU and passed through ``model``, and the raw outputs are converted
    by ``post_process_object_detection`` with ``THR`` as score threshold and the
    image's own (height, width) as target size.

    Returns the first (and only) entry of the post-processing output.
    """
    width, height = image.size  # PIL reports (width, height)
    batch = image_processor(images=[image], return_tensors="pt").to("cuda")
    raw_outputs = model(**batch)
    detections = image_processor.post_process_object_detection(
        raw_outputs,
        threshold=THR,
        target_sizes=torch.tensor([[height, width]]),
    )
    return detections[0]


@torch.inference_mode()
def _make_predictions(images: list[Image.Image]):
    """Batched variant of ``make_prediction``: one forward pass for all images.

    Every image is converted to a numpy array, the whole list is preprocessed
    into a single batch, moved to the GPU and run through ``model``. The raw
    outputs are post-processed with ``THR`` as score threshold and each image's
    own (height, width) as target size.

    Runs under ``torch.inference_mode()`` like the other prediction helpers, so
    no autograd state is recorded during the forward pass.

    Returns the list produced by ``post_process_object_detection``, one entry
    per input image.
    """
    inputs = image_processor(
        images=[np.array(image) for image in images], return_tensors="pt"
    ).to("cuda")
    outputs = model(**inputs)
    # PIL sizes are (width, height); reversing gives the (height, width) pairs
    # passed as target_sizes.
    target_sizes = torch.tensor([image.size[::-1] for image in images])
    return image_processor.post_process_object_detection(
        outputs, threshold=THR, target_sizes=target_sizes
    )


@torch.inference_mode()
def make_predictions(images: list[Image.Image]):
    """Apply ``make_prediction`` to each image in turn.

    Returns a list with one post-processed result per input image, in input
    order.
    """
    collected = []
    for single_image in images:
        collected.append(make_prediction(single_image))
    return collected


def load_transform(path: str):
    """Open the file ``path`` inside ``image_folder`` and return it converted to RGB."""
    full_path = os.path.join(image_folder, path)
    rgb_image = Image.open(full_path).convert("RGB")
    return rgb_image


def load_images(image_pths: list[str]):
    """Load every file name in ``image_pths`` through ``load_transform``.

    Returns the loaded images as a list, in the same order as the input.
    """
    loaded = []
    for pth in image_pths:
        loaded.append(load_transform(pth))
    return loaded


def predicts(image_pths: list[str]):
    """Detect objects in the given images and return the detections as a DataFrame.

    Each image is loaded, run through the detector and filtered with NMS at
    ``iou_threshold``. Detections labelled 2 ('MASK' in ``model.config.id2label``)
    are dropped. Every image contributes at least one row: if nothing is kept, a
    placeholder row ``[image, 0, 0, 0, 0, "NEG", 1.0]`` is emitted instead.

    Args:
        image_pths: image file names, resolved by ``load_images``.

    Returns:
        DataFrame with columns ``image_id, x, y, w, h, category_id, score``.
        ``x``/``y`` are the box's first corner and ``w``/``h`` its extent
        (``x2 - x1``, ``y2 - y1``), computed from coordinates rounded to two
        decimals. ``category_id`` is the label name and ``score`` is rounded to
        three decimals.
    """
    images = load_images(image_pths)
    results = make_predictions(images)
    mask_label = 2  # id2label maps 2 -> 'MASK'; such detections are never exported
    rows = []
    for image_pth, result in zip(image_pths, results):
        image_rows = []
        if len(result["boxes"]):
            # NMS runs over all boxes regardless of label, so a MASK box can
            # suppress an overlapping detection before being dropped itself.
            # NOTE(review): confirm this class-agnostic suppression is intended.
            keep = nms(result["boxes"], result["scores"], iou_threshold)
            # Convert each tensor to Python values once instead of calling
            # .item()/.tolist() per detection.
            kept_scores = result["scores"][keep].tolist()
            kept_labels = result["labels"][keep].tolist()
            kept_boxes = result["boxes"][keep].tolist()
            for score, label, box in zip(kept_scores, kept_labels, kept_boxes):
                if label == mask_label:
                    continue
                x1, y1, x2, y2 = (round(coord, 2) for coord in box)
                image_rows.append(
                    [
                        image_pth,
                        x1,
                        y1,
                        x2 - x1,
                        y2 - y1,
                        model.config.id2label[label],
                        round(score, 3),
                    ]
                )
        # Always reached, so an image with no kept detection still gets a row.
        if not image_rows:
            image_rows.append([image_pth, 0, 0, 0, 0, "NEG", 1.0])
        rows.extend(image_rows)
    return pd.DataFrame(
        rows, columns=["image_id", "x", "y", "w", "h", "category_id", "score"]
    )

In [9]:
# First 16 distinct validation image names, used by the next cells as a small trial batch.
image_pths = test["image_id"].unique()[:16]
image_pths

array(['id_w8xnbd5rvm.jpg', 'id_ytq3slqkjm.jpg', 'id_e20xnaq5qn.jpg',
       'id_7fc9zyfy0e.jpg', 'id_6g52lmvz2y.jpg', 'id_z0i61ad0tq.jpg',
       'id_55a6sf8hbe.jpg', 'id_dg0icorzno.jpg', 'id_zdg96srigj.jpg',
       'id_ezd6x40fd0.jpg', 'id_ch6r0g46fr.jpg', 'id_4cotsn0obm.jpg',
       'id_0fdars2kkw.jpg', 'id_4wkzpeu6or.jpg', 'id_idjqlz4ppb.jpg',
       'id_by6e6shi2z.jpg'], dtype=object)

In [10]:
# Manual forward pass on the first trial image, kept step by step so the
# intermediate tensors can be inspected in the following cells.
image = load_transform(image_pths[0])
inputs = image_processor(images=[image], return_tensors="pt")
with torch.inference_mode():
	outputs = model(**inputs.to("cuda"))
# PIL size is (width, height); reversed it gives the (height, width) pair.
target_sizes = torch.tensor([image.size[::-1]])

In [11]:
# Inspect the preprocessed batch that was fed to the model.
inputs

{'pixel_values': tensor([[[[-1.7240, -1.7583, -1.7240,  ...,  0.0000,  0.0000,  0.0000],
          [-1.7583, -1.7069, -1.7069,  ...,  0.0000,  0.0000,  0.0000],
          [-1.7583, -1.6898, -1.7583,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [-1.9467, -1.9124, -1.8953,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9295, -1.9295, -1.9124,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9124, -1.9295, -1.9124,  ...,  0.0000,  0.0000,  0.0000]],

         [[-1.8256, -1.8431, -1.8081,  ...,  0.0000,  0.0000,  0.0000],
          [-1.8606, -1.7906, -1.7731,  ...,  0.0000,  0.0000,  0.0000],
          [-1.8606, -1.7731, -1.8256,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [-2.0007, -1.9657, -1.9482,  ...,  0.0000,  0.0000,  0.0000],
          [-2.0007, -2.0007, -1.9657,  ...,  0.0000,  0.0000,  0.0000],
          [-1.9832, -2.0007, -1.9657,  ...,  0.0000,  0.0000,  0.0000]],

         [[-1.4907, -1.5081, -1.4559,  ...,  0.0000,  0.0000,  0.0000],
          [-1

In [12]:
# Shape of the pixel mask that accompanies the padded image.
inputs["pixel_mask"].shape

torch.Size([1, 800, 1333])

In [13]:
# Shape of the image tensor; the last two dims match the processor's pad_size (800 x 1333).
inputs["pixel_values"].shape

torch.Size([1, 3, 800, 1333])

In [14]:
# Shape of one channel plane of the first image in the batch.
inputs["pixel_values"][0, 0].shape

torch.Size([800, 1333])

In [15]:
# Sum over the first mask row of the first image; presumably the number of
# non-padded columns — TODO confirm the mask convention (1 = real pixel).
inputs["pixel_mask"][0, 0].sum()

tensor(1069, device='cuda:0')

In [16]:
# Size of the original, un-resized image as reported by PIL.
image.size

(4032, 3016)

In [17]:
# Softmax over the last dimension of the raw logits (one row per query, one column per class).
# NOTE(review): post_process_object_detection may score with sigmoid rather than
# softmax, so these values need not match the exported scores — confirm.
outputs.logits.softmax(dim=2)

tensor([[[0.5330, 0.2409, 0.2261],
         [0.6431, 0.1797, 0.1772],
         [0.3508, 0.3056, 0.3436],
         [0.7310, 0.1203, 0.1487],
         [0.3790, 0.3061, 0.3149],
         [0.5801, 0.1888, 0.2311],
         [0.6157, 0.1811, 0.2032],
         [0.4695, 0.2456, 0.2849],
         [0.6175, 0.1955, 0.1870],
         [0.6577, 0.1849, 0.1574],
         [0.7480, 0.1218, 0.1301],
         [0.6071, 0.1839, 0.2090],
         [0.4473, 0.2655, 0.2873],
         [0.3217, 0.3293, 0.3490],
         [0.6479, 0.1731, 0.1790],
         [0.7343, 0.1412, 0.1245],
         [0.4905, 0.2351, 0.2744],
         [0.3634, 0.3101, 0.3265],
         [0.6656, 0.1763, 0.1581],
         [0.3692, 0.2964, 0.3344],
         [0.6655, 0.1738, 0.1607],
         [0.4801, 0.2225, 0.2974],
         [0.6621, 0.1572, 0.1807],
         [0.7489, 0.1310, 0.1201],
         [0.7201, 0.1336, 0.1463],
         [0.4986, 0.2393, 0.2621],
         [0.7264, 0.1317, 0.1419],
         [0.7327, 0.1524, 0.1150],
         [0.5890, 0.

In [18]:
# Run the full prediction pipeline on the 16-image trial batch.
results = predicts(image_pths)

In [19]:
# Number of predicted rows per class name in the trial batch.
results["category_id"].value_counts()

category_id
Trophozoite    598
WBC             91
Name: count, dtype: int64

In [20]:
# Random 10-row preview of the trial predictions (unseeded).
results.sample(10)

Unnamed: 0,image_id,x,y,w,h,category_id,score
380,id_zdg96srigj.jpg,1724.18,275.62,33.28,32.81,Trophozoite,0.304
569,id_4wkzpeu6or.jpg,2375.1,2762.81,84.09,107.1,Trophozoite,0.322
464,id_ch6r0g46fr.jpg,1101.46,214.43,77.33,94.46,Trophozoite,0.313
662,id_by6e6shi2z.jpg,518.17,436.08,34.51,31.71,Trophozoite,0.351
650,id_by6e6shi2z.jpg,932.57,489.53,36.4,36.93,Trophozoite,0.42
320,id_dg0icorzno.jpg,923.5,985.12,43.15,27.71,Trophozoite,0.11
376,id_zdg96srigj.jpg,784.55,109.06,34.82,33.93,Trophozoite,0.319
72,id_ytq3slqkjm.jpg,1295.77,276.19,33.97,32.08,Trophozoite,0.445
565,id_4wkzpeu6or.jpg,2312.87,854.95,86.38,109.81,Trophozoite,0.366
624,id_idjqlz4ppb.jpg,1103.62,1668.38,75.73,91.91,Trophozoite,0.138


In [21]:
batch_size = 16
test_images = test["image_id"].unique()
# One DataFrame per batch of image names. tqdm takes its total from
# len(range(...)), which is the exact batch count; a hand-computed
# `len // batch_size + 1` over-counts by one when the image count is an exact
# multiple of batch_size.
results = [
	predicts(test_images[start : start + batch_size])
	for start in tqdm(range(0, len(test_images), batch_size))
]

100%|██████████| 18/18 [01:20<00:00,  4.48s/it]


In [22]:
# Stack the per-batch frames into one table with a fresh 0..n-1 index.
predictions = pd.concat(results, ignore_index=True)

In [23]:
# Random 10-row preview of all validation predictions (unseeded).
predictions.sample(10)

Unnamed: 0,image_id,x,y,w,h,category_id,score
8931,id_oheo90515w.jpg,2676.14,798.0,233.63,219.09,WBC,0.328
3081,id_4svm2mbojb.jpg,2378.95,236.0,95.84,108.56,Trophozoite,0.229
2486,id_ndhdv3wuvg.jpg,1164.99,2.12,73.15,36.67,WBC,0.239
8009,id_aviegh5t3l.jpg,2238.38,1119.67,77.44,97.5,Trophozoite,0.123
9940,id_gf1jjrs2ig.jpg,1655.77,1631.21,73.91,90.58,Trophozoite,0.221
6097,id_oc5se3b1wo.jpg,458.94,2003.4,84.8,106.79,Trophozoite,0.393
9602,id_5cmiy2xmxy.jpg,1929.12,1093.55,76.98,91.29,Trophozoite,0.238
1814,id_jl93f5rtzc.jpg,429.22,332.77,32.89,31.61,Trophozoite,0.399
1460,id_6ujuefpy4m.jpg,339.66,438.34,32.3,30.4,Trophozoite,0.338
4699,id_jtsa3ltfpf.jpg,1136.69,749.91,33.95,32.82,Trophozoite,0.336


In [24]:
# Share of predicted rows per class (positional True = normalize).
predictions["category_id"].value_counts(True)

category_id
Trophozoite    0.816655
WBC            0.183345
Name: proportion, dtype: float64

In [25]:
# Summary statistics of the exported confidence scores.
predictions["score"].describe()

count    10003.000000
mean         0.263023
std          0.133959
min          0.046000
25%          0.147500
50%          0.254000
75%          0.355500
max          0.694000
Name: score, dtype: float64

In [26]:
# Preview of the CSV path the export cell writes to.
f"zindi_data/validation/prediction_{FILE_NAME}.csv"

'zindi_data/validation/prediction_cond-detr-50_THR0.000_IOU0.400_ID580.csv'

In [27]:
# Switch to the final column names in one pass. The keys are disjoint, so this
# is equivalent to renaming in several steps.
predictions = predictions.rename(
    columns={
        "x": "xmin",
        "y": "ymin",
        "category_id": "class",
        "image_id": "Image_ID",
        "score": "confidence",
    }
)

# Add the opposite box corner alongside the existing width/height columns.
predictions["xmax"] = predictions["xmin"] + predictions["w"]
predictions["ymax"] = predictions["ymin"] + predictions["h"]

output_path = f"zindi_data/validation/prediction_{FILE_NAME}.csv"
# to_csv does not create missing folders; create the target directory first so
# the long inference run is not lost to a missing-directory error.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
predictions.to_csv(output_path, index=False)