In [1]:
import pandas as pd
import numpy as np
from PIL import Image, ExifTags
import torch
from tqdm import tqdm

from transformers import AutoModelForObjectDetection, AutoImageProcessor

In [None]:
from concurrent.futures import ThreadPoolExecutor

PATH = "logs/checkpoint-19208"
model = AutoModelForObjectDetection.from_pretrained(PATH)
image_processor = AutoImageProcessor.from_pretrained(PATH)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for flag in ExifTags.TAGS.keys():
	if ExifTags.TAGS[flag] == "Orientation":
		break


def load_image(image_path):
	image = Image.open(image_path)

	exif = image._getexif()
	if exif is not None:
		orientation = exif.get(flag, None)
		if orientation == 3:
			image = image.rotate(180, expand=True)
		elif orientation == 6:
			image = image.rotate(270, expand=True)
		elif orientation == 8:
			image = image.rotate(90, expand=True)
	return image

def load_images(image_paths):
	with ThreadPoolExecutor() as executor:
		images = list(executor.map(load_image, image_paths))
	return images

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [3]:
from glob import glob

files = glob("dataset/images/test/*")
len(files)

1626

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision.ops import nms


# Make predictions with thresholds and NMS
@torch.inference_mode()
def predict_images(image_paths, confidence_threshold=0.5, iou_threshold=0.5):
    # Load and process image
    images = load_images(image_paths)
    inputs = image_processor(images=images, return_tensors="pt").to(device)

    # Make prediction
    outputs = model(**inputs)

    # Process outputs with confidence threshold
    target_sizes = torch.tensor([image.size[::-1] for image in images]).to(device)
    batch_results = image_processor.post_process_object_detection(
        outputs, threshold=confidence_threshold, target_sizes=target_sizes
    )

    for results, image in zip(batch_results, images):
        # Apply NMS if needed
        boxes = results["boxes"]
        scores = results["scores"]
        labels = results["labels"]

        # Apply NMS for each class
        keep_indices = []

        # Apply NMS
        class_keep = nms(boxes, scores, iou_threshold)
        keep_indices = class_keep.tolist()

        # Extract final detections
        final_boxes = boxes[keep_indices].cpu().numpy()
        final_scores = scores[keep_indices].cpu().numpy()
        final_labels = labels[keep_indices].cpu().numpy()

        yield {
            "image": image,
            "boxes": final_boxes,
            "scores": final_scores,
            "labels": final_labels,
        }

In [7]:
predictions = list(predict_images([files[0], files[1]], confidence_threshold=0.1, iou_threshold=0.3))

len(predictions)

2

In [8]:
predictions[0]

{'image': <PIL.Image.Image image mode=RGB size=1800x4000>,
 'boxes': array([[ -17.037928,   41.865944, 1653.9338  , 3978.7322  ],
        [1423.0753  ,  322.27536 , 1779.8544  , 1115.6741  ]],
       dtype=float32),
 'scores': array([0.44632742, 0.1484672 ], dtype=float32),
 'labels': array([2, 2])}

In [9]:
files[:2]

['dataset/images/test/ID_cWEAQI.jpeg', 'dataset/images/test/ID_NtqErb.jpg']

In [None]:
batch_size = 16
predictions = [
    list(predict_images(
        files[i : i + batch_size], confidence_threshold=0.005, iou_threshold=0.7
    ))
    for i in tqdm(range(0, len(files), batch_size))
]

100%|██████████| 102/102 [06:16<00:00,  3.70s/it]


In [11]:
predictions = sum(predictions, start=[])

len(predictions)

1626

In [12]:
predictions[0]

{'image': <PIL.Image.Image image mode=RGB size=1800x4000>,
 'boxes': array([[-1.5221375e+01,  5.6677937e+01,  1.5725955e+03,  3.9755698e+03],
        [ 1.4103761e+03,  3.2205890e+02,  1.7391385e+03,  1.0897238e+03],
        [ 1.3658841e+01,  1.4869922e+02,  4.4927451e+02,  1.1096399e+03],
        [ 1.1209342e+03,  1.4121279e+01,  1.6993610e+03,  4.7483231e+02],
        [ 5.1627827e-01,  7.5630225e+02,  2.1345480e+02,  1.5534452e+03],
        [ 9.3504883e+02,  2.8794141e+03,  1.3658700e+03,  3.6686372e+03]],
       dtype=float32),
 'scores': array([0.43240353, 0.14345668, 0.08802307, 0.07881356, 0.06542471,
        0.05037794], dtype=float32),
 'labels': array([2, 2, 2, 2, 2, 0])}

In [13]:
type(predictions[0]["boxes"])

numpy.ndarray

In [14]:
label2id = {'anthracnose': 0, 'cssvd': 1, 'healthy': 2}
id2label = {v: k for k, v in label2id.items()}

In [15]:
import pandas as pd
import os

# Convert predictions into a DataFrame
data = []
for file, prediction in zip(files, predictions):
	for box, score, label in zip(prediction['boxes'], prediction['scores'], prediction['labels']):
		data.append({
			'Image_ID': os.path.basename(file),
			# 'box': box.tolist(),
			'confidence': float(score),
			'class_id': int(label),
			'class': id2label[int(label)],
			"x_min": float(box[0]),
			"y_min": float(box[1]),
			"x_max": float(box[2]),
			"y_max": float(box[3]),
		})

df_predictions = pd.DataFrame(data)
df_predictions.head()

Unnamed: 0,Image_ID,confidence,class_id,class,x_min,y_min,x_max,y_max
0,ID_cWEAQI.jpeg,0.432404,2,healthy,-15.221375,56.677937,1572.595459,3975.569824
1,ID_cWEAQI.jpeg,0.143457,2,healthy,1410.376099,322.058899,1739.13855,1089.723755
2,ID_cWEAQI.jpeg,0.088023,2,healthy,13.658841,148.699219,449.274506,1109.639893
3,ID_cWEAQI.jpeg,0.078814,2,healthy,1120.934204,14.121279,1699.360962,474.832306
4,ID_cWEAQI.jpeg,0.065425,2,healthy,0.516278,756.302246,213.454803,1553.44519


In [16]:
df_predictions["Image_ID"].value_counts().describe()

count    1626.000000
mean       11.296433
std         5.680906
min         1.000000
25%         7.000000
50%        11.000000
75%        15.000000
max        35.000000
Name: count, dtype: float64

In [17]:
df_predictions.to_csv("dataset/predictions/04-predictions.csv", index=False)