In [1]:
# Import libraries
import pandas as pd
import os
from pathlib import Path
from tqdm import tqdm
import yaml
import matplotlib.pyplot as plt
from ultralytics.engine.results import Results
from ultralytics import YOLO
import numpy as np
from PIL import Image, ExifTags
import torch

In [2]:
# INPUT_DIRS
INPUT_DATA_DIR = Path('dataset')
## Drop the Folder if it already exists
DATASETS_DIR = Path('dataset')
# Image & labels directory
TRAIN_IMAGES_DIR = DATASETS_DIR / 'images' / 'train'
TRAIN_LABELS_DIR = DATASETS_DIR / 'labels'/ 'train'
TEST_IMAGES_DIR = DATASETS_DIR / 'images' / 'test'
VAL_IMAGES_DIR = DATASETS_DIR / 'images' /'val'
VAL_LABELS_DIR = DATASETS_DIR / 'labels' /'val'

# Load train and test files
train = pd.read_csv(INPUT_DATA_DIR / 'Train_df.csv')
val = pd.read_csv(INPUT_DATA_DIR / 'Val_df.csv')
test = pd.read_csv(INPUT_DATA_DIR / 'Test.csv')
ss = pd.read_csv(INPUT_DATA_DIR / 'SampleSubmission.csv')

class_map = {cls: i for i, cls in enumerate(sorted(train['class'].unique().tolist()))}
# Strip any spacing from the class item and make sure that it is a str
train['class'] = train['class'].str.strip()

# Map {'healthy': 2, 'cssvd': 1, anthracnose: 0}
train['class_id'] = train['class'].map(class_map)

train_df = train
val_df = val

# Create a data.yaml file required by yolo
class_names = sorted(train['class'].unique().tolist())
num_classes = len(class_names)
data_yaml = {
    "path" : str(DATASETS_DIR.absolute()),
    'train': str(TRAIN_IMAGES_DIR.absolute()),
    'val': str(VAL_IMAGES_DIR.absolute()),
    'test': str(TEST_IMAGES_DIR.absolute()),
    'nc': num_classes,
    'names': class_names
}

val_image_names = [str(Path(name).stem) for name in val_df['Image_ID'].unique()]
train_image_names = [str(Path(name).stem) for name in train['ImagePath'].unique()]

In [3]:
from glob import glob

# Validate the model on the validation set
BEST_PATH = sorted(glob("zindi_challenge_cacao_stage2/train*/weights/best.pt"))[-1]
# BEST_PATH = "zindi_challenge_cacao/train2/weights/best.pt"
BEST_PATH

'zindi_challenge_cacao_stage2/train4/weights/best.pt'

In [4]:
for flag, v in ExifTags.TAGS.items():
    if v == "Orientation":
        break


def load_image_(filepath):
    image = Image.open(filepath)
    # return image

    exif = image._getexif()
    if exif is None:
        return image

    orientation_value = exif.get(flag, None)

    if orientation_value == 3:
        image = image.rotate(180, expand=True)
    elif orientation_value == 6:
        image = image.rotate(270, expand=True)
    elif orientation_value == 8:
        image = image.rotate(90, expand=True)
    return image

from ultralytics.utils.patches import imread
import cv2

def load_image(filepath):
    return imread(filepath, cv2.IMREAD_COLOR)


flag

274

In [5]:
# Validate the model on the validation set
BEST_CFG = sorted(glob("zindi_challenge_cacao_stage2/train*/args.yaml"))[-1]
# BEST_CFG = "zindi_challenge_cacao/train2/args.yaml"
BEST_CFG

'zindi_challenge_cacao_stage2/train4/args.yaml'

In [6]:
import yaml
with open(BEST_CFG, 'r') as f:
	cfg: dict = yaml.safe_load(f)
	print(cfg)

{'task': 'detect', 'mode': 'train', 'model': 'yolo11l.pt', 'data': 'data.yaml', 'epochs': 150, 'time': 2.5, 'patience': 20, 'batch': 8, 'imgsz': 1024, 'save': True, 'save_period': -1, 'cache': False, 'device': 'cuda:0', 'workers': 4, 'project': 'zindi_challenge_cacao_stage2', 'name': 'train4', 'exist_ok': False, 'pretrained': True, 'optimizer': 'auto', 'verbose': True, 'seed': 0, 'deterministic': True, 'single_cls': False, 'rect': False, 'cos_lr': True, 'close_mosaic': 10, 'resume': False, 'amp': True, 'fraction': 1.0, 'profile': False, 'freeze': None, 'multi_scale': False, 'overlap_mask': True, 'mask_ratio': 4, 'dropout': 0.21625404864404263, 'val': True, 'split': 'val', 'save_json': False, 'conf': None, 'iou': 0.6656773157840741, 'max_det': 150, 'half': True, 'dnn': False, 'plots': True, 'source': None, 'vid_stride': 1, 'stream_buffer': False, 'visualize': False, 'augment': True, 'agnostic_nms': True, 'classes': None, 'retina_masks': False, 'embed': None, 'show': False, 'save_frames'

In [None]:
# Batch size for predictions
batch_size = 16

cfg["device"] = "cuda:1"
cfg["batch"] = batch_size
cfg["conf"] = 0.0
cfg["verbose"] = False

cfg.pop("source", None)
# cfg.pop("batch_size")
cfg.pop("visualize", None)
cfg.pop("data", None)
cfg.pop("name", None)

# cfg["model"] = "val"
cfg.pop("model", None)

keys = list(cfg.keys())
for col in keys:
    if (
        "show" in col
        or "save" in col
        or "freeze" in col
        # or "nms" in col
        # or "multi_scale" in col
        or "plot" in col
        or "aug" in col
        or "drop" in col
    ):
        cfg.pop(col)

In [8]:
# Load the trained YOLO model
model = YOLO(BEST_PATH)

# Path to the test images directory
test_dir_path = TEST_IMAGES_DIR

# Get a list of all image files in the test directory
image_files = os.listdir(test_dir_path)

# Initialize an empty list to store the results for all images
all_data = []

# Initialize an empty list to store the results for all images
all_data = []

# Batch size for predictions
batch_size = 16

# Process images in batches
for i in tqdm(range(0, len(image_files), batch_size)):
	batch_files = image_files[i:i + batch_size]
	batch_images = [load_image(os.path.join(test_dir_path, img_file)) for img_file in batch_files]

	# Make predictions on the batch of images
	results = model.predict(
		batch_images,
		**cfg,
	)

	# Iterate through each result in the batch
	for img_file, result in zip(batch_files, results):
		boxes = result.boxes.xyxy.tolist() if result.boxes else []  # Bounding boxes in xyxy format
		classes = result.boxes.cls.tolist() if result.boxes else []  # Class indices
		confidences = result.boxes.conf.tolist() if result.boxes else []  # Confidence scores
		names = result.names  # Class names dictionary

		if boxes:  # If detections are found
			for box, cls, conf in zip(boxes, classes, confidences):
				x1, y1, x2, y2 = box
				detected_class = names[int(cls)]  # Get the class name from the names dictionary

				# Add the result to the all_data list
				all_data.append(
					{
						"Image_ID": str(img_file),
						"class": detected_class,
						"confidence": conf,
						"ymin": y1,
						"xmin": x1,
						"ymax": y2,
						"xmax": x2,
					}
				)
		else:  # If no objects are detected
			all_data.append(
				{
					"Image_ID": str(img_file),
					"class": "None",
					"confidence": None,
					"ymin": None,
					"xmin": None,
					"ymax": None,
					"xmax": None,
				}
			)


100%|██████████| 102/102 [01:58<00:00,  1.17s/it]


In [9]:
# Convert the list to a DataFrame for all images
sub = pd.DataFrame(all_data)

In [10]:
sub.head()

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
0,ID_cWEAQI.jpeg,healthy,0.698242,72.265617,0.0,3810.546631,1687.499878
1,ID_cWEAQI.jpeg,anthracnose,0.010094,2726.562256,638.671814,3874.999756,1568.359253
2,ID_cWEAQI.jpeg,healthy,0.006073,166.01561,0.0,1314.453003,544.921814
3,ID_cWEAQI.jpeg,anthracnose,0.004757,3523.437256,7.8125,3999.999756,753.906189
4,ID_cWEAQI.jpeg,healthy,0.002958,366.210907,1406.249878,1158.203003,1800.0


In [11]:
sub.describe()

Unnamed: 0,confidence,ymin,xmin,ymax,xmax
count,243900.0,243900.0,243900.0,243900.0,243900.0
mean,0.00992,801.19302,696.192284,1369.993094,1290.171572
std,0.071032,1020.093933,788.641701,1193.833171,959.751022
min,3e-06,0.0,0.0,0.0,0.0
25%,6.5e-05,1.476562,66.25,378.105469,541.054688
50%,0.000218,370.124969,431.34375,1075.78125,960.0
75%,0.000856,1215.644501,1035.755859,2048.0,1866.0
max,0.915039,4047.375,4031.25,4128.0,4128.0


In [12]:
sub['class'].value_counts()

class
healthy        93943
cssvd          85129
anthracnose    64828
Name: count, dtype: int64

In [13]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64

class
healthy        1153
cssvd           801
anthracnose     694
None             57
Name: count, dtype: int6

In [14]:
sub.to_csv("dataset/predictions/09-predictions.csv", index=False)

In [15]:
sub["confidence"].describe()

count    243900.000000
mean          0.009920
std           0.071032
min           0.000003
25%           0.000065
50%           0.000218
75%           0.000856
max           0.915039
Name: confidence, dtype: float64

In [16]:
import pandas as pd

sub = pd.read_csv('dataset/predictions/09-predictions.csv')

sub.sample(6)

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
154598,ID_FzkXs3.jpg,cssvd,2e-05,510.625,756.25,839.375,960.0
205238,ID_YpuhPQ.jpg,cssvd,0.002003,868.59375,1695.75,1466.25,2435.25
868,ID_EsG9PW.jpeg,anthracnose,2.5e-05,0.492187,0.0,228.128891,936.140564
34550,ID_wGs0TG.JPG,healthy,0.002058,0.0,0.0,105.512688,332.718719
5978,ID_Dh68Pg.jpeg,healthy,0.000171,3945.374756,2354.624756,4031.999756,3024.0
152802,ID_rMv7Zl.jpg,cssvd,7e-06,1043.085938,406.054688,1080.0,631.757812


In [17]:
sub["Image_ID"].value_counts().describe()

count    1626.0
mean      150.0
std         0.0
min       150.0
25%       150.0
50%       150.0
75%       150.0
max       150.0
Name: count, dtype: float64

In [18]:
sub["Image_ID"].nunique()

1626

In [19]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64