In [1]:
# Import libraries
import pandas as pd
import os
from pathlib import Path
from tqdm import tqdm
import yaml
import matplotlib.pyplot as plt
from ultralytics.engine.results import Results
from ultralytics import YOLO
import numpy as np
from PIL import Image, ExifTags
import torch

In [2]:
# INPUT_DIRS
INPUT_DATA_DIR = Path('dataset')
## Drop the Folder if it already exists
DATASETS_DIR = Path('dataset')
# Image & labels directory
TRAIN_IMAGES_DIR = DATASETS_DIR / 'images' / 'train'
TRAIN_LABELS_DIR = DATASETS_DIR / 'labels'/ 'train'
TEST_IMAGES_DIR = DATASETS_DIR / 'images' / 'test'
VAL_IMAGES_DIR = DATASETS_DIR / 'images' /'val'
VAL_LABELS_DIR = DATASETS_DIR / 'labels' /'val'

# Load train and test files
train = pd.read_csv(INPUT_DATA_DIR / 'Train_df.csv')
val = pd.read_csv(INPUT_DATA_DIR / 'Val_df.csv')
test = pd.read_csv(INPUT_DATA_DIR / 'Test.csv')
ss = pd.read_csv(INPUT_DATA_DIR / 'SampleSubmission.csv')

class_map = {cls: i for i, cls in enumerate(sorted(train['class'].unique().tolist()))}
# Strip any spacing from the class item and make sure that it is a str
train['class'] = train['class'].str.strip()

# Map {'healthy': 2, 'cssvd': 1, anthracnose: 0}
train['class_id'] = train['class'].map(class_map)

train_df = train
val_df = val

# Create a data.yaml file required by yolo
class_names = sorted(train['class'].unique().tolist())
num_classes = len(class_names)
data_yaml = {
    "path" : str(DATASETS_DIR.absolute()),
    'train': str(TRAIN_IMAGES_DIR.absolute()),
    'val': str(VAL_IMAGES_DIR.absolute()),
    'test': str(TEST_IMAGES_DIR.absolute()),
    'nc': num_classes,
    'names': class_names
}

val_image_names = [str(Path(name).stem) for name in val_df['Image_ID'].unique()]
train_image_names = [str(Path(name).stem) for name in train['ImagePath'].unique()]

In [3]:
from glob import glob

# Validate the model on the validation set
BEST_PATH = sorted(glob("zindi_challenge_cacao/train*/weights/best.pt"))[-1]
BEST_PATH = "zindi_challenge_cacao/saved/last2.pt"
BEST_PATH

'zindi_challenge_cacao/saved/last2.pt'

In [4]:
for flag, v in ExifTags.TAGS.items():
    if v == "Orientation":
        break


def load_image_(filepath):
    image = Image.open(filepath)
    # return image

    exif = image._getexif()
    if exif is None:
        return image

    orientation_value = exif.get(flag, None)

    if orientation_value == 3:
        image = image.rotate(180, expand=True)
    elif orientation_value == 6:
        image = image.rotate(270, expand=True)
    elif orientation_value == 8:
        image = image.rotate(90, expand=True)
    return image

from ultralytics.utils.patches import imread
import cv2

def load_image(filepath):
    return imread(filepath, cv2.IMREAD_COLOR)


flag

274

In [5]:
from glob import glob

PATHS = [
    "zindi_challenge_cacao/train5/weights/best.pt",
	"zindi_challenge_cacao/train6/weights/best.pt",
	"zindi_challenge_cacao/train7/weights/best.pt",
]

In [6]:
# Validate the model on the validation set
CFG_PATHS = [
    "zindi_challenge_cacao/train5/args.yaml",
	"zindi_challenge_cacao/train6/args.yaml",
	"zindi_challenge_cacao/train7/args.yaml",
]

In [7]:
from MultiPredictions import MergedYOLOPredictor

# Load the trained YOLO model
model = MergedYOLOPredictor(PATHS)

Loaded 3 models.
Class mapping: {0: 'anthracnose', 1: 'cssvd', 2: 'healthy'}


In [8]:
import yaml

cfgs: list[dict] = []
for path in CFG_PATHS:
	# Load the YAML file
	with open(path, 'r') as f:
		cfg: dict = yaml.safe_load(f)
	cfgs.append(cfg)

In [9]:
# Batch size for predictions
batch_size = 16
for cfg in cfgs:
	cfg["device"] = "cuda:1"
	cfg["batch"] = batch_size
	cfg["conf"] = 0.
	cfg["verbose"] = False

	cfg.pop("source")
	# cfg.pop("batch_size")
	cfg.pop("visualize")

	keys = list(cfg.keys())
	for col in keys:
		if "show" in col or "save" in col:
			cfg.pop(col)

In [10]:
# Path to the test images directory
test_dir_path = TEST_IMAGES_DIR

# Get a list of all image files in the test directory
image_files = os.listdir(test_dir_path)

# Initialize an empty list to store the results for all images
all_data = []

# Initialize an empty list to store the results for all images
all_data = []

# Batch size for predictions
batch_size = 16

with torch.no_grad():
    # Process images in batches
	for i in tqdm(range(0, len(image_files), batch_size)):
		batch_files = image_files[i:i + batch_size]
		batch_images = [load_image(os.path.join(test_dir_path, img_file)) for img_file in batch_files]

		# Make predictions on the batch of images
		results = model.predict(
			batch_images,
			cfgs,
		)

		# Iterate through each result in the batch
		for img_file, result in zip(batch_files, results):
			if result["detections"]:  # If detections are found
				for raw in result["detections"]:
					x1, y1, x2, y2 = raw["bbox"]  # Bounding boxes in xyxy format
					cls = raw["class"]  # Class indices
					conf = raw["confidence"]  # Confidence scores
					# Add the result to the all_data list
					all_data.append(
						{
							"Image_ID": str(img_file),
							"class": cls,
							"confidence": conf,
							"ymin": y1,
							"xmin": x1,
							"ymax": y2,
							"xmax": x2,
						}
					)
			else:  # If no objects are detected
				all_data.append(
					{
						"Image_ID": str(img_file),
						"class": "None",
						"confidence": None,
						"ymin": None,
						"xmin": None,
						"ymax": None,
						"xmax": None,
					}
				)

100%|██████████| 102/102 [04:24<00:00,  2.60s/it]


In [11]:
# Convert the list to a DataFrame for all images
sub = pd.DataFrame(all_data)

In [12]:
sub.head()

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
0,ID_cWEAQI.jpeg,anthracnose,0.002872,60.666199,15.254377,4000.0,1679.71167
1,ID_cWEAQI.jpeg,anthracnose,0.0003,3672.0354,84.970467,4000.0,579.54834
2,ID_cWEAQI.jpeg,anthracnose,0.000245,930.34967,444.719177,2732.435547,1620.193848
3,ID_cWEAQI.jpeg,anthracnose,0.000188,228.629547,0.0,909.342712,437.89502
4,ID_cWEAQI.jpeg,anthracnose,0.000111,3156.461914,20.296572,3988.080322,1098.258911


In [13]:
sub.describe()

Unnamed: 0,confidence,ymin,xmin,ymax,xmax
count,146340.0,146340.0,146340.0,146340.0,146340.0
mean,0.014902,739.423728,690.515701,1368.425922,1245.011804
std,0.089599,980.004211,818.750295,1200.753204,969.328937
min,2e-06,0.0,0.0,0.0,0.0
25%,1.9e-05,0.948049,23.302939,364.502663,471.977547
50%,8.5e-05,294.210266,403.331604,1050.650574,960.0
75%,0.00064,1128.99881,1007.34375,2048.0,1800.0
max,0.893525,4084.330566,4094.873291,4128.0,4128.0


In [14]:
sub['class'].value_counts()

class
anthracnose    48780
cssvd          48780
healthy        48780
Name: count, dtype: int64

In [15]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64

class
healthy        1153
cssvd           801
anthracnose     694
None             57
Name: count, dtype: int6

In [16]:
sub.to_csv("dataset/predictions/08-predictions.csv", index=False)

In [17]:
sub["confidence"].describe()

count    146340.000000
mean          0.014902
std           0.089599
min           0.000002
25%           0.000019
50%           0.000085
75%           0.000640
max           0.893525
Name: confidence, dtype: float64

In [18]:
import pandas as pd

sub = pd.read_csv('dataset/predictions/08-predictions.csv')

sub.sample(6)

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
107168,ID_y9PmTs.JPG,healthy,0.001134,508.24527,1901.012207,899.269531,2187.624512
91415,ID_AvhFY7.jpg,healthy,0.000311,0.0,0.0,187.635193,1095.733521
125993,ID_eA9nie.jpg,healthy,0.000214,0.588417,42.292099,145.095932,351.706787
71624,ID_ACg6Qf.jpeg,healthy,2.5e-05,62.732944,0.0,470.317322,0.0
140939,ID_Fh5Pcm.jpg,healthy,1.1e-05,0.028881,270.626587,43.781097,655.65271
102599,ID_ras2bs.jpg,healthy,5e-06,1244.161133,151.484528,1280.0,398.569275


In [19]:
sub["Image_ID"].value_counts().describe()

count    1626.0
mean       90.0
std         0.0
min        90.0
25%        90.0
50%        90.0
75%        90.0
max        90.0
Name: count, dtype: float64

In [20]:
sub["Image_ID"].nunique()

1626

In [21]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64