In [1]:
# Import libraries
import pandas as pd
import os
from pathlib import Path
from tqdm import tqdm
import yaml
import matplotlib.pyplot as plt
from ultralytics.engine.results import Results
from ultralytics import YOLO
import numpy as np
from PIL import Image, ExifTags
import torch

In [2]:
# INPUT_DIRS
INPUT_DATA_DIR = Path('dataset')
## Drop the Folder if it already exists
DATASETS_DIR = Path('dataset')
# Image & labels directory
TRAIN_IMAGES_DIR = DATASETS_DIR / 'images' / 'train'
TRAIN_LABELS_DIR = DATASETS_DIR / 'labels'/ 'train'
TEST_IMAGES_DIR = DATASETS_DIR / 'images' / 'test'
VAL_IMAGES_DIR = DATASETS_DIR / 'images' /'val'
VAL_LABELS_DIR = DATASETS_DIR / 'labels' /'val'

# Load train and test files
train = pd.read_csv(INPUT_DATA_DIR / 'Train_df.csv')
val = pd.read_csv(INPUT_DATA_DIR / 'Val_df.csv')
test = pd.read_csv(INPUT_DATA_DIR / 'Test.csv')
ss = pd.read_csv(INPUT_DATA_DIR / 'SampleSubmission.csv')

class_map = {cls: i for i, cls in enumerate(sorted(train['class'].unique().tolist()))}
# Strip any spacing from the class item and make sure that it is a str
train['class'] = train['class'].str.strip()

# Map {'healthy': 2, 'cssvd': 1, anthracnose: 0}
train['class_id'] = train['class'].map(class_map)

train_df = train
val_df = val

# Create a data.yaml file required by yolo
class_names = sorted(train['class'].unique().tolist())
num_classes = len(class_names)
data_yaml = {
    "path" : str(DATASETS_DIR.absolute()),
    'train': str(TRAIN_IMAGES_DIR.absolute()),
    'val': str(VAL_IMAGES_DIR.absolute()),
    'test': str(TEST_IMAGES_DIR.absolute()),
    'nc': num_classes,
    'names': class_names
}

val_image_names = [str(Path(name).stem) for name in val_df['Image_ID'].unique()]
train_image_names = [str(Path(name).stem) for name in train['ImagePath'].unique()]

In [3]:
from glob import glob

# Validate the model on the validation set
BEST_PATH = sorted(glob("runs/detect/train*/weights/best.pt"))[-1]
# BEST_PATH = "runs/detect/train3/weights/best.pt"
BEST_PATH

'runs/detect/train2/weights/best.pt'

In [4]:
for flag, v in ExifTags.TAGS.items():
    if v == "Orientation":
        break


def load_image(filepath):
    image = Image.open(filepath)
    return image

    exif = image._getexif()
    if exif is None:
        return image

    orientation_value = exif.get(flag, None)

    if orientation_value == 3:
        image = image.rotate(180, expand=True)
    elif orientation_value == 6:
        image = image.rotate(270, expand=True)
    elif orientation_value == 8:
        image = image.rotate(90, expand=True)
    return image


flag

274

In [5]:
from ultralytics.engine.predictor import BasePredictor

BEST_CONFIG = sorted(glob("runs/detect/train*/args.yaml"))[-1]
predictor = BasePredictor(BEST_CONFIG)

In [None]:
# Load the trained YOLO model
model = YOLO(BEST_PATH)

# Path to the test images directory
test_dir_path = TEST_IMAGES_DIR

# Get a list of all image files in the test directory
image_files = os.listdir(test_dir_path)

# Initialize an empty list to store the results for all images
all_data = []

# Initialize an empty list to store the results for all images
all_data = []

# Iterate through each image in the directory
for image_file in tqdm(image_files):
    # Full path to the image
    img_path = os.path.join(test_dir_path, image_file)

    # Make predictions on the image
    results: list[Results] = model.predict(
        load_image(img_path),
        conf=0.0,
        imgsz=1024,
        # max_det=100,
        verbose=False,
        nms=True,
        iou=0.7,
        # nbs=64,
        device="cuda:0",
    )

    # Extract bounding boxes, confidence scores, and class labels
    boxes = (
        results[0].boxes.xyxy.tolist() if results[0].boxes else []
    )  # Bounding boxes in xyxy format
    classes = results[0].boxes.cls.tolist() if results[0].boxes else []  # Class indices
    confidences = (
        results[0].boxes.conf.tolist() if results[0].boxes else []
    )  # Confidence scores
    names = results[0].names  # Class names dictionary

    if boxes:  # If detections are found
        for box, cls, conf in zip(boxes, classes, confidences):
            x1, y1, x2, y2 = box
            detected_class = names[
                int(cls)
            ]  # Get the class name from the names dictionary

            # Add the result to the all_data list
            all_data.append(
                {
                    "Image_ID": str(image_file),
                    "class": detected_class,
                    "confidence": conf,
                    "ymin": y1,
                    "xmin": x1,
                    "ymax": y2,
                    "xmax": x2,
                }
            )
    else:  # If no objects are detected
        all_data.append(
            {
                "Image_ID": str(image_file),
                "class": None,
                "confidence": None,
                "ymin": None,
                "xmin": None,
                "ymax": None,
                "xmax": None,
            }
        )

100%|██████████| 1626/1626 [03:15<00:00,  8.30it/s]


In [7]:
# Convert the list to a DataFrame for all images
sub = pd.DataFrame(all_data)

In [8]:
sub.head()

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
0,ID_cWEAQI.jpeg,healthy,0.544732,0.0,0.0,3671.547607,1677.223511
1,ID_cWEAQI.jpeg,healthy,0.011575,319.800598,1257.447021,1133.294067,1794.507324
2,ID_cWEAQI.jpeg,cssvd,0.010274,2121.080322,25.210379,3996.958008,1582.924072
3,ID_cWEAQI.jpeg,healthy,0.008656,1568.007812,16.477583,3980.346924,1614.395996
4,ID_cWEAQI.jpeg,anthracnose,0.007832,2376.124756,12.456416,3995.283447,1585.965576


In [9]:
sub.describe()

Unnamed: 0,confidence,ymin,xmin,ymax,xmax
count,487800.0,487800.0,487800.0,487800.0,487800.0
mean,0.004946,774.20017,696.899026,1399.405646,1216.761743
std,0.041294,955.91946,838.080582,1164.923586,989.669973
min,9e-06,0.0,0.0,2.183855,6.420255
25%,0.000202,3.771681,4.735268,416.0,415.793793
50%,0.000432,390.323761,386.498703,1079.765869,951.745758
75%,0.00111,1156.936432,1024.98703,2048.0,1798.047943
max,0.89479,4034.561768,4047.558105,4128.0,4128.0


In [10]:
sub['class'].value_counts()

class
cssvd          179064
healthy        158489
anthracnose    150247
Name: count, dtype: int64

In [11]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64

class
healthy        1153
cssvd           801
anthracnose     694
None             57
Name: count, dtype: int6

In [12]:
sub.to_csv("dataset/predictions/03-predictions.csv", index=False)

In [13]:
sub["confidence"].describe()

count    487800.000000
mean          0.004946
std           0.041294
min           0.000009
25%           0.000202
50%           0.000432
75%           0.001110
max           0.894790
Name: confidence, dtype: float64

In [14]:
import pandas as pd

sub = pd.read_csv('dataset/predictions/03-predictions.csv')

sub.sample(6)

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax
366005,ID_ZNueIB.jpg,cssvd,0.018313,0.0,18.79158,369.739136,728.420044
293914,ID_VEfmr3.jpeg,healthy,0.000792,1680.510132,3.630003,2486.222412,700.18988
421633,ID_d7p17d.jpeg,anthracnose,0.000529,1377.809082,2760.769531,2958.419678,3024.0
59961,ID_yAA9sh.jpeg,cssvd,0.000365,173.71788,0.0,1310.096069,270.039886
455206,ID_kEdXYw.jpg,anthracnose,0.000787,565.797607,0.0,2024.513428,395.216431
205422,ID_XUxHSs.jpg,cssvd,4e-05,4.428606,324.185638,150.299423,815.499756


In [15]:
sub["Image_ID"].value_counts().describe()

count    1626.0
mean      300.0
std         0.0
min       300.0
25%       300.0
50%       300.0
75%       300.0
max       300.0
Name: count, dtype: float64

In [16]:
sub["Image_ID"].nunique()

1626

In [17]:
sub.isna().sum()

Image_ID      0
class         0
confidence    0
ymin          0
xmin          0
ymax          0
xmax          0
dtype: int64