# Predicting and checking YOLO results

These scripts can be used to run YOLOv8 detections and then to check and, if necessary, correct the resulting data.

**Warning**

The following scripts have been created to process data from IIIF manifests, following the download protocol set up in the '0_Download_processing.ipynb' notebook.
These scripts are not designed for local processing of data or data for which no URL is available.

**Notice concerning use** 
Any use, even partial, of the content of this notebook must be accompanied by an appropriate citation.

&copy; 2023 Marion Charpier

## Environment

In [None]:
import os
import uuid
import time
from datetime import datetime
import json
import unicodedata
import random
from glob import glob

import cv2
import pandas as pd
from PIL import Image
import torch
from ultralytics import YOLO

import sys
sys.path.append(os.path.join('..', 'modules'))

from class_names_functions import get_labels, get_class_name, get_class_code
from transform_coordinates_functions import from_relative_coordinates_to_absolute
from device_function import which_device

## Functions

### Prediction script

#### Process all images

In [2]:
def process_images_with_yolo(yolo_model_folder, img_dataset_folder):
    """
    Run the YOLO prediction step on every image found under a dataset folder.

    The dataset folder is walked recursively. Sub-folders whose name starts with "." are skipped, and every 
    file whose name ends with `.jpg` or `.png` (case-insensitive) is passed to `process_single_image_with_yolo`.

    Documentation: [YOLO Ultralytics GitHub](https://github.com/ultralytics/ultralytics/issues/2143)

    :param yolo_model_folder: 
        - Type: str
        - Description: Path to the folder containing the YOLO model. It is forwarded unchanged to 
                       `process_single_image_with_yolo`, which loads `weights/best.pt` from it.

    :param img_dataset_folder: 
        - Type: str
        - Description: Path to the root folder of the image dataset. All of its non-hidden sub-folders 
                       are searched for image files.

    :return: 
        - Type: None
        - Description: Nothing is returned. For each image processed, the list of remaining sub-folders 
                       and the image path are printed.

    &copy; Dominique Stutzmann - IRHT CNRS
    """

    image_suffixes = ('.jpg', '.png')

    for root, dirs, files in os.walk(img_dataset_folder):

        # Prune hidden folders in place so that os.walk never descends into them
        dirs[:] = [name for name in dirs if name[:1] != '.']

        image_names = (name for name in files if name.lower().endswith(image_suffixes))
        for image_name in image_names:
            image_path = os.path.join(root, image_name)
            process_single_image_with_yolo(yolo_model_folder, img_dataset_folder, image_path)
            print(dirs, image_path)

#### Process a single image

In [3]:
# Loaded YOLO models, keyed by weights path. Batch processing calls
# `process_single_image_with_yolo` once per image; without this cache the
# weights would be reloaded from disk for every single image.
_YOLO_MODEL_CACHE = {}


def _get_yolo_model(weights_path):
    """Return the YOLO model for `weights_path`, loading it only on first use."""
    model = _YOLO_MODEL_CACHE.get(weights_path)
    if model is None:
        model = YOLO(weights_path)
        _YOLO_MODEL_CACHE[weights_path] = model
    return model


def process_single_image_with_yolo(yolo_model_folder, img_dataset_folder, image_path):
    """
    Run a YOLO model on a single image and save the detections as a label file in YOLO format.

    The label file is written to
    `<grandparent of yolo_model_folder>/predict/<dataset name>_<model folder name>/<sub-folder of the image>/labels/`,
    where `<dataset name>` is the name of the folder two levels above `img_dataset_folder` and 
    `<sub-folder of the image>` is the image's folder relative to `img_dataset_folder`. 
    Each line of the file holds: class id, normalised `x_center y_center width height` (4 decimals), confidence.

    :param yolo_model_folder: 
        - Type: str
        - Description: The path to the folder containing the YOLO model. The weights are loaded from 
                       `weights/best.pt` inside this folder (once per weights path, then reused).

    :param img_dataset_folder: 
        - Type: str
        - Description: The path to the dataset folder containing the images. It is used to name the 
                       prediction folder and to compute the image's relative sub-folder.

    :param image_path: 
        - Type: str
        - Description: The path to the image file to be processed.

    :return: 
        - Type: None
        - Description: This function does not return a value. If the image cannot be read, or if nothing 
                       is detected, a message is printed and no label file is written.
    """

    # Check if the GPU is available - if not, use the CPU
    # NOTE(review): the selected device is not forwarded to `predict` below, so Ultralytics picks
    # its own default device. Confirm what `which_device()` returns before passing it as `device=`.
    device = which_device()

    # Load the model (cached across calls)
    yolo_model = _get_yolo_model(os.path.join(yolo_model_folder, 'weights', 'best.pt'))

    # os.path is used instead of split('/')[-3] so that short paths do not raise IndexError
    dataset_name = os.path.basename(os.path.dirname(os.path.dirname(img_dataset_folder)))
    output_directory = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        dataset_name + '_' + os.path.basename(yolo_model_folder),
        os.path.dirname(os.path.relpath(image_path, img_dataset_folder)),
        'labels'
    )
    print(output_directory)
    os.makedirs(output_directory, exist_ok=True)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread does not raise on unreadable files; never hand `None` to the model
        print(f"Could not read image {image_path}; skipping.")
        return

    # Process the image using YOLO; labels are written manually below, hence save_txt=False
    results = yolo_model.predict(source=image,
                                 agnostic_nms=True,
                                 imgsz=640,
                                 save_txt=False,
                                 save_conf=False,
                                 )

    boxes = results[0].boxes

    if not boxes:
        print(f"No detections found in {os.path.basename(image_path)}")
        return

    # Save the labels in the /labels/ folder
    label_filename = os.path.splitext(os.path.basename(image_path))[0] + '.txt'
    label_path = os.path.join(output_directory, label_filename)

    with open(label_path, 'w') as label_file:
        for box in boxes:
            xywh = " ".join([f"{value:.4f}" for value in box.xywhn.cpu().squeeze().tolist()])
            label_data = f'''{box.cls.cpu().item()} {xywh} {box.conf.cpu().item()}\n'''
            label_file.write(label_data)

### Create a csv with image data

In [4]:
def get_image_data(img_dataset_folder):
    """
    Create a CSV file containing metadata for each image located directly in a folder.

    Files whose name ends with `.jpg`, `.png` or `.tiff` (case-insensitive) are opened with PIL and one row 
    per image is written with the columns `Image_name`, `Folder`, `Absolute_path`, `Format`, `Width`, 
    `Height` and `Image_size` (width * height, in pixels). Sub-folders are not searched.

    :param img_dataset_folder: 
        - Type: str
        - Description: The path to the folder containing the images to be processed. The `Absolute_path` 
                       column is built from this value, so it is only absolute if this path is absolute.

    :return: 
        - Type: None
        - Description: This function does not return a value. It writes a `;`-separated CSV file into 
                       `img_dataset_folder`, named after the folder two levels above it. The header row 
                       is written even when no image is found.
    """

    image_extensions = ('.jpg', '.png', '.tiff')
    columns = ['Image_name', 'Folder', 'Absolute_path', 'Format', 'Width', 'Height', 'Image_size']

    data = []

    # Sorted so that the row order does not depend on the file system's listing order
    for file in sorted(os.listdir(img_dataset_folder)):
        if not file.lower().endswith(image_extensions):
            continue

        with Image.open(os.path.join(img_dataset_folder, file)) as img:
            absolute_path = img.filename
            image_format = img.format
            width, height = img.size

        data.append({
            'Image_name': os.path.splitext(file)[0],
            'Folder': img_dataset_folder,
            'Absolute_path': absolute_path,
            'Format': image_format,
            'Width': width,
            'Height': height,
            'Image_size': int(width) * int(height),
        })

    # Create a DataFrame from the image data list (explicit columns keep the header for empty folders)
    df = pd.DataFrame(data, columns=columns)

    # Save DataFrame to a CSV file; os.path avoids the IndexError of split('/')[-3] on short paths
    dataset_name = os.path.basename(os.path.dirname(os.path.dirname(img_dataset_folder)))
    csv_filename = os.path.join(img_dataset_folder, dataset_name + '.csv')
    df.to_csv(csv_filename, sep=';', index=False)

### Create a csv with YOLO results

#### Normalize filename

In [5]:
def normalize_filename(filename):
    """
    Reduce a filename to plain ASCII so that names can be compared reliably.

    The name is decomposed with Unicode NFKD normalization, then every character that cannot be 
    encoded as ASCII (combining accents, symbols without an ASCII decomposition, ...) is dropped.

    :param filename: 
        - Type: str
        - Description: The filename to be normalized.

    :return: 
        - Type: str
        - Description: The ASCII-only version of the filename.
    """
    decomposed = unicodedata.normalize('NFKD', filename)
    ascii_bytes = decomposed.encode('ASCII', 'ignore')
    return ascii_bytes.decode('ASCII')

#### Store the YOLO prediction in a CSV file

In [6]:
def yolo_to_csv(img_dataset_folder, yolo_model_folder):
    """
    Gather the YOLO predictions stored in `.txt` label files into CSV files, one per image folder.

    The dataset folder is walked recursively (hidden folders are skipped). For each folder, the label files 
    are looked up in `<prediction folder>/<relative sub-folder>/labels`, i.e. the layout written by 
    `process_single_image_with_yolo`, and the CSV is saved in the sibling `results` folder. 
    Each row holds the image path and size, the label file, the class id and name, the relative 
    and absolute bounding box coordinates, and the confidence score. Images without a label file 
    get one row with empty prediction columns.

    :param img_dataset_folder: 
        - Type: str
        - Description: The path to the folder containing the images used for inference. The name of the 
                       folder two levels above it is used to name the prediction folder and the CSV file.

    :param yolo_model_folder: 
        - Type: str
        - Description: The path to the folder containing the YOLO model. Its `labels.txt` file is used to 
                       resolve class names, and the prediction folder is located at 
                       `<grandparent of yolo_model_folder>/predict/<dataset name>_<model folder name>`.

    :return: 
        - Type: None
        - Description: This function does not return a value. Folders whose labels folder does not exist 
                       are skipped with a message.
    """
    dataset_name = os.path.basename(os.path.dirname(os.path.dirname(img_dataset_folder)))
    base_predict = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        dataset_name + '_' + os.path.basename(yolo_model_folder))
    csv_filename = dataset_name + '.csv'

    # Class labels are read lazily, once, instead of re-reading labels.txt for every bounding box
    labels = None

    for root, dirs, files in os.walk(img_dataset_folder):
        # Ignore hidden folders
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        # Determine the labels folder based on the current directory: predictions for images in a
        # sub-folder are stored under the same relative sub-folder of the prediction directory
        relative_dir = os.path.relpath(root, img_dataset_folder)
        if relative_dir == os.curdir:
            relative_dir = ''
        labels_folder = os.path.join(base_predict, relative_dir, 'labels')

        # Check if labels folder exists
        if not os.path.exists(labels_folder):
            print(f"Labels folder {labels_folder} does not exist.")
            continue

        # Search for annotation files (.txt files)
        annotation_files = [file for file in os.listdir(labels_folder) if file.endswith('.txt')]

        if not annotation_files:
            print(f'No annotations found in {labels_folder}.')

        # Index annotation files by normalized stem so each image is matched with one dictionary lookup
        annotations_by_stem = {}
        for ann_file in annotation_files:
            stem = normalize_filename(os.path.splitext(ann_file)[0])
            annotations_by_stem.setdefault(stem, []).append(ann_file)

        # Create results folder if it doesn't already exist. Only the last path component is swapped:
        # str.replace would also rewrite any other "labels" occurring earlier in the path
        results_folder = os.path.join(os.path.dirname(labels_folder), 'results')
        if not os.path.exists(results_folder):
            os.makedirs(results_folder)
            print(f'Created results folder at {results_folder}')

        rows = []

        # Process each image file
        for filename in files:
            if not filename.lower().endswith(('.jpg', '.png', '.tiff')):
                continue

            image_path = os.path.join(root, filename)
            with Image.open(image_path) as img:
                image_width, image_height = img.size

            # Find the matching annotation files
            matching_annotations = annotations_by_stem.get(
                normalize_filename(os.path.splitext(filename)[0]), [])

            # If no matching annotation, keep the image in the CSV with empty prediction columns
            if not matching_annotations:
                print(f"No annotation found for image {image_path}.")
                rows.append({
                    'Image_Path': image_path,
                    'Image_Width': image_width,
                    'Image_Height': image_height,
                    'YOLO_Results_File': '',
                    'Class_Id': '',
                    'Class_Name': '',
                    'Detected_coordinates': '',
                    'Absolute_coordinates': '',
                    'Confidence': '',
                })
                continue

            # Process matching annotations
            for matching_annotation in matching_annotations:
                annotation_path = os.path.join(labels_folder, matching_annotation)
                with open(annotation_path, 'r') as f:
                    for line in f:
                        # Blank lines would otherwise break the 6-value unpacking below
                        if not line.strip():
                            continue
                        class_id, x_center, y_center, width, height, confidence = map(float, line.split())
                        # Convert relative YOLO coordinates to absolute
                        x, y, abs_width, abs_height = from_relative_coordinates_to_absolute(
                            x_center, y_center, width, height, image_width, image_height)

                        if labels is None:
                            labels = get_labels(os.path.join(yolo_model_folder, 'labels.txt'))

                        # Add row of data for the DataFrame
                        rows.append({
                            'Image_Path': image_path,
                            'Image_Width': image_width,
                            'Image_Height': image_height,
                            'YOLO_Results_File': annotation_path,
                            'Class_Id': int(class_id),
                            'Class_Name': get_class_name(int(class_id), labels),
                            'Detected_coordinates': f'{x_center} {y_center} {width} {height}',
                            'Absolute_coordinates': f"{x} {y} {abs_width} {abs_height}",
                            'Confidence': confidence,
                        })
                print(f"Processed annotation for {image_path}")

        # Generate and save the CSV with results
        if rows:
            df = pd.DataFrame(rows)
            df_sorted = df.sort_values('Image_Path')

            df_sorted.to_csv(os.path.join(results_folder, csv_filename), sep=';', index=False)
            print(f'The file {os.path.join(results_folder, csv_filename)} has been created.')
        else:
            print("No correspondence found between images and annotations.")

### Get the data in Label Studio format

#### Convert YOLO results into Label Studio's Json format (for local files)

In [7]:
def convert_yolo_annotations_to_label_studio_format(yolo_annotations, image_path, yolo_model_folder):
    """
    Convert the YOLO annotations of one image into a Label Studio task with pre-annotations.

    Each YOLO line (class id, x_center, y_center, width, height, confidence; coordinates relative to the 
    image size) becomes one `rectanglelabels` result whose `x`, `y`, `width` and `height` are expressed 
    as percentages of the image, with `x`/`y` being the top-left corner of the box.

    Documentation: [Label Studio Converter](https://github.com/heartexlabs/label-studio-converter/blob/master/label_studio_converter/imports/yolo.py#L85)

    :param yolo_annotations: 
        - Type: list of str
        - Description: A list of annotation strings in YOLO format. Each string contains class ID, bounding box 
                       coordinates (x_center, y_center, width, height), and confidence score, separated by spaces. 
                       Blank lines are ignored.

    :param image_path: 
        - Type: str
        - Description: The path to the image file corresponding to the annotations. The image is opened to read 
                       its dimensions, which are stored as `original_width` / `original_height`; the path itself 
                       is stored as the task's `data.image` value.

    :param yolo_model_folder: 
        - Type: str
        - Description: The path to the folder containing the YOLO model. Class names are read from its 
                       `labels.txt` file and its basename is used as `model_version`.

    :return: 
        - Type: list
        - Description: A one-element list containing the Label Studio task (image reference and one prediction 
                       holding all bounding boxes).
    """

    # Get the image dimensions
    with Image.open(image_path) as img:
        image_width, image_height = img.size

    # Class labels are read once, and only if there is at least one annotation to convert
    labels = None
    results = []

    # Get the bounding_boxes coordinates
    for line in yolo_annotations:
        # Blank lines would otherwise break the 6-value unpacking below
        if not line.strip():
            continue
        class_id, x_center, y_center, width, height, confidence = map(float, line.split())

        if labels is None:
            labels = get_labels(os.path.join(yolo_model_folder, 'labels.txt'))

        results.append({
            "id": f'{uuid.uuid1()}',
            "type": "rectanglelabels",
            "from_name": "label",
            "to_name": "image",
            "original_width": image_width,
            "original_height": image_height,
            "image_rotation": 0,
            "value": {
                "rotation": 0,
                # YOLO stores the box centre; Label Studio expects the top-left corner, in percent
                "x": (x_center - width / 2) * 100,
                "y": (y_center - height / 2) * 100,
                "width": width * 100,
                "height": height * 100,
                "rectanglelabels": [f"{get_class_name(int(class_id), labels)}"]
            },
            "score": confidence
        })

    label_studio_format = [{
        "data": {
            "image": image_path
        },
        "predictions": [{
            "model_version": os.path.basename(yolo_model_folder),
            "score": '',
            "result": results
        }]
    }]

    return label_studio_format

#### Convert unannotated image into Label Studio format

In [8]:
def convert_unannotated_to_label_studio_format(image_path: str, yolo_model_folder: str) -> list:
    """
    Create the Label Studio task for an image that has no YOLO annotations.

    :param image_path:
        - Type: str
        - Description: Path to the image to import into Label Studio. The file is opened with PIL, 
                       so it must be readable.
    :param yolo_model_folder:
        - Type: str
        - Description: Path to the YOLO model folder; its basename is used as the `model_version` field.
    :return:
        - Type: list of dict
        - Description: A one-element list containing the Label Studio task, whose single prediction 
                       has an empty `result` list.
    """
    # The size is not used in the task; the image is still opened, so an unreadable file raises here
    with Image.open(image_path) as img:
        width, height = img.size

    empty_prediction = {
        "model_version": os.path.basename(yolo_model_folder),
        "score": "",
        "result": [],  # no annotations yet
    }
    task = {
        "data": {"image": image_path},
        "predictions": [empty_prediction],
    }

    return [task]


#### Convert YOLO annotations for local files into a Label Studio-compatible JSON format

In [None]:
def get_ls_for_local_files(img_dataset_folder: str, yolo_model_folder: str) -> str:
    """
    Batch-convert all images in img_dataset_folder into a single Label Studio JSON import file,
    handling both images with YOLO annotations (.txt files) and unannotated images.

    :param img_dataset_folder:
        - Type: str
        - Description: Path to the folder containing the dataset images. Only files located directly
                       in this folder and ending with `.jpg`, `.png` or `.tiff` are considered.

    :param yolo_model_folder:
        - Type: str
        - Description: Path to the YOLO model folder. It is used to locate the prediction folder
                       (`<grandparent of yolo_model_folder>/predict/<dataset name>_<model folder name>`),
                       to read `labels.txt` and to set the `model_version` of each task.

    :return:
        - Type: str
        - Description: Path to the generated JSON file aggregating all Label Studio task entries.

    For each image, the function checks for a `.txt` file with the same base name in the prediction
    `labels` folder:
      - If it exists, its YOLO annotations are converted into Label Studio's rectangle-label format.
      - Otherwise an "empty" task entry is generated so that the image appears unannotated.

    Every image path is then prefixed with `/data/local-files/?d=` and all entries are written to
    `<image folder name>_ls_local_files.json` in the prediction `results` folder.
    """

    # Path construction
    base_predict = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        f"{os.path.basename(os.path.dirname(os.path.dirname(img_dataset_folder)))}_{os.path.basename(yolo_model_folder)}"
    )
    labels_folder = os.path.join(base_predict, 'labels')
    results_folder = os.path.join(base_predict, 'results')
    os.makedirs(results_folder, exist_ok=True)

    all_ls = []

    # Browse all the images in the folder (sorted so the output does not depend on listing order)
    for ext in ('.jpg', '.png', '.tiff'):
        pattern = os.path.join(img_dataset_folder, f"*{ext}")
        for image_path in sorted(glob(pattern)):
            basename = os.path.splitext(os.path.basename(image_path))[0]
            label_txt = os.path.join(labels_folder, basename + '.txt')

            if os.path.exists(label_txt):
                # 1) Annotated image -> Label Studio conversion
                with open(label_txt, 'r') as f:
                    lines = f.read().splitlines()
                entries = convert_yolo_annotations_to_label_studio_format(
                    lines, image_path, yolo_model_folder
                )
            else:
                # 2) Unannotated image -> empty entry
                entries = convert_unannotated_to_label_studio_format(
                    image_path, yolo_model_folder
                )

            all_ls.extend(entries)

    # Rewriting paths for Label Studio. The prefix is prepended rather than substituted with
    # str.replace, which would also rewrite any later occurrence of the folder string in the path.
    # NOTE(review): Label Studio resolves `?d=` relative to its local-files document root; confirm
    # that the full path produced here matches the configured root.
    local_files_prefix = '/data/local-files/?d='
    for ann in all_ls:
        ann['data']['image'] = local_files_prefix + ann['data']['image']

    # Writing the JSON file
    output_file = os.path.join(
        results_folder,
        os.path.basename(img_dataset_folder) + '_ls_local_files.json'
    )
    with open(output_file, 'w') as f:
        json.dump(all_ls, f, indent=2)

    print(f"Label Studio annotations written to {output_file}")

    return output_file

### Get labeling code

#### Generate random colours in rgb style

In [10]:
def generate_random_colours():
    """
    Draw a random colour and return it as a hexadecimal RGB string.

    The red, green and blue channels are drawn one after the other, each as a random integer 
    between 2 and 255 (inclusive), and formatted as two lowercase hexadecimal digits.

    :return: 
        - Type: str
        - Description: The colour in hexadecimal format (e.g., `#a1b2c3`).
    """
    channels = [random.randint(2, 255) for _ in range(3)]

    return '#' + ''.join(f'{channel:02x}' for channel in channels)

#### Get the labels used for the detection session

In [11]:
def get_labeling_code(img_dataset_folder, yolo_model_folder):
    """
    Generate a text file containing the labeling configuration template for a new Label Studio project. 
    The template lists the labels read from the YOLO model's `labels.txt` file and assigns a random 
    colour to each of them.

    !!! Note: The generated file cannot be loaded directly into Label Studio. The code needs to be copied manually 
    into a new project configuration in Label Studio.

    :param img_dataset_folder: 
        - Type: str
        - Description: The path to the folder containing the image dataset. The name of the folder two levels 
                       above it is used to build the name of the prediction folder.

    :param yolo_model_folder: 
        - Type: str
        - Description: The path to the folder containing the YOLO model and its `labels.txt` file, from which 
                       the class labels are read.

    :return: 
        - Type: None
        - Description: This function does not return a value. It writes `labeling_code.txt` into 
                       `<grandparent of yolo_model_folder>/predict/<dataset name>_<model folder name>/results`, 
                       creating that folder if needed.
    """

    # The previous implementation walked the dataset folder and overwrote `results_folder` at every step.
    # The walk always ends on a folder without sub-folders, so the value finally used was this base
    # `results` folder; it is now computed directly (and no longer undefined for a missing dataset folder).
    results_folder = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        os.path.basename(os.path.dirname(os.path.dirname(img_dataset_folder))) + '_' + os.path.basename(yolo_model_folder),
        'results')
    os.makedirs(results_folder, exist_ok=True)

    labels = get_labels(os.path.join(yolo_model_folder, 'labels.txt'))
    label_names = labels.values()

    # Label Studio template for bounding boxes; one <Label> line per class is inserted below

    labeling_template = """
    <View>
        <View style="display:flex;align-items:start;gap:8px;flex-direction:row">
            <Image name="image" value="$image" zoom="true" zoomControl="true" rotateControl="false"/>
            <RectangleLabels name="label" toName="image" showInline="false">
                
    {label_backgrounds}
    
            </RectangleLabels>
        </View>
    </View>
    """

    # Generate the part of the template for each label, with a random colour
    label_backgrounds = "".join(
        '          <Label value="{label}" background="{colour}"/>\n'.format(
            label=label, colour=generate_random_colours())
        for label in label_names
    )

    # Insert the generated label lines into the template
    labeling_template = labeling_template.format(label_backgrounds=label_backgrounds)

    output_path = os.path.join(results_folder, 'labeling_code.txt')
    with open(output_path, 'w') as file:
        file.write(labeling_template)

    print(f"The labeling template is saved in {output_path}")

## Processing

### Define corpus to be processed

In [12]:
img_dataset_folder = 'ABSPATHTOTHEFOLDER' # to be changed: absolute path to a folder containing images only, without annotations.
yolo_model_folder = 'ABSPATHTOTHEMODELFOLDER' # to be changed: absolute path to the folder with the training data (the functions above read `weights/best.pt` and `labels.txt` from it)

In [14]:
# NOTE(review): `time_sleep` is not referenced by any function or cell visible in this notebook - confirm whether it is still needed.
time_sleep = 1 # to be changed as needed (in seconds)

### Launch Yolo

In [None]:
%%prun
# Profiled run of the prediction step over the whole dataset folder (one label file per image with detections).
process_images_with_yolo(yolo_model_folder, img_dataset_folder)

### Generate the files for local files (csv and Json files)

#### Generate overview of results (CSV file)

In [16]:
# Gather the predicted label files and the image sizes into a `;`-separated CSV in the prediction `results` folder.
yolo_to_csv(img_dataset_folder, yolo_model_folder)

#### Generate the files for local files (only Json file)

In [None]:
# Write `<image folder name>_ls_local_files.json` (Label Studio tasks with pre-annotations) into the prediction `results` folder.
get_ls_for_local_files(img_dataset_folder, yolo_model_folder)

### Generate the labeling code file

In [None]:
# Write `labeling_code.txt`; its content has to be copied manually into the Label Studio project configuration.
get_labeling_code(img_dataset_folder, yolo_model_folder)