# Predicting and checking YOLO results

These scripts can be used to detect, check and correct, if necessary, data from YOLOv8 detections. 

**Warning**

The following scripts have been created to process data from IIIF manifests, following the download protocol set up in the '0_Download_processing.ipynb' notebook.
These scripts are not designed for local processing of data or data for which no URL is available.

## Environment

In [None]:
import os
import cv2
import pandas as pd
import uuid
from PIL import Image
from ultralytics import YOLO
import time
from datetime import datetime
import json
import unicodedata

from PIL import Image

from modules.class_names_functions import get_labels, get_class_name, get_class_code
from modules.transform_coordinates_functions import from_relative_coordonates_to_absolute

## Functions

### Prediction script

In [None]:
def process_images_with_yolo(yolo_model_folder, dataset_path):
    """
    Run the YOLO prediction on every image found under dataset_path.

    The folder is explored recursively, hidden folders excepted. Each file whose
    name ends with '.jpg' or '.png' (case-insensitive) is handed over to
    process_single_image_with_yolo.
    """
    image_extensions = ('.jpg', '.png')

    for current_folder, subfolders, filenames in os.walk(dataset_path):

        # Prune in place so that os.walk never enters folders whose names start with "."
        subfolders[:] = [name for name in subfolders if not name.startswith('.')]

        for filename in filenames:
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(current_folder, filename)
            process_single_image_with_yolo(yolo_model_folder, dataset_path, image_path)
            print(subfolders, image_path)

                
# Loaded YOLO models, keyed by (weights path, modification time), so that the
# weights are read from disk once per run instead of once per image.
_YOLO_MODEL_CACHE = {}


def _get_yolo_model(yolo_model_path):
    """
    Return the YOLO model stored at yolo_model_path, loading it only the first time.
    """
    try:
        weights_version = os.path.getmtime(yolo_model_path)
    except OSError:
        # Let YOLO() itself report a missing or unreadable weights file
        weights_version = None

    cache_key = (yolo_model_path, weights_version)
    if cache_key not in _YOLO_MODEL_CACHE:
        _YOLO_MODEL_CACHE[cache_key] = YOLO(yolo_model_path)
    return _YOLO_MODEL_CACHE[cache_key]


def process_single_image_with_yolo(yolo_model_folder, dataset_path, image_path):
    """
    Run the YOLO prediction on one image and save the detections as a label file.

    Parameters:
    The 'yolo_model_folder' parameter is the training folder; the weights are read
    from its 'weights/best.pt' file.
    The 'dataset_path' parameter is the root folder of the corpus being processed.
    The 'image_path' parameter is the path to the image, located under dataset_path.

    The label file is written to
    <parent of the parent of yolo_model_folder>/predict/<dataset name>/<sub-folder>/labels/<image name>.txt
    with one line per detection: class, x_center, y_center, width, height
    (relative values, 4 decimals) and confidence.
    Nothing is written when the image cannot be read or has no detection.
    """
    yolo_model = _get_yolo_model(os.path.join(yolo_model_folder, 'weights/best.pt'))

    output_directory = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        os.path.basename(dataset_path),
        os.path.dirname(os.path.relpath(image_path, dataset_path)),
        'labels'
    )
    print(output_directory)
    os.makedirs(output_directory, exist_ok=True)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded;
        # handing None over to predict() would not process this image.
        print(f"Could not read image {image_path}, skipping.")
        return

    # Process the image using YOLO (labels are written below, not by YOLO itself)
    results = yolo_model.predict(source=image,
                                 imgsz=640,
                                 save_txt=False,
                                 save_conf=False)

    boxes = results[0].boxes

    if not boxes:
        print(f"No detections found in {os.path.basename(image_path)}")
        return

    # Save the labels in the /labels/ folder
    label_filename = os.path.splitext(os.path.basename(image_path))[0] + '.txt'
    label_path = os.path.join(output_directory, label_filename)

    with open(label_path, 'w') as label_file:
        for box in boxes:
            xywh = " ".join(f"{value:.4f}" for value in box.xywhn.cpu().squeeze().tolist())
            label_file.write(f"{box.cls.cpu().item()} {xywh} {box.conf.cpu().item()}\n")
        
        
""" Documentation : https://github.com/ultralytics/ultralytics/issues/2143
model = YOLO('weights/best.pt')
predictions = model(image_path, save_txt=None)

with open("predicted_labels.txt", '+w') as file:
      for idx, prediction in enumerate(predictions[0].boxes.xywhn): # change final attribute to desired box format
          cls = int(predictions[0].boxes.cls[idx].item())
          # Write line to file in YOLO label format : cls x y w h
          file.write(f"{cls} {prediction[0].item()} {prediction[1].item()} {prediction[2].item()} {prediction[3].item()}\n")
"""

### Create a csv with YOLO results

In [None]:
def normalize_filename(filename):
    """
    Return an ASCII-only version of the filename, so that names can be compared reliably.

    Characters are first decomposed (NFKD), which separates accents from their base
    letter; whatever cannot be expressed in ASCII afterwards is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', filename)
    ascii_bytes = decomposed.encode('ASCII', 'ignore')
    return ascii_bytes.decode('ASCII')

def yolo_to_csv(dataset_path, yolo_model_folder):
    """
    Create, for each folder of the dataset, a CSV file listing the YOLO detections.

    Parameters:
    The 'dataset_path' parameter is the root folder of the corpus (images and their
    ';'-separated metadata CSV with the columns 'imageFileName',
    'imageWidthAsDownloaded', 'imageHeightAsDownloaded' and 'urlImage').
    The 'yolo_model_folder' parameter is the training folder; it must contain 'labels.txt'.

    For every folder visited under dataset_path (hidden folders excepted), the label
    files are read from the 'labels' folder written by process_single_image_with_yolo:
    <parent of the parent of yolo_model_folder>/predict/<dataset name>/<sub-folder>/labels
    and the CSV is written to the sibling 'results' folder as '<folder name>.csv'.
    Folders without label files are reported and skipped.
    """
    predict_root = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        os.path.basename(dataset_path))

    # Class names are read from labels.txt only once, the first time a detection needs them
    class_labels = None

    for root, dirs, files in os.walk(dataset_path):
        dirs[:] = [d for d in dirs if not d.startswith('.')]  # Ignore folders beginning with '.'

        # Same sub-folder layout as the one used when the label files were written
        relative_root = os.path.relpath(root, dataset_path)
        if relative_root == os.curdir:
            relative_root = ''
        labels_folder = os.path.join(predict_root, relative_root, 'labels')

        if not os.path.isdir(labels_folder):
            print(f'No detection on the data set {labels_folder}.')
            continue

        # Search for annotation files (.txt files) in the labels folder
        annotation_files = [name for name in os.listdir(labels_folder) if name.endswith('.txt')]
        print(len(annotation_files))

        if not annotation_files:
            print(f'No detection on the data set {labels_folder}.')
            continue

        # Create results folder if it doesn't already exist
        results_folder = os.path.join(os.path.dirname(labels_folder), 'results')
        if not os.path.exists(results_folder):
            os.makedirs(results_folder)
            print(f'The results folder created at {results_folder}')
        else:
            print(f'The results folder already exists at {results_folder}')

        # Label files are named '<image name without extension>.txt': index them by
        # that (normalised) name so that .jpg and .png images are matched alike.
        annotations_by_stem = {}
        for annotation_file in annotation_files:
            stem = normalize_filename(os.path.splitext(annotation_file)[0])
            annotations_by_stem.setdefault(stem, []).append(annotation_file)

        rows = []

        for filename in files:
            if not filename.lower().endswith('.csv'):
                continue

            # Read the image metadata from the CSV file
            images_data = pd.read_csv(os.path.join(root, filename), sep=';')

            for _, row in images_data.iterrows():
                image_path = row["imageFileName"]
                image_width = row["imageWidthAsDownloaded"]
                image_height = row["imageHeightAsDownloaded"]
                image_url = row['urlImage']

                image_stem = normalize_filename(os.path.splitext(os.path.basename(image_path))[0])

                for matching_annotation in annotations_by_stem.get(image_stem, []):
                    annotation_path = os.path.join(labels_folder, matching_annotation)

                    with open(annotation_path, 'r') as f:
                        for line in f:
                            if not line.strip():
                                continue  # tolerate blank lines in a label file

                            class_id, x_center, y_center, width, height, confidence = map(float, line.split())
                            x, y, abs_width, abs_height = from_relative_coordonates_to_absolute(
                                x_center, y_center, width, height, image_width, image_height)

                            if class_labels is None:
                                class_labels = get_labels(os.path.join(yolo_model_folder, 'labels.txt'))

                            # Create a line of data for the DataFrame
                            rows.append({
                                'Image_Path': image_path,
                                'Image_Width': image_width,
                                'Image_Height': image_height,
                                'YOLO_Results_File': annotation_path,
                                'Class_Id': int(class_id),
                                'Class_Name': get_class_name(int(class_id), class_labels),
                                'Detected_coordinates': f'{x_center} {y_center} {width} {height}',
                                'Absolute_coordinates': f"{x} {y} {abs_width} {abs_height}",
                                'Confidence': confidence,
                                'Url_Detection': image_url.replace("full", f"{x},{y},{abs_width},{abs_height}", 1),
                                'Url_Image': image_url
                            })

        # Create a Pandas DataFrame from the data and save the output CSV file
        if not rows:
            print("No correspondence found.")
            continue

        output_csv = os.path.join(results_folder, os.path.basename(root) + '.csv')
        pd.DataFrame(rows).sort_values('Image_Path').to_csv(output_csv, index=False)
        print(f"The file {output_csv} has been created")

### Get the yolo results in .html file

In [None]:
def generate_html(dataset_path, model_folder):

    """
    This function generates an HTML file that displays the contents of the annotation boxes,
    so you can quickly check the results.

    Parameters:
    The 'dataset_path' parameter is the path to the folder of the processed corpus;
    its name is used to locate the results.
    The 'model_folder' parameter is the path to the training folder used for the detection.

    The CSV created with 'yolo_to_csv' is read from
    <parent of the parent of model_folder>/predict/<dataset name>/results/<dataset name>.csv
    (the same location as the one used by the other functions of this notebook)
    and the HTML file is written next to it. Each value of the 'Url_Detection'
    column becomes one image of the page.
    If the CSV does not exist, a notice is printed and no file is written.
    """

    folder_name = os.path.basename(dataset_path)
    results_path = os.path.join(
        os.path.dirname(os.path.dirname(model_folder)),
        'predict',
        folder_name)
    csv_result = os.path.join(results_path, 'results', folder_name + '.csv')
    html_result = os.path.join(results_path, 'results', folder_name + '.html')

    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="utf-8">
      <title>Affichage des détections YOLOv8</title>
      <style>
        body {
          display: flex;
          flex-wrap: wrap;
        }

        img {
          max-height: 320px;
          flex: 0 0 auto;
          margin: 10px;
        }
      </style>
    </head>
    <body>
      <h1>Affichage des détections YOLOv8</h1>
      
      <div id="image-container"></div>

      <script>
        var imageUrls = [{{image_urls}}];

        var imageContainer = document.getElementById("image-container");
        for (var i = 0; i < imageUrls.length; i++) {
          var imageUrl = imageUrls[i];

          var imgElement = document.createElement("img");
          imgElement.src = imageUrl;
          imageContainer.appendChild(imgElement);

        }
      </script>
    </body>
    </html>
    """

    if not os.path.exists(csv_result):
        print('No detection on the data set, the html file has not been created.')
        print('You do not need to continue.')
        return

    df = pd.read_csv(csv_result)

    # Turn each URL into a JavaScript string literal. json.dumps escapes quotes and
    # backslashes; "</" is additionally escaped so a URL can never close the <script> tag.
    image_urls_str = ", ".join(
        json.dumps(url).replace("</", "<\\/") for url in df["Url_Detection"])

    # Replace the position marker in the HTML template with the actual data
    html_content = html_content.replace("{{image_urls}}", image_urls_str)

    # Writing HTML content to a file, in the encoding announced by the page
    with open(html_result, "w", encoding="utf-8") as file:
        file.write(html_content)

    print(f"Le fichier {html_result} a été généré")

### Get the data in Label Studio format

#### Convert YOLO results into Label Studio's Json format (with URL)

In [None]:
def get_image_annotations(image_url, yolo_annotations):
    """
    Build the Label Studio task of one image from the YOLO results table.

    Parameters:
    The 'image_url' parameter is the URL of the image, as stored in the 'Url_Image' column.
    The 'yolo_annotations' parameter is the DataFrame of detections; the columns
    'Url_Image', 'Class_Name', 'Detected_coordinates', 'Image_Width' and
    'Image_Height' are read.

    Returns a list holding a single task: the image URL and one prediction whose
    results are the rectangles detected on that image, expressed in percentages
    of the image size (top-left corner, width, height).
    """
    # Keep only the detections of the requested image
    image_rows = yolo_annotations[yolo_annotations['Url_Image'] == image_url]

    annotations = []

    for position, (_, detection) in enumerate(image_rows.iterrows(), start=1):
        x_center, y_center, width, height = (
            float(number) for number in detection['Detected_coordinates'].split(' '))

        # YOLO stores the centre of the box, Label Studio expects its top-left corner
        rectangle = {
            "rotation": 0,
            "x": (x_center - width / 2) * 100,
            "y": (y_center - height / 2) * 100,
            "width": width * 100,
            "height": height * 100,
            "rectanglelabels": [detection['Class_Name']]
        }

        annotations.append({
            "id": f"result{position}",
            "type": "rectanglelabels",
            "from_name": "label",
            "to_name": "image",
            "original_width": detection['Image_Width'],
            "original_height": detection['Image_Height'],
            "image_rotation": 0,
            "value": rectangle
        })

    prediction = {
        "model_version": "yolov8_Folio_retrain",
        "score": '',
        "result": annotations
    }

    return [{
        "data": {
            "image": image_url
        },
        "predictions": [prediction]
    }]

def get_ls_with_url(dataset_path, yolo_model_folder):
    """
    Convert the CSV files created with 'yolo_to_csv' into Label Studio JSON files (images given by URL).

    Parameters:
    The 'dataset_path' parameter is the root folder of the processed corpus.
    The 'yolo_model_folder' parameter is the training folder used for the detection.

    For every folder visited under dataset_path (hidden folders excepted), the CSV
    <parent of the parent of yolo_model_folder>/predict/<dataset name>/<sub-folder>/results/<folder name>.csv
    is converted, when it exists, into '<folder name>_ls_with_url.json' in the same folder.

    Raises FileNotFoundError when no results CSV is found at all.
    """
    predict_root = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        os.path.basename(dataset_path))

    converted_any = False

    for root, dirs, _files in os.walk(dataset_path):
        dirs[:] = [d for d in dirs if not d.startswith('.')]  # Ignore folders beginning with '.'

        # Same sub-folder layout as the one used when the label files were written
        relative_root = os.path.relpath(root, dataset_path)
        if relative_root == os.curdir:
            relative_root = ''

        results_csv = os.path.join(
            predict_root, relative_root, 'results', os.path.basename(root) + '.csv')

        if not os.path.isfile(results_csv):
            continue  # no results for this folder

        # Read YOLO annotations from CSV file
        yolo_annotations = pd.read_csv(results_csv)

        # Collect the Label Studio task of every distinct image
        all_label_studio_annotations = []
        for image_url in yolo_annotations['Url_Image'].unique():
            all_label_studio_annotations.extend(
                get_image_annotations(image_url, yolo_annotations))

        # Write the resulting annotations into a JSON file next to the CSV
        output_file = os.path.splitext(results_csv)[0] + '_ls_with_url.json'

        with open(output_file, 'w') as file:
            json.dump(all_label_studio_annotations, file, indent=2)

        print(f'Label Studio annotations were written to {output_file}')
        converted_any = True

    if not converted_any:
        raise FileNotFoundError(
            f'No YOLO results CSV found under {predict_root}.')

#### Convert YOLO results into Label Studio's Json format (for local files)

In [None]:
def convert_yolo_to_label_studio_local(yolo_annotations, image_path, yolo_model_folder=None):
    """
    Convert the YOLO detections of one local image into a Label Studio task.

    Parameters:
    The 'yolo_annotations' parameter is the list of lines of the YOLO label file
    (class, x_center, y_center, width, height, confidence; relative values).
    The 'image_path' parameter is the path to the image; it is opened to read its size.
    The 'yolo_model_folder' parameter is the training folder containing 'labels.txt'.
    When omitted, the notebook-level variable of the same name is used, as before.

    Returns a list holding a single task. Every detection of the image is added to
    the results of its prediction, following the structure described at
    https://labelstud.io/guide/export.html#Raw-JSON-format-of-completed-tasks,
    so that the annotations can be reworked in Label Studio to fit the ground truth.
    """
    if yolo_model_folder is None:
        # Backward compatibility with callers that rely on the notebook-level variable
        try:
            yolo_model_folder = globals()['yolo_model_folder']
        except KeyError:
            raise NameError("name 'yolo_model_folder' is not defined") from None

    # Get the image dimensions
    with Image.open(image_path) as img:
        image_width, image_height = img.size

    # Class names are read from labels.txt only once, when the first detection needs them
    class_labels = None
    results = []

    # Get the bounding boxes coordinates
    for line in yolo_annotations:
        if not line.strip():
            continue  # tolerate blank lines in a label file

        class_id, x_center, y_center, width, height, _confidence = map(float, line.split())

        if class_labels is None:
            class_labels = get_labels(os.path.join(yolo_model_folder, 'labels.txt'))

        # The conversion to Label Studio coordinates comes from the Label Studio converter:
        # https://github.com/heartexlabs/label-studio-converter/blob/master/label_studio_converter/imports/yolo.py#L85
        results.append({
            "id": f'{uuid.uuid1()}',
            "type": "rectanglelabels",
            "from_name": "label",
            "to_name": "image",
            "original_width": image_width,
            "original_height": image_height,
            "image_rotation": 0,
            "value": {
                "rotation": 0,
                "x": (x_center - width / 2) * 100,
                "y": (y_center - height / 2) * 100,
                "width": width * 100,
                "height": height * 100,
                "rectanglelabels": [f"{get_class_name(int(class_id), class_labels)}"]
            }
        })

    label_studio_format = [{
        "data": {
            "image": image_path
        },
        "predictions": [{
            "model_version": "yolov8_Folio_retrain",
            "score": '',
            "result": results
        }]

    }]

    return label_studio_format


def get_ls_for_local_files(dataset_path, yolo_model_folder):
    """
    Convert the YOLO label files into Label Studio JSON files pointing to local images.

    Parameters:
    The 'dataset_path' parameter is the root folder of the processed corpus.
    The 'yolo_model_folder' parameter is the training folder used for the detection.

    For every folder visited under dataset_path (hidden folders excepted), the label
    files are read from
    <parent of the parent of yolo_model_folder>/predict/<dataset name>/<sub-folder>/labels
    and matched with the '.jpg' or '.png' image of the same name in the visited folder.
    The result is written to the sibling 'results' folder as
    '<folder name>_ls_locale_files.json'. Folders without a labels folder are skipped.
    """
    predict_root = os.path.join(
        os.path.dirname(os.path.dirname(yolo_model_folder)),
        'predict',
        os.path.basename(dataset_path))

    for root, dirs, _files in os.walk(dataset_path):
        dirs[:] = [d for d in dirs if not d.startswith('.')]  # Ignore folders beginning with '.'

        # Same sub-folder layout as the one used when the label files were written
        relative_root = os.path.relpath(root, dataset_path)
        if relative_root == os.curdir:
            relative_root = ''
        labels_folder = os.path.join(predict_root, relative_root, 'labels')

        if not os.path.isdir(labels_folder):
            print(f'No labels folder found at {labels_folder}, skipping.')
            continue

        images_folder = root

        # Create the results folder if it does not exist
        results_folder = os.path.join(os.path.dirname(labels_folder), 'results')

        if not os.path.exists(results_folder):
            os.makedirs(results_folder)
            print(f'The results folder created at {results_folder}')
        else:
            print(f'The results folder already exists at {results_folder}')

        # Collect all Label Studio annotations
        all_label_studio_annotations = []

        # Run through all label files
        for label_file in os.listdir(labels_folder):
            # Make sure it is a text file
            if not label_file.endswith('.txt'):
                continue

            label_file_path = os.path.join(labels_folder, label_file)
            image_stem = os.path.splitext(label_file)[0]

            # Check each image extension
            for image_ext in ['.jpg', '.png']:
                image_file_path = os.path.join(images_folder, image_stem + image_ext)

                # Check if the image file exists for this extension
                if os.path.exists(image_file_path):
                    # Read the YOLO annotations
                    with open(label_file_path, 'r') as file:
                        yolo_annotations = file.readlines()

                    # Convert the YOLO annotations into the Label Studio JSON format.
                    all_label_studio_annotations.extend(
                        convert_yolo_to_label_studio_local(
                            yolo_annotations, image_file_path, yolo_model_folder))
                    break  # Exit the loop since we found a valid image file

            else:
                print(f"No corresponding image found for {label_file}")

        # Update the path to the image data in the JSON data.
        # BE AWARE of the prefix: /data/local-files/?d=/
        for annotation in all_label_studio_annotations:
            annotation['data']['image'] = '/data/local-files/?d=/' + annotation['data']['image']

        # Write the resulting annotations into a JSON file
        output_file = os.path.join(
            results_folder, os.path.basename(root) + '_ls_locale_files.json')
        with open(output_file, 'w') as file:
            json.dump(all_label_studio_annotations, file, indent=2)

        print(f'Label Studio annotations were written in {output_file}.')

#### Get the Label Studio Json file

## Process

### Define corpus to be processed

In [None]:
# Both values are placeholders and must be replaced before running the cells below.
dataset_path = 'ABSPATHTOTHEFOLDER' # to be changed, absolute path to a folder with images only, without annotations.
yolo_model_folder = 'ABSPATHTOTHEMODELFOLDER' # to be changed, absolute path to the folder with the training data (the functions above read 'weights/best.pt' and 'labels.txt' from it)

In [None]:
# NOTE(review): time_sleep does not appear to be read by any function defined in this notebook; confirm before relying on it.
time_sleep = 0 # to be changed as needed, by default use 10 (in seconds)

### Launch Yolo

In [None]:
%%time

# Writes one label file (.txt) per image with detections; images without detections get no file.
process_images_with_yolo(yolo_model_folder, dataset_path)

### Generate the files for IIIF corpora (csv, html, Json files)

#### Generate overview of results (CSV file)

In [None]:
# Needs the label files written by the previous step and the ';'-separated metadata CSV of the corpus.
yolo_to_csv(dataset_path, yolo_model_folder)

#### Generate overview of results (html file)

In [None]:
# Needs the CSV created by yolo_to_csv; when it is missing, a notice is printed and no HTML file is written.
generate_html(dataset_path, yolo_model_folder)

#### Generate Label Studio file (for checking and correcting)

In [None]:
# Reads the CSV created by yolo_to_csv and writes a '..._ls_with_url.json' file next to it.
get_ls_with_url(dataset_path, yolo_model_folder)

### Generate the files for local files (only Json file)

In [None]:
#get_ls_for_local_files(dataset_path, yolo_model_folder)