In [1]:
#!pip3 install opencv-python
import os
import cv2

In [2]:
# Name of the dataset folder to analyze; a later cell joins it onto the
# extracted-datasets directory to build `data_dir`.
dataset = "RoboFlow-2"

In [3]:
# Current working directory of the kernel.
# NOTE(review): the name suggests this is the notebook's own folder — confirm
# the kernel is always started there, otherwise the paths below are wrong.
script_dir = os.getcwd()
# Walk up two levels: parent of the working directory, then its parent
# (used below as the project root).
src_dir = os.path.dirname(script_dir)
prj_dir = os.path.dirname(src_dir)
# Base folder that holds the extracted datasets: <prj_dir>/datasets/extracted
datasets_dir = os.path.join(prj_dir, "datasets", "extracted")

# Folder of the dataset selected through `dataset`
data_dir = os.path.join(datasets_dir, dataset)

In [4]:
# Function to count instances
def conta_istanze_YOLO11(file_path, frequencies, set):
    """Count the annotations of each class in a single YOLO label file.

    Every non-empty line that does not start with ``#`` is expected to begin
    with an integer class id; the remaining fields of the line are ignored.
    Lines whose first field is not an integer are reported with a warning and
    skipped.

    Args:
        file_path: Path of the ``.txt`` annotation file to read.
        frequencies: Nested dict, updated in place, where
            ``frequencies[set][n]`` is the number of files of that split that
            contain exactly ``n`` valid annotation lines.
        set: Name of the dataset split the file belongs to (e.g. ``"train"``).
            The name shadows the ``set`` builtin; it is kept unchanged so that
            existing callers keep working.

    Returns:
        dict: ``{class_id: count}`` for this file. A file without any valid
        annotation (empty, or only blank/comment/malformed lines) is reported
        as one background image: ``{-1: 1}``.
    """
    per_split = frequencies.setdefault(set, {})
    istanze = {}  # class id -> number of annotations in this file
    nlines = 0    # number of valid annotation lines

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            # Blank lines and '#' comments carry no annotation
            if not line or line.startswith('#'):
                continue

            # split() without arguments accepts tabs and repeated spaces,
            # whereas split(' ') would glue "1\t0.5" into a single token.
            classe = line.split()[0]

            try:
                classe = int(classe)
            except ValueError:
                print(f"Warning: The first value cannot be converted to an integer in the line: {line}")
                continue

            nlines += 1
            istanze[classe] = istanze.get(classe, 0) + 1

    # One more file of this split with `nlines` annotations
    per_split[nlines] = per_split.get(nlines, 0) + 1

    # Treat every annotation-free file the same way, not only 0-byte files,
    # so the result agrees with the 0-instance entry recorded just above.
    if nlines == 0:
        return {-1: 1}

    return istanze

# Function to analyze all annotation files in a directory
def analizza_cartella_YOLO11(directory):
    """Print annotation statistics for a YOLO dataset laid out by split.

    For each of the ``train``, ``valid`` and ``test`` splits, every ``.txt``
    file in ``<directory>/<split>/labels`` is passed to
    ``conta_istanze_YOLO11``. The function then prints the overall totals,
    the per-split class counts (each with the share of that class that falls
    in the split) and, per split, how many images contain a given number of
    annotations. A split whose ``labels`` folder is missing is reported and
    counted as empty.

    Class id ``-1`` in the output is what ``conta_istanze_YOLO11`` reports for
    images without annotations, so it is included in the instance and class
    totals.

    Args:
        directory: Root folder of the dataset.

    Returns:
        None. All results are printed.
    """
    def _percent(part, whole):
        # Percentage of `part` in `whole`; 0 when `whole` is 0, so that an
        # empty dataset or split does not raise ZeroDivisionError.
        return part / whole * 100 if whole else 0.0

    # risultati_totali[split][class_id] = number of instances
    risultati_totali = {}
    # frequencies[split][n] = number of images with exactly n annotations
    frequencies = {}
    # nimages[split] = number of label files processed
    nimages = {}

    for split in ("train", "valid", "test"):
        risultati_totali[split] = {}
        nimages[split] = 0
        labels_dir = os.path.join(directory, split, "labels")
        if not os.path.exists(labels_dir):
            print(f"Directory {labels_dir} does not exist. Skipping...")
            continue

        conteggi_split = risultati_totali[split]
        for filename in os.listdir(labels_dir):
            if not filename.endswith(".txt"):
                continue
            file_path = os.path.join(labels_dir, filename)
            istanze_contate = conta_istanze_YOLO11(file_path, frequencies, split)

            # Merge this file's counts into the split totals
            for istanza, conteggio in istanze_contate.items():
                conteggi_split[istanza] = conteggi_split.get(istanza, 0) + conteggio
            nimages[split] += 1

    # Totals per class across all splits
    total_per_class = {}
    for set_counts in risultati_totali.values():
        for cls, count in set_counts.items():
            total_per_class[cls] = total_per_class.get(cls, 0) + count
    total_instances = sum(total_per_class.values())
    total_images = sum(nimages.values())

    print("\nTotal Results:")

    print(f"Total images: {total_images}")
    print(f"Total instances across all sets: {total_instances}")
    print(f"Total classes across all sets: {len(total_per_class)}")
    for split, conteggi in risultati_totali.items():
        set_total = sum(conteggi.values())
        print(f"\n{split.capitalize()} Set ({set_total} instances, {_percent(set_total, total_instances):.2f}%):")
        print(f"Images: {nimages[split]} ({_percent(nimages[split], total_images):.2f}%)")
        for istanza in sorted(conteggi.keys()):
            # Share of this class's instances that belong to the current split
            percent_in_class = _percent(conteggi[istanza], total_per_class[istanza])
            print(f"Class {istanza}: {conteggi[istanza]} ({percent_in_class:.2f}%)")

    print("\nFrequencies:")
    for split, frequenze in frequencies.items():
        print(f"\n{split.capitalize()} Set:")
        for nlines, count in sorted(frequenze.items()):
            print(f"{nlines} instancies: {count} images")


In [5]:
# Run the analysis only when the dataset folder is actually present
if not os.path.exists(data_dir):
    print(f"The directory {data_dir} does not exist.")
else:
    analizza_cartella_YOLO11(data_dir)


Total Results:
Total images: 13103
Total instances across all sets: 18919
Total classes across all sets: 42

Train Set (16536 instances, 87.40%):
Images: 11465 (87.50%)
Class 0: 39 (84.78%)
Class 1: 3846 (86.58%)
Class 3: 1362 (86.75%)
Class 4: 3 (100.00%)
Class 5: 6 (100.00%)
Class 7: 735 (88.24%)
Class 8: 15 (93.75%)
Class 9: 12 (92.31%)
Class 11: 3 (100.00%)
Class 12: 1206 (88.94%)
Class 14: 63 (98.44%)
Class 16: 33 (80.49%)
Class 17: 1431 (86.89%)
Class 18: 402 (89.53%)
Class 19: 27 (90.00%)
Class 20: 15 (100.00%)
Class 21: 63 (96.92%)
Class 22: 21 (95.45%)
Class 23: 1878 (88.13%)
Class 24: 2700 (88.32%)
Class 25: 48 (97.96%)
Class 26: 39 (81.25%)
Class 27: 9 (100.00%)
Class 28: 716 (84.93%)
Class 29: 51 (78.46%)
Class 30: 18 (90.00%)
Class 31: 12 (100.00%)
Class 32: 27 (93.10%)
Class 33: 765 (88.13%)
Class 34: 12 (100.00%)
Class 35: 9 (100.00%)
Class 36: 39 (90.70%)
Class 37: 45 (67.16%)
Class 38: 747 (87.68%)
Class 39: 27 (77.14%)
Class 41: 112 (82.96%)

Valid Set (1559 instance

In [6]:
# plot image and bboxes (CV2)
def plot_image_with_bboxes(image_path, labels_path):
    """Show an image with its YOLO bounding boxes in an OpenCV window.

    Each usable line of the label file must contain at least five
    whitespace-separated fields: an integer class id followed by
    ``x_center y_center width height``, which are multiplied by the image
    size to obtain pixel coordinates. Fields after the fifth are ignored.
    Lines with fewer fields are skipped silently; lines whose fields are not
    numeric are skipped with a warning.

    The function blocks in ``cv2.waitKey(0)`` until a key is pressed in the
    window, then closes all OpenCV windows.

    Args:
        image_path: Path of the image to display.
        labels_path: Path of the matching YOLO ``.txt`` label file.

    Returns:
        None. If the image cannot be loaded, an error is printed and nothing
        is shown.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return

    # The image size does not change, so read it once rather than per line
    img_height, img_width = image.shape[:2]

    with open(labels_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        parts = line.split()
        if len(parts) < 5:
            continue  # Not enough fields for a bounding box

        try:
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:5])
        except ValueError:
            # e.g. a comment line with several words; skip it instead of crashing
            print(f"Warning: skipping malformed annotation line: {line.strip()}")
            continue

        # Centre/size fractions -> top-left and bottom-right pixel corners
        x1 = int((x_center - width / 2) * img_width)
        y1 = int((y_center - height / 2) * img_height)
        x2 = int((x_center + width / 2) * img_width)
        y2 = int((y_center + height / 2) * img_height)

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Put the class id above the box; when the box touches the top edge
        # the label would fall off the canvas, so draw it just inside instead.
        text_y = y1 - 10 if y1 - 10 >= 10 else y1 + 15
        cv2.putText(image, str(class_id), (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imshow('Image with Bounding Boxes', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [9]:
# File stem shared by the sample image (.jpg) and its label file (.txt)
img_name = "4c9d8d3d-R_3500_jpg.rf.ba25a5669f6e5ad3385e02c12efcaf6d"
# Both files are looked up in the "test" split of the selected dataset
img_path = os.path.join(data_dir, "test", "images",  f"{img_name}.jpg")
labels_path = os.path.join(data_dir, "test", "labels", f"{img_name}.txt")
plot_image_with_bboxes(img_path, labels_path)  # Draw the boxes and show the image

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to target thread (0x625353ab1bf0)

QObject::moveToThread: Current thread (0x625353ab1bf0) is not the object's thread (0x625353f091f0).
Cannot move to tar