In [1]:
#!pip3 install opencv-python
import os
import cv2

In [2]:
dataset = "RoboFlow-3-augmented"

In [3]:
# get the current working directory
script_dir = os.getcwd()
# get the parent directory
src_dir = os.path.dirname(script_dir)
prj_dir = os.path.dirname(src_dir)
datasets_dir = os.path.join(prj_dir, "datasets", "extracted")

data_dir = os.path.join(datasets_dir, dataset)
print("data_dir:", data_dir)

data_dir: /home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented


In [4]:
# Function to count instances
def conta_istanze_YOLO11(file_path, frequencies, set):
    istanze = {}  # Dictionary to store the counts of instances

    if set not in frequencies:
        frequencies[set] = {}
    
    # Reading the content of the annotation file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # if no lines, "bg": 1
    if not lines:
        if 0 not in frequencies[set]:
            frequencies[set][0] = 1
        else:
            frequencies[set][0] += 1
        return {-1: 1}
    
    nlines = 0

    # Iterating through each line in the file
    for line in lines:
        line = line.strip()  # Removing leading and trailing whitespaces
        # Skipping lines starting with '#' or empty lines
        if line.startswith('#') or line == '':
            continue
        
        # Extracting the class label from the line
        classe, *_ = line.split(' ')
        
        try:
            classe = int(classe)  # Converting the class label to an integer
        except ValueError:
            # Handling the case where the class label cannot be converted to an integer
            print(f"Warning: The first value cannot be converted to an integer in the line: {line}")
            continue

        nlines += 1  # Counting the number of valid lines

        istanza = classe  # Considering each class label as an 'instance'

        # Updating the count of instances for each class label
        if istanza in istanze:
            istanze[istanza] += 1
        else:
            istanze[istanza] = 1

    if nlines not in frequencies[set]:
        frequencies[set][nlines] = 1
    else:
        frequencies[set][nlines] += 1

    return istanze  # Returning the dictionary containing the counts of instances for each class label

# Function to analyze all annotation files in a directory
def analizza_cartella_YOLO11(directory):
    # stores the number of instances for each class and for each set (train, valid, test)
    # risultati_totali[set][class] = count
    risultati_totali = {}  # Dictionary to store the total counts of instances across all files
    frequencies = {}  # Dictionary to store the frequency of each class label
    nimages = {}  # Dictionary to store the number of images in each set

    for set in ["train", "valid", "test"]:
        risultati_totali[set] = {}
        nimages[set] = 0  # Initializing the count of images for each set
        # Iterating through each file in the directory
        labels_dir = os.path.join(directory, set, "labels")  # Constructing the path to the labels directory
        if not os.path.exists(labels_dir):
            print(f"Directory {labels_dir} does not exist. Skipping...")
            continue
        for filename in os.listdir(labels_dir):
            if filename.endswith(".txt"):  # Considering only files with '.txt' extension
                file_path = os.path.join(labels_dir, filename)  # Getting the full path of the file
                istanze_contate = conta_istanze_YOLO11(file_path, frequencies, set)  # Counting instances in the current file

                # Updating the total counts of instances across all files
                for istanza, conteggio in istanze_contate.items():
                    if istanza in risultati_totali[set]:
                        risultati_totali[set][istanza] += conteggio
                    else:
                        risultati_totali[set][istanza] = conteggio
                nimages[set] += 1

    # Calculate total instances per class across all sets
    total_per_class = {}
    total_instances = 0
    for set_counts in risultati_totali.values():
        for cls, count in set_counts.items():
            total_per_class[cls] = total_per_class.get(cls, 0) + count
            total_instances += count

    # Printing the total counts of instances for each class label
    print("\nTotal Results:")

    print(f"Total images: {sum(nimages.values())}")
    print(f"Total instances across all sets: {total_instances}")
    print(f"Total classes across all sets: {len(total_per_class)}")
    for set, conteggi in risultati_totali.items():
        set_total = sum(conteggi.values())
        print(f"\n{set.capitalize()} Set ({set_total} instances, {set_total/total_instances*100:.2f}%):")
        print(f"Images: {nimages[set]} ({nimages[set]/sum(nimages.values())*100:.2f}%)")
        for istanza in sorted(conteggi.keys()):
            class_total = total_per_class[istanza]
            percent_in_class = (conteggi[istanza] / class_total * 100) if class_total > 0 else 0
            print(f"Class {istanza}: {conteggi[istanza]} ({percent_in_class:.2f}%)")

    print("\nFrequencies:")
    for set, frequenze in frequencies.items():
        print(f"\n{set.capitalize()} Set:")
        for nlines, count in sorted(frequenze.items()):
            print(f"{nlines} instancies: {count} images")


In [5]:
if os.path.exists(data_dir):
    analizza_cartella_YOLO11(data_dir)  # Analyzing the dataset directory
else:
    print(f"The directory {data_dir} does not exist.")

Directory /home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/valid/labels does not exist. Skipping...
Directory /home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/test/labels does not exist. Skipping...

Total Results:
Total images: 8579
Total instances across all sets: 17777
Total classes across all sets: 7

Train Set (17777 instances, 100.00%):
Images: 8579 (100.00%)
Class -1: 45 (100.00%)
Class 0: 1531 (100.00%)
Class 1: 3162 (100.00%)
Class 2: 3191 (100.00%)
Class 3: 3882 (100.00%)
Class 4: 3055 (100.00%)
Class 5: 2911 (100.00%)

Valid Set (0 instances, 0.00%):
Images: 0 (0.00%)

Test Set (0 instances, 0.00%):
Images: 0 (0.00%)

Frequencies:

Train Set:
0 instancies: 45 images
1 instancies: 6030 images
2 instancies: 854 images
3 instancies: 559 images
4 instancies: 337 images
5 instancies: 194 images
6 

In [6]:
# plot image and bboxes (CV2)
def plot_image_with_bboxes(image_path, labels_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return

    with open(labels_path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        parts = line.strip().split()
        if len(parts) < 5:
            continue  # Skip invalid lines
        class_id = int(parts[0])
        x_center, y_center, width, height = map(float, parts[1:5])

        # Convert YOLO format to bounding box coordinates
        img_height, img_width = image.shape[:2]
        x1 = int((x_center - width / 2) * img_width)
        y1 = int((y_center - height / 2) * img_height)
        x2 = int((x_center + width / 2) * img_width)
        y2 = int((y_center + height / 2) * img_height)

        # Draw the bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, str(class_id), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imshow('Image with Bounding Boxes', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()



In [7]:
dataset = "RoboFlow-2-augmented"
img_name = "00a6b9b0-R_537_jpg.rf.9ba4e32cadefbdbc99cd400f48c2a2b3_180_vertical_3,21"
img_path = os.path.join(data_dir, "train", "images",  f"{img_name}.jpg")
labels_path = os.path.join(data_dir, "train", "labels", f"{img_name}.txt")
labels_clusters_path = os.path.join(data_dir, "test", "labels_clusters", f"{img_name}.txt")
plot_image_with_bboxes(img_path, labels_path)  # Plotting the image with bounding boxes
plot_image_with_bboxes(img_path, labels_clusters_path)

Error loading image: /home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/train/images/00a6b9b0-R_537_jpg.rf.9ba4e32cadefbdbc99cd400f48c2a2b3_180_vertical_3,21.jpg
Error loading image: /home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/train/images/00a6b9b0-R_537_jpg.rf.9ba4e32cadefbdbc99cd400f48c2a2b3_180_vertical_3,21.jpg


[ WARN:0@0.293] global loadsave.cpp:268 findDecoder imread_('/home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/train/images/00a6b9b0-R_537_jpg.rf.9ba4e32cadefbdbc99cd400f48c2a2b3_180_vertical_3,21.jpg'): can't open/read file: check file path/integrity
[ WARN:0@0.294] global loadsave.cpp:268 findDecoder imread_('/home/francescobarcherinii/Scrivania/FrancescoUni/Esami/unipi/1year2sem/IS/project/AI-waste-detection/datasets/extracted/RoboFlow-3-augmented/train/images/00a6b9b0-R_537_jpg.rf.9ba4e32cadefbdbc99cd400f48c2a2b3_180_vertical_3,21.jpg'): can't open/read file: check file path/integrity


In [None]:
# Compare original and geometric transformed images
import glob

dataset = "RoboFlow-2"
# Get all image names in the train/images directory
image_files = glob.glob(os.path.join(datasets_dir, dataset, "train", "images", "*.jpg"))

for img_path in image_files:
    # Get base name without suffix
    base_name = os.path.basename(img_path)

    # Show original image
    labels_path = os.path.join(datasets_dir, dataset, "train", "labels", f"{base_name[:-4]}.txt")
    plot_image_with_bboxes(img_path, labels_path)  # Plotting the image with bounding boxes

    # Show augmented images (same base name plus possible suffix)
    augmented_files = glob.glob(os.path.join(datasets_dir, dataset+"-augmented", "train", "images", f"{base_name[:-4]}_*.jpg"))
    for aug_img_path in augmented_files:
        # Get base name without suffix
        aug_base_name = os.path.basename(aug_img_path)

        # Show augmented image
        labels_path = os.path.join(datasets_dir, dataset+"-augmented", "train", "labels", f"{aug_base_name[:-4]}.txt")
        plot_image_with_bboxes(aug_img_path, labels_path)  # Plotting the image with bounding boxes

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread (0xdb91a70).
Cannot move to target thread (0xe445240)

QObject::moveToThread: Current thread (0xe445240) is not the object's thread