### Roboflow Dataset

#### Download dataset from Roboflow

In [None]:

import io
import os
import zipfile

import requests

# Download the public Roboflow dataset archive and unpack it under
# <base_dir>/dataset. The archive is held in memory and extracted from there;
# nothing is written to disk other than the extracted files.

# URL of the public Roboflow dataset
url = "https://public.roboflow.com/ds/cEbvuuZwUx?key=xJzZQRRk02"

# Base directory where you want to save the dataset
base_dir = "Yolo_test"

# Create a 'dataset' subfolder (no error if it already exists)
target_dir = os.path.join(base_dir, "dataset")
os.makedirs(target_dir, exist_ok=True)

print("Downloading dataset...")
# The whole body is needed to open the zip from memory, so streaming the
# response brings no benefit. The timeout keeps the cell from hanging
# indefinitely on a stalled connection.
response = requests.get(url, timeout=60)
if response.status_code == 200:
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        z.extractall(target_dir)
    print(f"Dataset downloaded and extracted to {target_dir}")
else:
    print("Download failed:", response.status_code, response.text)


#### Modify the dataset from Roboflow

In [None]:
import os

# --- 1. Configuration ---

# Path to the root folder of your dataset
# NOTE(review): the download cell extracts into 'Yolo_test/dataset' -- confirm
# that this relative path really points at the extracted files from the
# notebook's working directory.
dataset_folder = 'dataset/roboflow_Aquarium_Combined.v6i.yolov8'

# Define the original classes from your data.yaml file
# IMPORTANT: The number must match the class ID in the original label files.
# For example, if 'shark' is the 4th item in the YAML list, its ID is 3.
# NOTE(review): verify this ordering against the 'names' list in data.yaml;
# a wrong ordering silently keeps/drops the wrong objects.
original_class_map = {
    0: 'fish',
    1: 'jellyfish',
    2: 'penguin',
    3: 'shark',
    4: 'puffin',
    5: 'stingray',
    6: 'starfish'
}

# List the names of the classes you want to merge into one single "fish" class
classes_to_keep_and_merge = ['fish', 'shark', 'stingray']

# The new class ID for your single "fish" class will be 0
new_fish_class_id = 0


def merge_label_lines(lines, class_map, keep_names, new_id):
    """Return the label lines to keep, rewritten to a single class ID.

    Args:
        lines: Raw lines of one label file, each expected to look like
            ``<class_id> <v1> <v2> <v3> <v4>``.
        class_map: Mapping from original integer class ID to class name.
        keep_names: Class names whose objects are kept.
        new_id: Class ID written in place of the original one.

    Returns:
        A list of rewritten lines (no trailing newlines). Objects whose class
        is not in ``keep_names`` are dropped; blank lines are ignored.

    Raises:
        ValueError: If a non-blank line has fewer than five fields or its
            first field is not an integer.
        KeyError: If a class ID is missing from ``class_map``.
    """
    kept = []
    for line in lines:
        parts = line.split()
        # Blank or whitespace-only lines (e.g. a trailing newline at the end
        # of the file) carry no annotation; skip them instead of crashing.
        if not parts:
            continue
        if len(parts) < 5:
            raise ValueError(f"Malformed label line: {line!r}")

        # An unknown ID is left to raise KeyError: it means the class map
        # does not match the dataset and the results would be wrong.
        class_name = class_map[int(parts[0])]

        if class_name in keep_names:
            # New ID followed by the original four coordinate fields.
            kept.append(f"{new_id} {parts[1]} {parts[2]} {parts[3]} {parts[4]}")
    return kept


# --- 2. Main Logic: Loop through all label files and modify them ---

# We need to process the labels in the train, validation, and test sets
for data_split in ['train', 'valid', 'test']:

    labels_path = os.path.join(dataset_folder, data_split, 'labels')
    print(f"--- Processing folder: {labels_path} ---")

    # A split may be absent; report it and move on rather than letting
    # os.listdir abort the whole run.
    if not os.path.isdir(labels_path):
        print("Directory not found.")
        continue

    for file_name in os.listdir(labels_path):
        # We only want to process text files
        if not file_name.endswith('.txt'):
            continue

        file_path = os.path.join(labels_path, file_name)

        with open(file_path, 'r') as f:
            lines = f.readlines()

        # Compute the new content fully before reopening the file for writing,
        # so a malformed line cannot leave the file truncated.
        lines_to_keep = merge_label_lines(
            lines,
            original_class_map,
            classes_to_keep_and_merge,
            new_fish_class_id,
        )

        # Overwrite the original file with only the lines we decided to keep
        with open(file_path, 'w') as f:
            f.write('\n'.join(lines_to_keep))

print("\n--- All label files have been modified successfully! ---")

##### Check that the label change was successful

In [None]:
import os

# --- Settings ---
# Root directory of the dataset whose label files are printed.
dataset_folder = 'dataset/roboflow_Aquarium_Combined.v6i.yolov8'

# --- Label dump ---
# Each of these splits has its 'labels' subfolder inspected.
data_splits = ['train', 'valid', 'test']

print(f"--- Displaying all labels in '{dataset_folder}' ---")

for split_name in data_splits:
    split_labels = os.path.join(dataset_folder, split_name, 'labels')

    print(f"\n--- Checking Folder: {split_labels} ---")

    if os.path.isdir(split_labels):
        for entry in os.listdir(split_labels):
            # Anything that is not a .txt annotation file is ignored.
            if not entry.endswith('.txt'):
                continue

            with open(os.path.join(split_labels, entry), 'r') as handle:
                text = handle.read().strip()

            # Show the file name, then its content or an empty-file marker.
            print(f"File: {entry}")
            print(text if text else "(This file is empty)")
    else:
        # Missing split folder: report it and go on to the next split.
        print("Directory not found.")

print("\n--- Script Finished ---")