In [16]:
#convert zooniverse labels into YOLOv5 format and move labels into new directories
import os
import csv
from PIL import Image

# Function to convert coordinates to YOLO format
def convert_to_yolo_format(image_width, image_height, xmin, ymin, width, height):
    """Turn a top-left/size box into a centre/size box scaled by the image size.

    Args:
        image_width: Width of the image; horizontal values are divided by it.
        image_height: Height of the image; vertical values are divided by it.
        xmin: Left edge of the box.
        ymin: Top edge of the box.
        width: Horizontal extent of the box.
        height: Vertical extent of the box.

    Returns:
        A tuple ``(x_center, y_center, yolo_width, yolo_height)`` where each
        value is the corresponding box quantity divided by the image dimension
        on the same axis.

    Raises:
        ZeroDivisionError: If ``image_width`` or ``image_height`` is zero.
    """
    return (
        (xmin + width / 2) / image_width,
        (ymin + height / 2) / image_height,
        width / image_width,
        height / image_height,
    )

# Path to the directory containing the CSV files (image paths from the CSVs
# are resolved against this same directory)
csv_directory = 'D:\\YOLOv5Rowan\\data\\images\\zooniverse\\'

# Path to the directory where the YOLO format annotations will be saved
yolo_directory = 'D:\\YOLOv5Rowan\\data\\labels\\zooniverse\\'

# Filenames listed in a CSV whose image file was not found on disk
missing_images = []

# Process each split (train, val, test); each split has its own <split>.csv
for split in ['train', 'val', 'test']:
    csv_file = os.path.join(csv_directory, f'{split}.csv')

    # Group the annotations by image so each image produces one label file.
    annotations = {}

    # newline='' is the csv module's documented way to open a file for reading.
    with open(csv_file, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (tolerates an empty file)

        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            # Column 0: image filename, column 2: bounding box, column 3: label.
            filename, bbox, label = row[0], row[2], row[3]
            # The first and last characters of the box field are dropped and
            # the rest is split on commas.
            # NOTE(review): presumably "[xmin, ymin, width, height]" - confirm
            # against the CSV export.
            xmin, ymin, width, height = map(float, bbox[1:-1].split(','))
            annotations.setdefault(filename, []).append((xmin, ymin, width, height, label))

    split_directory = os.path.join(yolo_directory, split)

    # Write one YOLO label file per annotated image
    for filename, image_annotations in annotations.items():
        image_path = os.path.join(csv_directory, filename)
        if not os.path.exists(image_path):
            missing_images.append(filename)
            continue

        # Close the image file deterministically rather than leaving it to
        # garbage collection: on Windows an open handle blocks moving the file.
        with Image.open(image_path) as image:
            image_width, image_height = image.size

        # NOTE(review): the label column is written through unchanged; confirm
        # it already holds the class index the trainer expects.
        yolo_annotations = []
        for xmin, ymin, box_width, box_height, label in image_annotations:
            x_center, y_center, width, height = convert_to_yolo_format(
                image_width, image_height, xmin, ymin, box_width, box_height
            )
            yolo_annotations.append(f"{label} {x_center} {y_center} {width} {height}")

        # The label file mirrors the image name with a .txt extension.
        output_file = os.path.join(split_directory, f"{os.path.splitext(filename)[0]}.txt")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, 'w') as outfile:
            outfile.write('\n'.join(yolo_annotations))

# Save the list of missing images (the directory may not exist yet if no
# label file was written)
os.makedirs(yolo_directory, exist_ok=True)
missing_images_file = os.path.join(yolo_directory, 'missing_images.txt')
with open(missing_images_file, 'w') as file:
    file.write("\n".join(missing_images))

In [19]:
#Quick check that the script seems to have run correctly
import pandas as pd
# Count the distinct image filenames listed in one split's CSV.
# Note: despite its name, val1 holds the *test* split.
split_csv_path = "D:\\YOLOv5Rowan\\data\\images\\zooniverse\\test.csv"
val1 = pd.read_csv(split_csv_path)
unique_filenames = val1["filename"].unique()
len(unique_filenames)

6970

In [22]:
#Move images into train, val and test folders

import os
import shutil

# Path to the directory containing the image files
image_directory = 'D:\\YOLOv5Rowan\\data\\images\\zooniverse\\'

# Path to the directory containing the label files
label_directory = 'D:\\YOLOv5Rowan\\data\\labels\\zooniverse\\'

# Extension of the images in image_directory. The moved file keeps this
# extension: shutil.move does not re-encode, so a .png must stay a .png.
image_extension = '.png'

# Create "train", "val" and "test" subdirectories in the image directory
train_directory = os.path.join(image_directory, 'train')
val_directory = os.path.join(image_directory, 'val')
test_directory = os.path.join(image_directory, 'test')
split_directories = {
    'train': train_directory,
    'val': val_directory,
    'test': test_directory,
}
for split_directory in split_directories.values():
    os.makedirs(split_directory, exist_ok=True)

# Iterate through the label directory
for root, dirs, files in os.walk(label_directory):
    # Identify the split from the first folder below label_directory, not from
    # a substring of the whole path (which would also match parent folders).
    split_name = os.path.relpath(root, label_directory).split(os.sep)[0]
    destination_directory = split_directories.get(split_name)
    if destination_directory is None:
        # Files directly in label_directory, or under an unknown folder
        continue

    for file in files:
        # Get the label file name without extension
        label_file_name = os.path.splitext(file)[0]
        image_file_name = f"{label_file_name}{image_extension}"

        # Skip labels whose image is not (or no longer) in image_directory
        image_file_path = os.path.join(image_directory, image_file_name)
        if not os.path.isfile(image_file_path):
            continue

        try:
            # Move the image file to the destination directory
            shutil.move(image_file_path, os.path.join(destination_directory, image_file_name))
        except (shutil.Error, OSError) as e:
            # Report and carry on so one locked file does not stop the run
            print(f"Failed to move {image_file_path}: {e}")
            continue


Failed to move D:\YOLOv5Rowan\data\images\zooniverse\20211201_Atrisco_0461_01_01.png: [WinError 32] The process cannot access the file because it is being used by another process: 'D:\\YOLOv5Rowan\\data\\images\\zooniverse\\20211201_Atrisco_0461_01_01.png'


In [None]:
##EXPERT

In [2]:
#convert Expert labels into YOLOv5 format and move labels into new directories
import os
import csv
from PIL import Image

# Function to convert coordinates to YOLO format
def convert_to_yolo_format(image_width, image_height, xmin, ymin, width, height):
    """Convert a top-left/size box to a centre/size box relative to the image.

    This repeats the definition from the Zooniverse conversion cell so that
    the Expert cells can be run on their own.

    Args:
        image_width: Image width used to scale horizontal values.
        image_height: Image height used to scale vertical values.
        xmin: Left edge of the box.
        ymin: Top edge of the box.
        width: Box width.
        height: Box height.

    Returns:
        ``(x_center, y_center, yolo_width, yolo_height)``, each divided by the
        image dimension on its axis.

    Raises:
        ZeroDivisionError: If either image dimension is zero.
    """
    half_width = width / 2
    half_height = height / 2
    centre = ((xmin + half_width) / image_width, (ymin + half_height) / image_height)
    size = (width / image_width, height / image_height)
    return centre + size

# Path to the directory containing the CSV files (image paths from the CSVs
# are resolved against this same directory)
csv_directory = 'D:\\YOLOv5Rowan\\data\\images\\usfws\\'

# Path to the directory where the YOLO format annotations will be saved
yolo_directory = 'D:\\YOLOv5Rowan\\data\\labels\\usfws\\'

# Filenames listed in a CSV whose image file was not found on disk
missing_images = []

# Process each split (train, val, test); each split has its own <split>.csv
for split in ['train', 'val', 'test']:
    csv_file = os.path.join(csv_directory, f'{split}.csv')

    # Group the annotations by image so each image produces one label file.
    annotations = {}

    # newline='' is the csv module's documented way to open a file for reading.
    with open(csv_file, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (tolerates an empty file)

        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            # Column 0: image filename, column 2: bounding box, column 3: label.
            filename, bbox, label = row[0], row[2], row[3]
            # The first and last characters of the box field are dropped and
            # the rest is split on commas.
            # NOTE(review): presumably "[xmin, ymin, width, height]" - confirm
            # against the CSV export.
            xmin, ymin, width, height = map(float, bbox[1:-1].split(','))
            annotations.setdefault(filename, []).append((xmin, ymin, width, height, label))

    split_directory = os.path.join(yolo_directory, split)

    # Write one YOLO label file per annotated image
    for filename, image_annotations in annotations.items():
        image_path = os.path.join(csv_directory, filename)
        if not os.path.exists(image_path):
            missing_images.append(filename)
            continue

        # Close the image file deterministically rather than leaving it to
        # garbage collection: on Windows an open handle blocks moving the file.
        with Image.open(image_path) as image:
            image_width, image_height = image.size

        # NOTE(review): the label column is written through unchanged; confirm
        # it already holds the class index the trainer expects.
        yolo_annotations = []
        for xmin, ymin, box_width, box_height, label in image_annotations:
            x_center, y_center, width, height = convert_to_yolo_format(
                image_width, image_height, xmin, ymin, box_width, box_height
            )
            yolo_annotations.append(f"{label} {x_center} {y_center} {width} {height}")

        # The label file mirrors the image name with a .txt extension.
        output_file = os.path.join(split_directory, f"{os.path.splitext(filename)[0]}.txt")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, 'w') as outfile:
            outfile.write('\n'.join(yolo_annotations))

# Save the list of missing images (the directory may not exist yet if no
# label file was written)
os.makedirs(yolo_directory, exist_ok=True)
missing_images_file = os.path.join(yolo_directory, 'missing_images.txt')
with open(missing_images_file, 'w') as file:
    file.write("\n".join(missing_images))

In [4]:
#Move images into train, val and test folders

import os
import shutil

# Path to the directory containing the image files
image_directory = 'D:\\YOLOv5Rowan\\data\\images\\usfws\\'

# Path to the directory containing the label files
label_directory = 'D:\\YOLOv5Rowan\\data\\labels\\usfws\\'

# Extension of the images in image_directory; the moved file keeps it.
image_extension = '.jpg'

# Create "train", "val" and "test" subdirectories in the image directory
train_directory = os.path.join(image_directory, 'train')
val_directory = os.path.join(image_directory, 'val')
test_directory = os.path.join(image_directory, 'test')
split_directories = {
    'train': train_directory,
    'val': val_directory,
    'test': test_directory,
}
for split_directory in split_directories.values():
    os.makedirs(split_directory, exist_ok=True)

# Iterate through the label directory
for root, dirs, files in os.walk(label_directory):
    # Identify the split from the first folder below label_directory, not from
    # a substring of the whole path (which would also match parent folders).
    split_name = os.path.relpath(root, label_directory).split(os.sep)[0]
    destination_directory = split_directories.get(split_name)
    if destination_directory is None:
        # Files directly in label_directory, or under an unknown folder
        continue

    for file in files:
        # Get the label file name without extension
        label_file_name = os.path.splitext(file)[0]
        image_file_name = f"{label_file_name}{image_extension}"

        # Skip labels whose image is not (or no longer) in image_directory
        image_file_path = os.path.join(image_directory, image_file_name)
        if not os.path.isfile(image_file_path):
            continue

        try:
            # Move the image file to the destination directory
            shutil.move(image_file_path, os.path.join(destination_directory, image_file_name))
        except (shutil.Error, OSError) as e:
            # Report and carry on so one locked file does not stop the run
            print(f"Failed to move {image_file_path}: {e}")
            continue
