# Downloading the cyclist data from Roboflow and saving it to a folder

In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="YR3Kzt494NMSiJw8npFK")
project = rf.workspace("bicycle-detection").project("bike-detect-ct")
version = project.version(5)
dataset = version.download("yolov11")
                
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Bike-Detect---CT-5 to yolov11:: 100%|██████████| 5665052/5665052 [03:27<00:00, 27323.11it/s]





Extracting Dataset Version Zip to Bike-Detect---CT-5 in yolov11:: 100%|██████████| 36717/36717 [01:54<00:00, 320.56it/s]


# Import Necessary Libraries

In [None]:
import os
import psycopg2
import shutil
from PIL import Image, ImageDraw
import random

## Count the images in a folder

In [3]:
def count_images_in_folder(folder_path, extensions=('jpg', 'jpeg', 'png', 'bmp', 'gif', 'txt')):
    """
    Counts the files in a folder whose extension is one of ``extensions``.

    Only regular files directly inside ``folder_path`` are counted (no recursion,
    directories are ignored). Matching is case-insensitive and requires a real
    extension: ``photo.jpg`` matches ``'jpg'``, a file named ``notajpg`` does not.

    Args:
        folder_path (str): Path to the folder.
        extensions (str | Iterable[str]): Allowed file extensions, with or without
            a leading dot. A single string is treated as one extension. The default
            also contains 'txt', so text files are counted unless a narrower set
            is passed.

    Returns:
        int: Total count of matching files in the folder.
    """
    # A bare string such as 'txt' (or ('txt') without a trailing comma) is one extension.
    if isinstance(extensions, str):
        extensions = (extensions,)

    # Normalise to lowercase suffixes that start with exactly one dot.
    suffixes = tuple('.' + ext.lower().lstrip('.') for ext in extensions)

    with os.scandir(folder_path) as entries:
        return sum(
            1
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(suffixes)
        )

# Example: count the matching files in the training image folder
folder_path = "dataset/train/images"  # Point this at the folder you want to inspect
image_count = count_images_in_folder(folder_path=folder_path)
print(f"Total images in folder '{folder_path}': {image_count}")


Total images in folder 'dataset/train/images': 18036


In [4]:
folder_path = "dataset/train/labels"  # Replace with your folder path
# ('txt',) is a one-element tuple; ('txt') without the comma is just the string 'txt'.
label_count = count_images_in_folder(folder_path, extensions=('txt',))
print(f"Total labels in folder '{folder_path}': {label_count}")

Total labels in folder 'dataset/train/labels': 18036


## Combining images and labels folders into a single folder

In [None]:
def combine_folders(image_folders, label_folders, output_folder):
    """
    Merges several image folders and label folders into one 'images' and one 'labels' folder.

    Files are moved (not copied) into ``<output_folder>/images`` and
    ``<output_folder>/labels``; both folders are created when missing. Files whose
    names do not end in one of the recognised suffixes stay where they are.

    Args:
        image_folders (list): List of paths to image folders.
        label_folders (list): List of paths to label folders.
        output_folder (str): Path to the output folder.

    Returns:
        None
    """
    image_suffixes = ('jpg', 'jpeg', 'png', 'bmp', 'gif')
    label_suffixes = ('txt', 'json', 'xml')

    # Destination folders for the merged dataset
    output_images = os.path.join(output_folder, "images")
    output_labels = os.path.join(output_folder, "labels")
    for target_dir in (output_images, output_labels):
        os.makedirs(target_dir, exist_ok=True)

    # Images first ...
    for source_dir in image_folders:
        for file in os.listdir(source_dir):
            if not file.lower().endswith(image_suffixes):
                continue
            shutil.move(os.path.join(source_dir, file), os.path.join(output_images, file))
            print(f"Moved image: {file}")

    # ... then labels
    for source_dir in label_folders:
        for file in os.listdir(source_dir):
            if not file.lower().endswith(label_suffixes):
                continue
            shutil.move(os.path.join(source_dir, file), os.path.join(output_labels, file))
            print(f"Moved label: {file}")

    print("Folders combined successfully!")

# Example usage
# Paths are built with os.path.join so the cell works on every OS; the original
# backslash-separated literals only resolve correctly on Windows.
image_folders = [
    os.path.join("dataset", "test", "images"),
    os.path.join("dataset", "train", "images"),
    os.path.join("data_tsinghua", "images"),
]
label_folders = [
    os.path.join("dataset", "test", "labels"),
    os.path.join("dataset", "train", "labels"),
    os.path.join("data_tsinghua", "labels"),
]
output_folder = "final_database"

combine_folders(image_folders, label_folders, output_folder)


Moved label: 1000_jpg.rf.06386acde02aae74325af3ef824a0f5c.txt
Moved label: 1001_jpg.rf.570f164aa6dd098f02eafda4e37d613c.txt
Moved label: 1007_jpg.rf.0c72422219ce3b636264e7fcd0c6f416.txt
Moved label: 100_jpg.rf.d8d5a7696c84b6a61814ccf976c16401.txt
Moved label: 1013_jpg.rf.9c2b1a2f91ed69496c2a24f462598437.txt
Moved label: 1015_jpg.rf.50927d82713d444ca6b382632997f8c2.txt
Moved label: 1016_jpg.rf.6ae7a0d2c4db3dbe70aae8475b60e8a4.txt
Moved label: 1019_jpg.rf.383cc704b39d7d81ec846012b833de21.txt
Moved label: 1028_jpg.rf.70c03a9167624389e4b63151918f8867.txt
Moved label: 1039_jpg.rf.395c34f1807879390a277bcf9b2e91a0.txt
Moved label: 1041_jpg.rf.77f629e26382ba798ed918e4a7041c35.txt
Moved label: 1046_jpg.rf.74315e42f880becfa2080409c0eba601.txt
Moved label: 1048_jpg.rf.2e0ea96316a485b7d059cfe2f5885f96.txt
Moved label: 1071_jpg.rf.47486eb8fb2ae8fdabd16a7eabe6e545.txt
Moved label: 1083_jpg.rf.bdbb2b3d6d9bb23e2838e71de0b78f45.txt
Moved label: 1086_jpg.rf.eac554d1f90b4c440b4fc4cb9af22eb3.txt
Moved lab

## Renaming the images to have a consistent naming convention

In [9]:
def rename_images_and_labels(image_folder, label_folder, prefix="image"):
    """
    Renames images and corresponding labels to a consistent naming convention.

    Images are numbered from 1 in sorted file-name order and renamed to
    ``<prefix>_<NNNN><ext>``. Each image is paired with the label that has the
    same base name (``cat.jpg`` with ``cat.txt``) instead of by list position, so
    an image can never receive another image's label. When the two folders cannot
    be paired one-to-one, an error is printed and nothing is renamed.

    Renaming happens in two passes through temporary names. This prevents a
    target name that already exists in the folder (for example when the function
    is run a second time) from being overwritten before it has been renamed
    itself. If the process is interrupted, files may be left under their
    temporary ``*.tmp`` names.

    Args:
        image_folder (str): Path to the folder containing images.
        label_folder (str): Path to the folder containing labels.
        prefix (str): Prefix for renaming files (default is 'image').

    Returns:
        None
    """
    import uuid  # local import: only used to build collision-free temporary names

    # List images and labels, sorted to maintain consistent order
    images = sorted(f for f in os.listdir(image_folder) if f.lower().endswith(('jpg', 'jpeg', 'png')))
    labels = sorted(f for f in os.listdir(label_folder) if f.lower().endswith(('txt', 'json', 'xml')))

    # Check if image and label counts match
    if len(images) != len(labels):
        print("Error: Number of images and labels do not match.")
        return

    # Pair by base name; refuse to continue when the pairing is ambiguous or incomplete.
    labels_by_stem = {os.path.splitext(label)[0]: label for label in labels}
    image_stems = [os.path.splitext(image)[0] for image in images]
    has_duplicate_stems = len(labels_by_stem) != len(labels) or len(set(image_stems)) != len(images)
    unmatched = [image for image, stem in zip(images, image_stems) if stem not in labels_by_stem]
    if has_duplicate_stems or unmatched:
        print("Error: Images and labels cannot be paired one-to-one by file name.")
        return

    # Pass 1: move every file to a unique temporary name so no final name is occupied.
    token = uuid.uuid4().hex
    staged = []
    for i, (image, stem) in enumerate(zip(images, image_stems), start=1):
        label = labels_by_stem[stem]

        # Generate new names with consistent numbering
        new_image_name = f"{prefix}_{i:04d}{os.path.splitext(image)[1]}"  # e.g., image_0001.jpg
        new_label_name = f"{prefix}_{i:04d}{os.path.splitext(label)[1]}"  # e.g., image_0001.txt

        # Distinct suffixes keep the temporary names apart even if both folders are the same.
        temp_image_path = os.path.join(image_folder, f"{token}_{i}_image.tmp")
        temp_label_path = os.path.join(label_folder, f"{token}_{i}_label.tmp")
        os.rename(os.path.join(image_folder, image), temp_image_path)
        os.rename(os.path.join(label_folder, label), temp_label_path)
        staged.append((image, label, temp_image_path, temp_label_path, new_image_name, new_label_name))

    # Pass 2: move the temporary files to their final names.
    for image, label, temp_image_path, temp_label_path, new_image_name, new_label_name in staged:
        os.rename(temp_image_path, os.path.join(image_folder, new_image_name))
        os.rename(temp_label_path, os.path.join(label_folder, new_label_name))
        print(f"Renamed: {image} -> {new_image_name}, {label} -> {new_label_name}")

    print("Renaming completed successfully!")

# Example usage
# os.path.join avoids the invalid "\i" and "\l" escape sequences that the plain
# (non-raw) backslash literals contained, and works on every OS.
image_folder = os.path.join("final_database", "images")
label_folder = os.path.join("final_database", "labels")
rename_images_and_labels(image_folder, label_folder, prefix="image")


Renamed: 000000.jpg -> image_0001.jpg, 000000.txt -> image_0001.txt
Renamed: 000001.jpg -> image_0002.jpg, 000001.txt -> image_0002.txt
Renamed: 000002.jpg -> image_0003.jpg, 000002.txt -> image_0003.txt
Renamed: 000003.jpg -> image_0004.jpg, 000003.txt -> image_0004.txt
Renamed: 000004.jpg -> image_0005.jpg, 000004.txt -> image_0005.txt
Renamed: 000005.jpg -> image_0006.jpg, 000005.txt -> image_0006.txt
Renamed: 000006.jpg -> image_0007.jpg, 000006.txt -> image_0007.txt
Renamed: 000007.jpg -> image_0008.jpg, 000007.txt -> image_0008.txt
Renamed: 000008.jpg -> image_0009.jpg, 000008.txt -> image_0009.txt
Renamed: 000009.jpg -> image_0010.jpg, 000009.txt -> image_0010.txt
Renamed: 000010.jpg -> image_0011.jpg, 000010.txt -> image_0011.txt
Renamed: 000011.jpg -> image_0012.jpg, 000011.txt -> image_0012.txt
Renamed: 000012.jpg -> image_0013.jpg, 000012.txt -> image_0013.txt
Renamed: 000013.jpg -> image_0014.jpg, 000013.txt -> image_0014.txt
Renamed: 000014.jpg -> image_0015.jpg, 000014.tx

## Insert the images and labels into the PostgreSQL database

In [None]:
def insert_images_and_annotations_yolo(image_folder, label_folder, db_config):
    """
    Inserts image metadata and YOLO-formatted annotations into PostgreSQL.

    For every image in ``image_folder`` that has a label file with the same base
    name in ``label_folder``, one row is inserted into ``images`` (file_name,
    file_path, width, height, split=NULL) and one row per valid label line into
    ``annotations``. Images are processed in sorted file-name order.

    Everything runs in a single transaction: it is committed once at the end, and
    rolled back if any error occurs. The cursor and connection are always closed.

    Args:
        image_folder (str): Path to the folder containing images.
        label_folder (str): Path to the folder containing labels.
        db_config (dict): PostgreSQL database configuration, passed as keyword
            arguments to ``psycopg2.connect``.

    Returns:
        None

    Raises:
        Exception: Any error from the file system, image reading or the database
            is re-raised after the transaction has been rolled back.
    """
    # List all image files (sorted so the insertion order is reproducible)
    image_files = sorted(f for f in os.listdir(image_folder) if f.lower().endswith(('jpg', 'jpeg', 'png')))
    print(f"Found {len(image_files)} images to insert.")

    # Connect to the database
    conn = psycopg2.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            # Process each image and its corresponding label
            for image_file in image_files:
                image_path = os.path.join(image_folder, image_file)
                label_file = os.path.splitext(image_file)[0] + ".txt"  # Matching label file
                label_path = os.path.join(label_folder, label_file)

                # Skip if label file doesn't exist
                if not os.path.exists(label_path):
                    print(f"Label file missing for: {image_file}")
                    continue

                # Get image dimensions
                with Image.open(image_path) as img:
                    width, height = img.size

                # Insert image metadata into 'images' table
                cursor.execute("""
                    INSERT INTO images (file_name, file_path, width, height, split)
                    VALUES (%s, %s, %s, %s, NULL) RETURNING id;
                """, (image_file, image_path, width, height))
                image_id = cursor.fetchone()[0]  # Retrieve the generated ID for the image

                # Each label line reads: class_name x_center y_center width height
                with open(label_path, "r") as file:
                    for line in file:
                        if not line.strip():
                            continue  # blank lines (e.g. a trailing newline) are not errors

                        annotation = _parse_yolo_label_line(line)
                        if annotation is None:
                            print(f"Invalid label format in file: {label_file}")
                            continue

                        class_name, x_center, y_center, bbox_width, bbox_height = annotation

                        # Insert annotation into 'annotations' table
                        cursor.execute("""
                            INSERT INTO annotations (image_id, x_center, y_center, width, height, class_name)
                            VALUES (%s, %s, %s, %s, %s, %s);
                        """, (image_id, x_center, y_center, bbox_width, bbox_height, class_name))

                print(f"Inserted image '{image_file}' and its annotations.")

            # Commit once, after every image has been processed
            conn.commit()
        except Exception:
            # Leave the database unchanged when anything goes wrong part-way through
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()

    print("All YOLO data inserted successfully!")


def _parse_yolo_label_line(line):
    """Parses one label line into (class_name, x_center, y_center, width, height); returns None if malformed."""
    parts = line.split()
    if len(parts) != 5:
        return None
    try:
        x_center, y_center, bbox_width, bbox_height = (float(value) for value in parts[1:])
    except ValueError:
        # A non-numeric coordinate makes the line invalid instead of aborting the import
        return None
    return parts[0], x_center, y_center, bbox_width, bbox_height

# PostgreSQL Configuration
# The password is taken from the PGPASSWORD environment variable when it is set,
# so real credentials never have to be written into the notebook; otherwise the
# placeholder below is used and must be replaced before connecting.
db_config = {
    "host": "localhost",
    "database": "cyclists",
    "user": "postgres",
    "password": os.environ.get("PGPASSWORD", "*******"),
    "port": 5432
}

# Paths to images and labels
# Built with os.path.join so the paths resolve on every OS, not only on Windows.
image_folder = os.path.join("final_database", "images")
label_folder = os.path.join("final_database", "labels")

# Insert data
insert_images_and_annotations_yolo(image_folder, label_folder, db_config)


Found 32028 images to insert.
Inserted image 'image_0001.jpg' and its annotations.
Inserted image 'image_0002.jpg' and its annotations.
Inserted image 'image_0003.jpg' and its annotations.
Inserted image 'image_0004.jpg' and its annotations.
Inserted image 'image_0005.jpg' and its annotations.
Inserted image 'image_0006.jpg' and its annotations.
Inserted image 'image_0007.jpg' and its annotations.
Inserted image 'image_0008.jpg' and its annotations.
Inserted image 'image_0009.jpg' and its annotations.
Inserted image 'image_0010.jpg' and its annotations.
Inserted image 'image_0011.jpg' and its annotations.
Inserted image 'image_0012.jpg' and its annotations.
Inserted image 'image_0013.jpg' and its annotations.
Inserted image 'image_0014.jpg' and its annotations.
Inserted image 'image_0015.jpg' and its annotations.
Inserted image 'image_0016.jpg' and its annotations.
Inserted image 'image_0017.jpg' and its annotations.
Inserted image 'image_0018.jpg' and its annotations.
Inserted image '

In [36]:
# Settings for the export cell below
num_images = 500  # How many images to fetch
output_folder = "example_folder"  # Where the export is written; change to your desired output path

## Fetch random images from the database

In [37]:
def fetch_and_download_data(db_config, output_folder, num_images, split_data=True, 
                            train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, filter_classes=None):
    """
    Exports a random sample of images with their YOLO labels from the database.

    Up to ``num_images`` distinct images that have at least one annotation of a
    requested class are selected at random. For each selected image, all of its
    annotations of the requested classes are written to a single label file, and
    the image file is copied once. Splitting is done per image, so an image and
    its complete label file always end up in exactly one split.

    Args:
        db_config (dict): Keyword arguments for ``psycopg2.connect``.
        output_folder (str): Folder the export is written to.
        num_images (int): Maximum number of images to export.
        split_data (bool): When True, write ``train``/``val``/``test`` subfolders,
            each with ``images`` and ``labels``; when False, write everything to
            ``<output_folder>/images`` and ``<output_folder>/labels``.
        train_ratio (float): Fraction of the images placed in the train split.
        val_ratio (float): Fraction of the images placed in the val split.
        test_ratio (float): Accepted for interface compatibility; the test split
            receives whatever remains after the train and val splits.
        filter_classes (list | None): Class names to export. Defaults to ['0'].

    Returns:
        None
    """
    # Define SQL query with class filtering
    if filter_classes is None:
        filter_classes = ['0']  # Default to class 0 (cyclists)

    format_classes = tuple(filter_classes)

    # The sub-select picks the random image ids; the outer query then returns every
    # matching annotation of those images, so the limit counts images, not rows.
    class_filter_query = """
        SELECT i.file_path, a.x_center, a.y_center, a.width, a.height, a.class_name
        FROM images i
        JOIN annotations a ON i.id = a.image_id
        WHERE a.class_name IN %s
          AND i.id IN (
              SELECT image_id
              FROM annotations
              WHERE class_name IN %s
              GROUP BY image_id
              ORDER BY RANDOM()
              LIMIT %s
          );
    """

    # Fetch filtered records; the connection is released even if the query fails
    conn = psycopg2.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(class_filter_query, (format_classes, format_classes, num_images))
            records = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # Group the annotation rows by image so each image is handled exactly once
    label_lines_by_image = {}
    for file_path, x_center, y_center, width, height, class_name in records:
        label_lines_by_image.setdefault(file_path, []).append(
            f"{class_name} {x_center} {y_center} {width} {height}"
        )

    samples = list(label_lines_by_image.items())
    random.shuffle(samples)  # Shuffle for randomness

    # Split data (by image)
    if split_data:
        train_count = int(len(samples) * train_ratio)
        val_count = int(len(samples) * val_ratio)
        splits = [
            (os.path.join(output_folder, "train"), samples[:train_count]),
            (os.path.join(output_folder, "val"), samples[train_count:train_count + val_count]),
            (os.path.join(output_folder, "test"), samples[train_count + val_count:]),
        ]
    else:  # Single output folder
        splits = [(output_folder, samples)]

    # Copy files and write YOLO labels
    for split_root, split_samples in splits:
        image_folder = os.path.join(split_root, "images")
        label_folder = os.path.join(split_root, "labels")
        os.makedirs(image_folder, exist_ok=True)
        os.makedirs(label_folder, exist_ok=True)

        for file_path, label_lines in split_samples:
            file_name = os.path.basename(file_path)

            # Copy image
            shutil.copy(file_path, os.path.join(image_folder, file_name))

            # Write label file (one line per annotation)
            label_file = os.path.splitext(file_name)[0] + ".txt"
            with open(os.path.join(label_folder, label_file), "w") as f:
                f.write("\n".join(label_lines))

    # Report the number of images actually exported, which can be below num_images
    print(f"Data ({len(samples)} images) exported successfully to {output_folder}.")


In [38]:
# Export the sample configured above into train/val/test subfolders
fetch_and_download_data(
    db_config=db_config,
    output_folder=output_folder,
    num_images=num_images,
    split_data=True,
)

Data (500 images) exported successfully to example_folder.


## Generate summary statistics about the database

In [17]:
def generate_statistics(db_config):
    """
    Prints summary statistics for the dataset stored in the database.

    Reports the number of rows in ``images`` and, for every ``class_name`` in
    ``annotations``, the number of bounding boxes. The cursor and connection are
    closed even when a query fails.

    Args:
        db_config (dict): Keyword arguments for ``psycopg2.connect``.

    Returns:
        None
    """
    # Connect to the database
    conn = psycopg2.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            # Count total images
            cursor.execute("SELECT COUNT(*) FROM images;")
            total_images = cursor.fetchone()[0]

            # Class distribution
            cursor.execute("""
                SELECT class_name, COUNT(*) 
                FROM annotations 
                GROUP BY class_name;
            """)
            class_counts = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # Print statistics
    print(f"Total Images: {total_images}")
    print("Class Distribution:")
    for class_name, count in class_counts:
        print(f"  Class '{class_name}': {count} bounding boxes")


In [18]:
# Print the image count and the per-class bounding-box counts
generate_statistics(db_config=db_config)

Total Images: 32028
Class Distribution:
  Class '0': 49657 bounding boxes


## Preview a random image with annotations

In [24]:
def preview_random_image(db_config):
    """
    Displays a random image with all bounding boxes drawn.

    One row of ``images`` is chosen at random and fetched together with its
    annotations. A LEFT JOIN is used so that an image without annotations is
    still shown (without boxes) instead of being reported as missing data.
    Box coordinates are read as fractions of the image size and scaled to
    pixels. The database connection is closed before the image is opened,
    including when no data is found or a query fails.

    Args:
        db_config (dict): Keyword arguments for ``psycopg2.connect``.

    Returns:
        None
    """
    # Connect to the database
    conn = psycopg2.connect(**db_config)
    try:
        cursor = conn.cursor()
        try:
            # Fetch a random image and all its annotations
            cursor.execute("""
                SELECT i.file_path, a.x_center, a.y_center, a.width, a.height, a.class_name
                FROM images i
                LEFT JOIN annotations a ON i.id = a.image_id
                WHERE i.id = (
                    SELECT id FROM images ORDER BY RANDOM() LIMIT 1
                );
            """)
            records = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    if not records:
        print("No data available.")
        return

    # Load image
    file_path = records[0][0]
    with Image.open(file_path) as image:
        draw = ImageDraw.Draw(image)
        img_width, img_height = image.size

        # Draw all bounding boxes for this image
        for _, x_center, y_center, width, height, class_name in records:
            if x_center is None:
                continue  # image without annotations: the LEFT JOIN yields a row of NULLs

            x_min = int((x_center - width / 2) * img_width)
            y_min = int((y_center - height / 2) * img_height)
            x_max = int((x_center + width / 2) * img_width)
            y_max = int((y_center + height / 2) * img_height)

            # Draw bounding box
            draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=2)
            # Keep the class label on the canvas when the box touches the top edge
            draw.text((x_min, max(y_min - 10, 0)), f"{class_name}", fill="red")

        # Display the image with all bounding boxes
        image.show()

    print(f"Displayed image with bounding boxes from: {file_path}")


In [35]:
# Open one randomly chosen image with its bounding boxes drawn
preview_random_image(db_config=db_config)

Displayed image with bounding boxes from: final_database\images\image_3991.jpg
