In [None]:
# %%
# 🧩 Convert YOLO + CSV datasets into pandas DataFrames and save them as CSV

import os
from pathlib import Path
import yaml
import pandas as pd
from glob import glob
from PIL import Image
import cv2

# %%
# 📘 1. Load YOLO config

yaml_path = "datasets/Playing-Cards-Object-Detection-main/data.yaml"
# Be explicit about the encoding so the platform default codec is never used.
with open(yaml_path, "r", encoding="utf-8") as f:
    yolo_cfg = yaml.safe_load(f)

# "names" may be a plain list (index == class id) or an {id: name} mapping,
# depending on the tool that exported the config. Normalise to a list ordered
# by class id so that indexing and the reverse lookup below work for both.
# NOTE(review): a mapping is assumed to use contiguous ids starting at 0.
yolo_class_names = yolo_cfg["names"]
if isinstance(yolo_class_names, dict):
    yolo_class_names = [yolo_class_names[key] for key in sorted(yolo_class_names)]

# Reverse lookup: short class name -> integer class id.
yolo_class_to_id = {name: idx for idx, name in enumerate(yolo_class_names)}

print(f"Loaded {len(yolo_class_names)} classes from {yaml_path}")

# %%
# 🧩 Helpers

IMG_EXTS = [".jpg", ".jpeg", ".png"]

def find_image(images_dir: Path, stem: str) -> Path | None:
    """Return the first existing image path among common extensions."""
    for ext in IMG_EXTS:
        p = images_dir / f"{stem}{ext}"
        if p.exists():
            return p
    return None

def get_image_size(filename, base_folder):
    """Return ``(width, height)`` of the image at ``base_folder / filename``.

    The file is opened with PIL and closed again before returning.
    """
    image_file = Path(base_folder).joinpath(filename)
    with Image.open(image_file) as img:
        size = (img.width, img.height)
    return size
# %%
# 📘 2. Parse YOLO label files into a DataFrame (using actual structure)

def load_yolo_split(base_dir: Path) -> pd.DataFrame:
    """Load one YOLO split (``labels/`` + ``images/``) into a DataFrame.

    Every ``*.txt`` file in ``base_dir / "labels"`` is matched to an image of
    the same stem in ``base_dir / "images"`` (any extension in ``IMG_EXTS``).
    Each well-formed label line ``class_id x_center y_center width height``
    becomes one row. Class names are resolved through the module-level
    ``yolo_class_names`` list.

    Args:
        base_dir: Folder of the split, containing ``labels`` and ``images``.

    Returns:
        DataFrame with columns ``image``, ``class_id``, ``class_name``,
        ``bbox_x_center``, ``bbox_y_center``, ``bbox_width``, ``bbox_height``,
        ``image_width``, ``image_height``. The frame is empty (but keeps the
        columns) when the labels folder is missing or holds no label files.

    Label files without a matching image are skipped with a warning. Lines
    that do not have exactly five fields, cannot be parsed as numbers, or
    carry a class id outside ``yolo_class_names`` are skipped silently.
    """
    columns = [
        "image", "class_id", "class_name",
        "bbox_x_center", "bbox_y_center", "bbox_width", "bbox_height",
        "image_width", "image_height",
    ]
    labels_dir = base_dir / "labels"
    images_dir = base_dir / "images"

    if not labels_dir.exists():
        print(f"‚ö†Ô∏è No labels folder found: {labels_dir}")
        return pd.DataFrame(columns=columns)

    txt_files = sorted(labels_dir.glob("*.txt"))
    if not txt_files:
        print(f"‚ö†Ô∏è No label files found in {labels_dir}")
        return pd.DataFrame(columns=columns)

    all_rows = []
    for txt_file in txt_files:
        stem = txt_file.stem
        image_path = find_image(images_dir, stem)
        if image_path is None:
            # Without the image its size cannot be read, so skip the file
            # instead of crashing on a guessed "<stem>.jpg" path.
            print(f"‚ö†Ô∏è Missing image for {stem}")
            continue

        # Read the size from the file that was actually found, so .png and
        # .jpeg images work as well as .jpg.
        image_width, image_height = get_image_size(image_path.name, images_dir)
        image_str = str(image_path)

        with open(txt_file, "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    class_id = int(parts[0])
                    x_center, y_center, width, height = map(float, parts[1:])
                except ValueError:
                    # Malformed number: ignore the line like other bad lines.
                    continue
                # Reject negative ids too; they would index from the end.
                if not 0 <= class_id < len(yolo_class_names):
                    continue
                all_rows.append([
                    image_str, class_id, yolo_class_names[class_id],
                    x_center, y_center, width, height,
                    float(image_width), float(image_height),
                ])

    return pd.DataFrame(all_rows, columns=columns)

# Root of the YOLO-format dataset and its three split folders.
base_path = Path("datasets/Playing-Cards-Object-Detection-main")
train_base, valid_base, test_base = (
    base_path / split_name for split_name in ("train", "valid", "test")
)

# Parse the splits in train, valid, test order.
yolo_train_df, yolo_valid_df, yolo_test_df = map(
    load_yolo_split, (train_base, valid_base, test_base)
)

print(f"YOLO train: {len(yolo_train_df)}  |  valid: {len(yolo_valid_df)}  |  test: {len(yolo_test_df)}")

# %%
# 🧠 3. Mapping from full names → YOLO short names

# Spelled-out rank -> rank code used in the short class names.
_RANK_CODES = {
    "ace": "A", "two": "2", "three": "3", "four": "4", "five": "5",
    "six": "6", "seven": "7", "eight": "8", "nine": "9", "ten": "10",
    "jack": "J", "queen": "Q", "king": "K",
}
_SUIT_NAMES = ("clubs", "diamonds", "hearts", "spades")

# "<rank> of <suit>" -> short code (rank code + first letter of the suit),
# e.g. "ten of hearts" -> "10h". Built rank by rank, suits in the order above.
full_to_short = {
    f"{rank} of {suit}": f"{code}{suit[0]}"
    for rank, code in _RANK_CODES.items()
    for suit in _SUIT_NAMES
}

# Known misspellings in the CSV labels and their corrections.
typo_fixes = {"eigth": "eight"}
# First letter of an image filename -> suit name (used by repair_label).
SUIT_BY_PREFIX = {"c": "clubs", "d": "diamonds", "h": "hearts", "s": "spades"}

def normalize_full_name(s: str) -> str:
    """Canonicalise a full card name.

    The name is trimmed and lower-cased, every misspelling listed in
    ``typo_fixes`` is replaced, and runs of whitespace collapse to one space.
    """
    cleaned = s.strip().lower()
    for wrong, right in typo_fixes.items():
        cleaned = cleaned.replace(wrong, right)
    words = cleaned.split()
    return " ".join(words)

def repair_label(label: str, filename: str) -> str | None:
    if label == "seven of seven":
        prefix = Path(filename).stem[0].lower()
        suit = SUIT_BY_PREFIX.get(prefix)
        if suit:
            return f"seven of {suit}"
    return None

def map_full_to_short(label: str, filename: str) -> str | None:
    """Translate a full card name into its short class code.

    The label is normalised first. If the normalised name is not in
    ``full_to_short``, ``repair_label`` gets one chance to fix it using
    ``filename``. Returns ``None`` when neither lookup succeeds.
    """
    normalized = normalize_full_name(label)
    short = full_to_short.get(normalized)
    if short is not None:
        return short
    # repair_label yields None for labels it cannot fix; .get(None) is None.
    return full_to_short.get(repair_label(normalized, filename))

# %%
# 📘 4. Parse VOC CSVs and convert to YOLO format

def convert_voc_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    """Convert corner coordinates to normalised centre/size values.

    Returns ``(x_center, y_center, width, height)``, each divided by the
    image width or height respectively.
    """
    def along_axis(low, high, extent):
        # Midpoint and length of [low, high], both scaled by the extent.
        return (low + high) / 2 / extent, (high - low) / extent

    center_x, box_w = along_axis(xmin, xmax, img_w)
    center_y, box_h = along_axis(ymin, ymax, img_h)
    return center_x, center_y, box_w, box_h

def parse_voc_csv(csv_path: str, base_dir: str) -> pd.DataFrame:
    """
    Parse a VOC-style annotation CSV and convert its boxes to YOLO format.

    Each CSV row supplies ``filename``, ``class`` (full card name),
    ``width``, ``height`` and the box corners ``xmin``/``ymin``/``xmax``/
    ``ymax``. The class is mapped to its short name via ``map_full_to_short``
    and to an id via the module-level ``yolo_class_to_id``.

    Args:
        csv_path: Path of the annotation CSV.
        base_dir: Folder prepended to each ``filename`` for the ``image`` column.

    Returns:
        DataFrame with columns ``image``, ``class_id``, ``class_name``,
        ``bbox_x_center``, ``bbox_y_center``, ``bbox_width``, ``bbox_height``,
        ``image_width``, ``image_height``. When ``csv_path`` does not exist an
        empty frame with these columns is returned.

    Rows with an unmappable class, an unreadable or non-positive image size,
    or unparsable box coordinates are skipped with a printed warning.
    """
    columns = [
        "image", "class_id", "class_name",
        "bbox_x_center", "bbox_y_center", "bbox_width", "bbox_height",
        "image_width", "image_height",
    ]

    if not os.path.exists(csv_path):
        # Keep the schema so concatenating and saving still produce headers.
        print(f"‚ö†Ô∏è Missing file: {csv_path}")
        return pd.DataFrame(columns=columns)

    df_csv = pd.read_csv(csv_path)
    all_rows = []
    skipped = 0

    for _, row in df_csv.iterrows():
        fname = str(row["filename"]).strip()
        label = str(row["class"]).strip()

        mapped = map_full_to_short(label, fname)
        if not mapped:
            print(f"‚ö†Ô∏è Skipping unknown class: '{label}' in {fname}")
            skipped += 1
            continue

        try:
            img_w = float(row["width"])
            img_h = float(row["height"])
        except (KeyError, TypeError, ValueError) as e:
            print(f"‚ö†Ô∏è Skipping {fname}: bad width/height ({e})")
            skipped += 1
            continue

        # "not (> 0)" also rejects NaN (blank CSV cell) and negative sizes,
        # which would otherwise yield meaningless normalised boxes.
        if not (img_w > 0 and img_h > 0):
            print(f"‚ö†Ô∏è Skipping {fname}: invalid image size ({img_w}x{img_h})")
            skipped += 1
            continue

        try:
            xmin, ymin, xmax, ymax = (
                float(row[key]) for key in ("xmin", "ymin", "xmax", "ymax")
            )
        except (TypeError, ValueError) as e:
            print(f"‚ö†Ô∏è Skipping {fname}: bad bounding box ({e})")
            skipped += 1
            continue

        x_center, y_center, width, height = convert_voc_to_yolo(
            xmin, ymin, xmax, ymax, img_w, img_h
        )

        class_id = yolo_class_to_id[mapped]
        image_path = str(Path(base_dir) / fname)

        all_rows.append([image_path, class_id, mapped, x_center, y_center, width, height, img_w, img_h])

    df = pd.DataFrame(all_rows, columns=columns)
    print(f"{csv_path}: parsed {len(df)} valid annotations, skipped {skipped}.")
    return df


# Annotation CSV and image folder for each VOC split.
voc_train_csv, voc_train_base = (
    "datasets/Playing-Cards-Images-Object-Detection-Dataset/train_labels.csv",
    "datasets/Playing-Cards-Images-Object-Detection-Dataset/train/train",
)
voc_test_csv, voc_test_base = (
    "datasets/Playing-Cards-Images-Object-Detection-Dataset/test_labels.csv",
    "datasets/Playing-Cards-Images-Object-Detection-Dataset/test/test",
)

voc_train_df = parse_voc_csv(csv_path=voc_train_csv, base_dir=voc_train_base)
voc_test_df = parse_voc_csv(csv_path=voc_test_csv, base_dir=voc_test_base)

# Both splits stacked into one frame with a fresh index.
voc_df = pd.concat((voc_train_df, voc_test_df), ignore_index=True)
print(f"VOC total (CSV): {len(voc_df)} annotations")

# %%
# ✅ 5. Final Output

# Preview the first rows of every parsed split.
for _title, _frame in (
    ("YOLO (train):", yolo_train_df),
    ("YOLO (valid):", yolo_valid_df),
    ("YOLO (test):", yolo_test_df),
    ("VOC (train):", voc_train_df),
    ("VOC (test):", voc_test_df),
):
    print(_title)
    display(_frame.head())

# 6. Save to CSV
# All three YOLO splits stacked into one frame with a fresh index.
yolo_all_df = pd.concat([yolo_train_df, yolo_valid_df, yolo_test_df], ignore_index=True)

for _frame, _csv_path in (
    (yolo_train_df, "datasets/Playing-Cards-Object-Detection-main/yolo_train_converted.csv"),
    (yolo_valid_df, "datasets/Playing-Cards-Object-Detection-main/yolo_valid_converted.csv"),
    (yolo_test_df, "datasets/Playing-Cards-Object-Detection-main/yolo_test_converted.csv"),
    (yolo_all_df, "datasets/Playing-Cards-Object-Detection-main/yolo_all_converted.csv"),
    (voc_train_df, "datasets/Playing-Cards-Images-Object-Detection-Dataset/voc_train_converted.csv"),
    (voc_test_df, "datasets/Playing-Cards-Images-Object-Detection-Dataset/voc_test_converted.csv"),
    (voc_df, "datasets/Playing-Cards-Images-Object-Detection-Dataset/voc_all_converted.csv"),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
# %%
# 📘 7. Parse Labelized CSVs (already in short class format, like "Qc", "9s", "2h")

def parse_labelized_csv(csv_path: str, base_dir: str) -> pd.DataFrame:
    """
    Parse the Labelized CSV dataset, whose class names are already short
    (e.g. 'Qc', '9s'), and convert its VOC-style boxes to YOLO format.

    Each CSV row supplies ``filename``, ``class``, ``width``, ``height`` and
    the box corners ``xmin``/``ymin``/``xmax``/``ymax``. Class ids come from
    the module-level ``yolo_class_to_id``.

    Args:
        csv_path: Path of the annotation CSV.
        base_dir: Folder prepended to each ``filename`` for the ``image`` column.

    Returns:
        DataFrame with columns ``image``, ``class_id``, ``class_name``,
        ``bbox_x_center``, ``bbox_y_center``, ``bbox_width``, ``bbox_height``,
        ``image_width``, ``image_height``. When ``csv_path`` does not exist an
        empty frame with these columns is returned.

    Rows with an unknown class, an unreadable or non-positive image size, or
    unparsable box coordinates are skipped with a printed warning.
    """
    columns = [
        "image", "class_id", "class_name",
        "bbox_x_center", "bbox_y_center", "bbox_width", "bbox_height",
        "image_width", "image_height",
    ]

    if not os.path.exists(csv_path):
        # Keep the schema so concatenating and saving still produce headers.
        print(f"‚ö†Ô∏è Missing file: {csv_path}")
        return pd.DataFrame(columns=columns)

    df_csv = pd.read_csv(csv_path)
    all_rows = []
    skipped = 0

    for _, row in df_csv.iterrows():
        fname = str(row["filename"]).strip()
        label = str(row["class"]).strip()

        if label not in yolo_class_to_id:
            print(f"‚ö†Ô∏è Skipping unknown class '{label}' in {fname}")
            skipped += 1
            continue

        try:
            img_w = float(row["width"])
            img_h = float(row["height"])
        except (KeyError, TypeError, ValueError) as e:
            print(f"‚ö†Ô∏è Skipping {fname}: error reading width/height ({e})")
            skipped += 1
            continue

        # "not (> 0)" also rejects NaN (blank CSV cell) and negative sizes,
        # which would otherwise yield meaningless normalised boxes.
        if not (img_w > 0 and img_h > 0):
            print(f"‚ö†Ô∏è Skipping {fname}: invalid size ({img_w}x{img_h})")
            skipped += 1
            continue

        try:
            xmin, ymin, xmax, ymax = (
                float(row[key]) for key in ("xmin", "ymin", "xmax", "ymax")
            )
        except (TypeError, ValueError) as e:
            print(f"‚ö†Ô∏è Skipping {fname}: bad bounding box ({e})")
            skipped += 1
            continue

        x_center, y_center, width, height = convert_voc_to_yolo(
            xmin, ymin, xmax, ymax, img_w, img_h
        )

        class_id = yolo_class_to_id[label]
        image_path = str(Path(base_dir) / fname)

        all_rows.append([image_path, class_id, label, x_center, y_center, width, height, img_w, img_h])

    df = pd.DataFrame(all_rows, columns=columns)

    print(f"{csv_path}: parsed {len(df)} valid annotations, skipped {skipped}.")
    return df


# Annotation CSV and image folder for each Labelized split.
labelized_train_csv, labelized_train_base = (
    "datasets/Playing-Cards-Labelized-Dataset/train_cards_label.csv",
    "datasets/Playing-Cards-Labelized-Dataset/train",
)
labelized_test_csv, labelized_test_base = (
    "datasets/Playing-Cards-Labelized-Dataset/test_cards_label.csv",
    "datasets/Playing-Cards-Labelized-Dataset/test",
)

# Parse both splits, then stack them into one frame with a fresh index.
labelized_train_df = parse_labelized_csv(csv_path=labelized_train_csv, base_dir=labelized_train_base)
labelized_test_df = parse_labelized_csv(csv_path=labelized_test_csv, base_dir=labelized_test_base)

labelized_df = pd.concat((labelized_train_df, labelized_test_df), ignore_index=True)
print(f"Labelized total: {len(labelized_df)} annotations")

# Write each frame next to its source data.
for _frame, _csv_path in (
    (labelized_train_df, "datasets/Playing-Cards-Labelized-Dataset/labelized_train_converted.csv"),
    (labelized_test_df, "datasets/Playing-Cards-Labelized-Dataset/labelized_test_converted.csv"),
    (labelized_df, "datasets/Playing-Cards-Labelized-Dataset/labelized_all_converted.csv"),
):
    _frame.to_csv(_csv_path, index=False)


datasets/Playing-Cards-Labelized-Dataset/train_cards_label.csv: parsed 40089 valid annotations, skipped 0.
datasets/Playing-Cards-Labelized-Dataset/test_cards_label.csv: parsed 8004 valid annotations, skipped 0.
Labelized total: 48093 annotations


In [None]:
# %%
# 📘 8. Parse "The-Complete-Playing-Card-Dataset" (YOLO format, with only .txt and images)

import re
from PIL import Image

# Rank codes in the order the class list uses ("10" first, then 2-9, A, J, K, Q).
_COMPLETE_RANKS = ("10", "2", "3", "4", "5", "6", "7", "8", "9", "A", "J", "K", "Q")

# All 52 short class names: every rank combined with the suits c, d, h, s.
COMPLETE_CLASSES = [rank + suit for rank in _COMPLETE_RANKS for suit in "cdhs"]

# Lower-cased class name -> position in COMPLETE_CLASSES.
COMPLETE_CLASS_TO_ID = dict(
    zip(map(str.lower, COMPLETE_CLASSES), range(len(COMPLETE_CLASSES)))
)

def parse_complete_yolo_dataset(base_dir: str) -> pd.DataFrame:
    """Parse "The-Complete-Playing-Card-Dataset" into a DataFrame.

    Label files are read from ``base_dir/YOLO_Annotations/YOLO_Annotations``
    and images from ``base_dir/Images/Images``. The class of every box in a
    label file is inferred from the start of the file name (e.g. ``10C15``
    gives ``10c``); the class id written inside the label file is ignored.
    Ids come from ``COMPLETE_CLASS_TO_ID``.

    Args:
        base_dir: Root folder of the dataset.

    Returns:
        DataFrame with columns ``image``, ``class_id``, ``class_name``,
        ``bbox_x_center``, ``bbox_y_center``, ``bbox_width``, ``bbox_height``,
        ``image_width``, ``image_height``. When the labels folder is missing
        an empty frame with these columns is returned.

    Files whose name yields no known class, or that have no matching image,
    are skipped with a printed warning. Label lines without exactly five
    fields or with unparsable numbers are skipped silently.
    """
    columns = [
        "image", "class_id", "class_name",
        "bbox_x_center", "bbox_y_center",
        "bbox_width", "bbox_height",
        "image_width", "image_height",
    ]
    base_dir = Path(base_dir)
    labels_dir = base_dir / "YOLO_Annotations" / "YOLO_Annotations"
    images_dir = base_dir / "Images" / "Images"

    if not labels_dir.exists():
        # Keep the schema so saving the result still writes a header row.
        print(f"‚ö†Ô∏è Labels folder not found: {labels_dir}")
        return pd.DataFrame(columns=columns)

    # Rank + suit at the start of the stem; compiled once for all files.
    class_prefix = re.compile(r"([0-9]{1,2}[cdhsCDHS]|[AJQKajqk][cdhsCDHS])")

    all_rows = []
    for txt_file in sorted(labels_dir.glob("*.txt")):
        stem = txt_file.stem  # e.g. "10C15", "AC12"
        match = class_prefix.match(stem)
        if not match:
            print(f"‚ö†Ô∏è Could not infer class from {txt_file.name}")
            continue

        class_key = match.group(1).lower()
        if class_key not in COMPLETE_CLASS_TO_ID:
            print(f"‚ö†Ô∏è Unknown class '{class_key}' in {txt_file.name}")
            continue

        class_id = COMPLETE_CLASS_TO_ID[class_key]
        # Emit the canonical spelling ("Ac", not "ac") so class_name matches
        # the short names used by the other parsers in this file.
        class_name = COMPLETE_CLASSES[class_id]

        image_path = find_image(images_dir, stem)
        if not image_path:
            print(f"‚ö†Ô∏è Missing image for {stem}")
            continue

        with Image.open(image_path) as im:
            img_w, img_h = im.width, im.height

        with open(txt_file, "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    # parts[0] is the file's own class id, deliberately unused.
                    x_center, y_center, width, height = map(float, parts[1:])
                except ValueError:
                    # Malformed number: ignore the line like other bad lines.
                    continue
                all_rows.append([
                    str(image_path), class_id, class_name,
                    x_center, y_center, width, height,
                    float(img_w), float(img_h),
                ])

    df = pd.DataFrame(all_rows, columns=columns)

    print(f"‚úÖ Parsed {len(df)} annotations from {labels_dir}")
    return df


# Run the conversion.
complete_base = "datasets/The-Complete-Playing-Card-Dataset"
complete_df = parse_complete_yolo_dataset(base_dir=complete_base)

# Save the resulting CSV.
complete_df.to_csv(
    path_or_buf="datasets/The-Complete-Playing-Card-Dataset/complete_converted.csv",
    index=False,
)

# Show a preview.
display(complete_df.head())


‚ö†Ô∏è Could not infer class from JOKER0.txt
‚ö†Ô∏è Could not infer class from JOKER1.txt
‚ö†Ô∏è Could not infer class from JOKER10.txt
‚ö†Ô∏è Could not infer class from JOKER11.txt
‚ö†Ô∏è Could not infer class from JOKER12.txt
‚ö†Ô∏è Could not infer class from JOKER13.txt
‚ö†Ô∏è Could not infer class from JOKER14.txt
‚ö†Ô∏è Could not infer class from JOKER15.txt
‚ö†Ô∏è Could not infer class from JOKER16.txt
‚ö†Ô∏è Could not infer class from JOKER17.txt
‚ö†Ô∏è Could not infer class from JOKER18.txt
‚ö†Ô∏è Could not infer class from JOKER19.txt
‚ö†Ô∏è Could not infer class from JOKER2.txt
‚ö†Ô∏è Could not infer class from JOKER20.txt
‚ö†Ô∏è Could not infer class from JOKER21.txt
‚ö†Ô∏è Could not infer class from JOKER22.txt
‚ö†Ô∏è Could not infer class from JOKER23.txt
‚ö†Ô∏è Could not infer class from JOKER24.txt
‚ö†Ô∏è Could not infer class from JOKER25.txt
‚ö†Ô∏è Could not infer class from JOKER26.txt
‚ö†Ô∏è Could not infer class from JOKER27.txt
‚ö†Ô∏è Could not infer class from JOK

Unnamed: 0,image,class_id,class_name,bbox_x_center,bbox_y_center,bbox_width,bbox_height,image_width,image_height
0,datasets/The-Complete-Playing-Card-Dataset/Ima...,0,10c,0.325087,0.665365,0.083767,0.039641,4608.0,3456.0
1,datasets/The-Complete-Playing-Card-Dataset/Ima...,0,10c,0.32628,0.623698,0.073134,0.069734,4608.0,3456.0
2,datasets/The-Complete-Playing-Card-Dataset/Ima...,0,10c,0.559462,0.498264,0.092448,0.063079,4608.0,3456.0
3,datasets/The-Complete-Playing-Card-Dataset/Ima...,0,10c,0.513997,0.449508,0.051432,0.068576,4608.0,3456.0
4,datasets/The-Complete-Playing-Card-Dataset/Ima...,0,10c,0.307834,0.513889,0.055339,0.079282,4608.0,3456.0
