In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# stored on Drive can be read and written through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import os
import yaml
from IPython.display import display, Markdown

# --- STEP 1: DEFINE PATHS AND MAPPING ---

# **IMPORTANT: CHANGE THIS LINE TO YOUR ACTUAL DATASET ROOT PATH**
DATASET_ROOT = '/content/drive/MyDrive/dataset_#01'

# The original class indices for this dataset are:
# 0: 'Caries'
# 1: 'Cavity'
# 2: 'Crack'
# 3: 'Tooth'

# Map OLD class index to NEW class index.
# New classes: ['Cavity', 'Non Cavity'] (indices 0, 1)
#
# NOTE: this mapping is NOT idempotent. After one pass every 'Non Cavity' box
# carries index 1, and applying the mapping a second time would send 1 -> 0,
# turning those boxes into 'Cavity'. Apply it exactly once per dataset copy.
CLASS_MAPPING = {
    # Decay classes -> new index 0 ('Cavity')
    0: 0,  # 'Caries'
    1: 0,  # 'Cavity'

    # Everything else -> new index 1 ('Non Cavity')
    2: 1,  # 'Crack'
    3: 1,  # 'Tooth'
}

# Directories to process (assuming the standard Roboflow structure)
LABEL_DIRS = ['test/labels', 'train/labels', 'valid/labels']

# Banner summarising the run configuration before any file is touched.
display(Markdown("## ⚙️ Starting Class Re-Mapping"))
display(Markdown(f"**Dataset Root Directory:** `{DATASET_ROOT}`"))
display(Markdown(f"**Mapping:** {CLASS_MAPPING}"))
display(Markdown("---"))


# --- STEP 2: DEFINE THE LABEL UPDATE FUNCTION ---

def _remap_label_lines(lines, mapping):
    """Return ``(remapped_lines, dropped_count)`` for the lines of one label file."""
    remapped = []
    dropped = 0
    for line in lines:
        parts = line.strip().split()
        if len(parts) < 5:
            # Too few fields to be a "class x y w h" annotation. Blank lines are
            # removed quietly; anything else is counted so the caller can warn.
            if parts:
                dropped += 1
            continue

        try:
            old_index = int(parts[0])
        except ValueError:
            # First field is not an integer class index: the line is discarded.
            dropped += 1
            continue

        if old_index in mapping:
            # Re-emit with the new index; remaining fields are joined by single spaces.
            remapped.append(f"{mapping[old_index]} {' '.join(parts[1:])}\n")
        else:
            # Index not covered by the mapping: keep the line exactly as read.
            remapped.append(line)
    return remapped, dropped


def update_yolo_labels(labels_dir_relative, mapping, dataset_root=None):
    """Re-map the class index of every annotation in one YOLO labels directory.

    Every ``.txt`` file in the directory (except ``classes.txt``) is read line by
    line. A line with at least five whitespace-separated fields whose first field
    parses as an integer has that class index replaced via ``mapping``; indices
    missing from ``mapping`` are left unchanged. Lines with fewer than five
    fields or a non-integer first field are removed from the file.

    A file is written back only when its contents actually change, so files that
    need no edit are not rewritten.

    Args:
        labels_dir_relative: Labels directory relative to the dataset root,
            e.g. ``'train/labels'``.
        mapping: Dict mapping old class index (int) to new class index (int).
        dataset_root: Dataset root directory. Defaults to the module-level
            ``DATASET_ROOT`` when omitted.

    Returns:
        None. A summary (files processed / modified, malformed lines removed)
        is printed, as are warnings for a missing or empty directory.
    """
    root = DATASET_ROOT if dataset_root is None else dataset_root
    full_path = os.path.join(root, labels_dir_relative)

    if not os.path.exists(full_path):
        print(f"⚠️ Warning: Directory NOT FOUND: {full_path}")
        return

    txt_files = [
        name for name in os.listdir(full_path)
        if name.endswith('.txt') and name != 'classes.txt'
    ]

    if not txt_files:
        print(f"ℹ️ No .txt files found in: {labels_dir_relative} (Check folder name casing!)")
        return

    processed_count = 0
    changed_count = 0
    dropped_count = 0
    for filename in txt_files:
        filepath = os.path.join(full_path, filename)

        with open(filepath, 'r') as f:
            lines = f.readlines()

        new_lines, dropped = _remap_label_lines(lines, mapping)
        dropped_count += dropped

        # Skip the write when nothing changed: the resulting file would be
        # byte-identical, and rewriting thousands of files is wasted I/O.
        if new_lines != lines:
            with open(filepath, 'w') as f:
                f.writelines(new_lines)
            changed_count += 1

        processed_count += 1

    print(
        f"✅ Successfully processed {processed_count} files in {labels_dir_relative} "
        f"({changed_count} modified)"
    )
    if dropped_count:
        print(f"⚠️ Warning: removed {dropped_count} malformed line(s) in {labels_dir_relative}")

# --- STEP 3: EXECUTE LABEL UPDATES ---

# CLASS_MAPPING is not idempotent: once applied, every 'Non Cavity' box has
# index 1, and a second pass would map 1 -> 0 and silently relabel those boxes
# as 'Cavity'. data.yaml is rewritten after the labels, so its class count
# tells us whether this dataset copy has already been re-mapped.
_guard_yaml_path = os.path.join(DATASET_ROOT, 'data.yaml')
_original_class_count = len(CLASS_MAPPING)
_remapped_class_count = len(set(CLASS_MAPPING.values()))

_already_remapped = False
if os.path.exists(_guard_yaml_path):
    with open(_guard_yaml_path, 'r') as f:
        _current_config = yaml.safe_load(f)
    _already_remapped = (
        isinstance(_current_config, dict)
        # Only meaningful when the mapping actually merges classes.
        and _remapped_class_count != _original_class_count
        and _current_config.get('nc') == _remapped_class_count
    )

if _already_remapped:
    print(
        f"Skipping label re-mapping: {_guard_yaml_path} already declares "
        f"nc={_remapped_class_count}, so the labels appear to be re-mapped already. "
        "Restore the original dataset before re-running."
    )
else:
    for label_dir in LABEL_DIRS:
        update_yolo_labels(label_dir, CLASS_MAPPING)


# --- STEP 4: UPDATE THE data.yaml FILE ---

yaml_path = os.path.join(DATASET_ROOT, 'data.yaml')

display(Markdown("---"))
display(Markdown("## 📄 Updating `data.yaml`"))

if not os.path.exists(yaml_path):
    print(f"❌ Error: data.yaml not found at {yaml_path}. Cannot proceed with final step.")
else:
    with open(yaml_path, 'r') as f:
        # An empty file parses to None; fall back to an empty mapping so the
        # key assignments below do not raise TypeError.
        data_yaml = yaml.safe_load(f) or {}

    # New binary label set; nc is derived from it so the two cannot drift apart.
    new_class_names = ['Cavity', 'Non Cavity']
    data_yaml['nc'] = len(new_class_names)
    data_yaml['names'] = new_class_names

    # Remove the roboflow section to clean up the config
    data_yaml.pop('roboflow', None)

    # sort_keys=False keeps the keys in their existing order instead of alphabetising.
    with open(yaml_path, 'w') as f:
        yaml.safe_dump(data_yaml, f, sort_keys=False)

    print(f"✅ Successfully updated {yaml_path}")
    display(Markdown("### New `data.yaml` Contents:"))
    # Read the file back in Python rather than `!cat {yaml_path}`: unquoted shell
    # interpolation breaks on paths containing spaces or shell metacharacters.
    with open(yaml_path, 'r') as f:
        print(f.read())

## ⚙️ Starting Class Re-Mapping

**Dataset Root Directory:** `/content/drive/MyDrive/dataset_#01`

**Mapping:** {0: 0, 1: 0, 2: 1, 3: 1}

---

✅ Successfully processed 253 files in test/labels
✅ Successfully processed 1991 files in train/labels
✅ Successfully processed 251 files in valid/labels


---

## 📄 Updating `data.yaml`

✅ Successfully updated /content/drive/MyDrive/dataset_#01/data.yaml


### New `data.yaml` Contents:

train: ../train/images
val: ../valid/images
test: ../test/images
nc: 2
names:
- Cavity
- Non Cavity
