In [1]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
# The mount call prints "Mounted at /content/drive" on success.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os

# Folder on the mounted Drive that becomes the process working directory,
# so relative paths used afterwards resolve against it.
cbis_path = '/content/drive/MyDrive/colab_scripts/repository/datasets'
os.chdir(cbis_path)

# Echo the directory actually in effect as a quick sanity check.
print(f"Current Directory: {os.getcwd()}")

Current Directory: /content/drive/MyDrive/colab_scripts/repository/datasets


In [3]:
!pip install tqdm
import pandas as pd
import os
import shutil
from tqdm import tqdm
from ast import literal_eval




In [4]:
# Load the annotation table (presumably the test split, judging by the file
# name - confirm). The path is relative to the current working directory.
# The functions defined below read these columns from the resulting frame:
# 'new name', 'abnormality id', 'jpg image file path', 'yolo_bbox', 'pathology'.
csv_path = 'CBIS-DDSM/csv/test.csv'
df = pd.read_csv(csv_path)

In [5]:
# --- 📁 Function to Create Images Folder with Progress ---

def create_images_folder(df, output_dir='CBIS-DDSM/NEW/images2'):
    """Copy one reference image per 'new name' group into ``output_dir``.

    Rows of ``df`` are grouped by the 'new name' column. Within each group the
    row with the lowest 'abnormality id' supplies the source file (column
    'jpg image file path'), which is copied to ``<output_dir>/<new name>.jpg``.
    Groups whose source path is missing (NaN / not a path-like value) or does
    not point to an existing regular file are reported and skipped, so a
    single bad row cannot abort the whole copy run.

    Args:
        df: DataFrame with the columns 'new name', 'abnormality id' and
            'jpg image file path'.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None. Files are written to ``output_dir`` as a side effect.
    """
    os.makedirs(output_dir, exist_ok=True)
    grouped = df.groupby('new name')

    print(f"\nüìÅ Creating images in: {output_dir}")
    for new_name, group in tqdm(grouped, desc="Copying images"):
        # A stable sort keeps the original row order among equal ids, so the
        # chosen reference row is deterministic even when ids tie.
        ref_row = group.sort_values('abnormality id', kind='stable').iloc[0]
        img_path = ref_row['jpg image file path']

        # An empty CSV cell arrives here as float NaN, which would make the
        # os.path functions raise TypeError; a directory would pass an
        # exists() check but make copyfile fail. Treat both as "not found".
        is_path_like = isinstance(img_path, (str, os.PathLike))
        if not is_path_like or not os.path.isfile(img_path):
            print(f"‚ö†Ô∏è Image not found: {img_path}")
            continue

        dest_path = os.path.join(output_dir, f"{new_name}.jpg")
        shutil.copyfile(img_path, dest_path)


In [6]:
# --- 📝 Function to Create Labels Folder with Progress ---
def create_labels_folder(df, output_dir='CBIS-DDSM/NEW/labels2'):
    """Write one label text file per 'new name' group into ``output_dir``.

    Rows of ``df`` are grouped by 'new name' and processed in ascending
    'abnormality id' order. Each usable row contributes one line of the form
    ``<class> <x_center> <y_center> <width> <height>`` where the four numbers
    are parsed from the 'yolo_bbox' column and the class is derived from the
    'pathology' column (BENIGN / BENIGN_WITHOUT_CALLBACK -> 0, MALIGNANT -> 1,
    compared case-insensitively after stripping whitespace).

    Rows with an unparsable bbox or an unknown/missing pathology are reported
    and skipped. A group with no usable rows produces no file. Lines are
    joined with newlines; no trailing newline is written.

    Args:
        df: DataFrame with the columns 'new name', 'abnormality id',
            'yolo_bbox' and 'pathology'.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None. Files are written to ``output_dir`` as a side effect.
    """
    class_by_pathology = {
        'BENIGN': 0,
        'BENIGN_WITHOUT_CALLBACK': 0,
        'MALIGNANT': 1,
    }

    os.makedirs(output_dir, exist_ok=True)
    grouped = df.groupby('new name')

    print(f"\nüìù Creating labels in: {output_dir}")
    for new_name, group in tqdm(grouped, desc="Writing labels"):
        label_lines = []

        # Stable sort: rows with equal ids keep their original relative
        # order, so the emitted line order is deterministic.
        group_sorted = group.sort_values('abnormality id', kind='stable')

        for _, row in group_sorted.iterrows():
            try:
                x_center, y_center, width, height = literal_eval(row['yolo_bbox'])
            except (ValueError, SyntaxError, TypeError):
                # literal_eval raises ValueError/SyntaxError on malformed
                # input (including NaN cells); unpacking raises
                # ValueError/TypeError on a wrong shape. Anything else
                # (e.g. KeyboardInterrupt) must propagate.
                print(f"‚ö†Ô∏è Invalid bbox for '{new_name}': {row['yolo_bbox']}")
                continue

            # str() keeps an empty cell (float NaN) from raising
            # AttributeError; it then falls into the unknown branch below.
            pathology = str(row['pathology']).strip().upper()
            cls = class_by_pathology.get(pathology)
            if cls is None:
                print(f"‚ö†Ô∏è Unknown pathology in '{new_name}': {pathology}")
                continue

            label_lines.append(f"{cls} {x_center} {y_center} {width} {height}")

        if label_lines:
            label_path = os.path.join(output_dir, f"{new_name}.txt")
            with open(label_path, 'w') as f:
                f.write("\n".join(label_lines))



In [7]:
# Step 4: Run everything - copy the reference images first, then write the
# label files. Both calls use their default output directories.
create_images_folder(df)
create_labels_folder(df)

print("\n‚úÖ Done! Images and label files are created with proper order and references.")



üìÅ Creating images in: CBIS-DDSM/NEW/images2


Copying images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 348/348 [03:47<00:00,  1.53it/s]



üìù Creating labels in: CBIS-DDSM/NEW/labels2


Writing labels: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 348/348 [00:04<00:00, 86.47it/s]


‚úÖ Done! Images and label files are created with proper order and references.



