In [1]:
import os
import shutil
import pandas as pd
import zipfile
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive; drive_path (defined in a later cell) lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Working directory: the dataset archives are read from here and extracted here.
base_path = "/content"
# Google Drive folder that receives the final unified_dataset.zip.
drive_path = "/content/drive/MyDrive/GlaucomaProject"

In [4]:
# Create the Drive target folder (and any missing parents); a no-op when it already exists.
os.makedirs(drive_path, exist_ok=True)

In [5]:
# Archive file names, derived from the archive stems; order is the extraction order.
zip_files = [
    f"{stem}.zip"
    for stem in ("DRISHTI", "RIMONE", "ACRIMA", "REFUGE_preprocessed", "EyePACS")
]

In [6]:
# Extract every listed archive into base_path, reporting each one once it is done.
for archive_name in zip_files:
    with zipfile.ZipFile(os.path.join(base_path, archive_name), 'r') as archive:
        archive.extractall(base_path)
    print(f"✅ Unzipped: {archive_name}")

✅ Unzipped: DRISHTI.zip
✅ Unzipped: RIMONE.zip
✅ Unzipped: ACRIMA.zip
✅ Unzipped: REFUGE_preprocessed.zip
✅ Unzipped: EyePACS.zip


In [7]:
# Root folder of the merged dataset; copied images go into its "images" subfolder.
output_dir = os.path.join(base_path, "unified_dataset")
# Creating the nested "images" folder also creates output_dir itself.
os.makedirs(os.path.join(output_dir, "images"), exist_ok=True)

In [8]:
# One dict per copied image (image_path, label, dataset), appended by copy_images and
# later written to labels.csv. Re-running this cell resets the list to empty.
metadata = []

In [9]:
def copy_images(dataset_name, base_path, class_map, out_dir=None, records=None):
    """Copy the labelled images of one dataset split into the unified image folder.

    For every ``class_name -> label`` pair in ``class_map`` the folder
    ``base_path/class_name`` is scanned for ``.jpg`` / ``.jpeg`` / ``.png`` files
    (case-insensitive). Each file is copied to ``<out_dir>/images`` under the name
    ``<dataset_name>_<original name>`` and one record
    ``{"image_path", "label", "dataset"}`` is appended to ``records``.

    If that target name is already taken by an earlier record (for example the same
    file name occurring in two splits or two class folders of one dataset), a numeric
    suffix ``_1``, ``_2``, ... is inserted before the extension, so that no image is
    overwritten and every record points at its own file.

    Args:
        dataset_name: Prefix for the copied files and value of the ``dataset`` field.
        base_path: Folder that contains one sub-folder per class.
        class_map: Mapping of class sub-folder name to the label stored in the record.
        out_dir: Root of the unified dataset. Defaults to the notebook-level
            ``output_dir``.
        records: List that receives the records. Defaults to the notebook-level
            ``metadata``.

    Returns:
        None. The results are the copied files and the records appended to ``records``.
    """
    # Fall back to the notebook globals so existing three-argument calls keep working.
    if out_dir is None:
        out_dir = output_dir
    if records is None:
        records = metadata

    images_dir = os.path.join(out_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    # Relative paths already recorded; used to detect name collisions.
    used_paths = {rec["image_path"] for rec in records}

    for class_name, label in class_map.items():
        class_dir = os.path.join(base_path, class_name)
        if not os.path.isdir(class_dir):
            # Make a wrong or missing path visible instead of contributing nothing silently.
            print(f"WARNING: skipped missing folder: {class_dir}")
            continue

        # Sorted so the record order does not depend on the file system's listing order.
        for fname in sorted(os.listdir(class_dir)):
            if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
                continue

            stem, ext = os.path.splitext(f"{dataset_name}_{fname}")
            new_name = f"{stem}{ext}"
            attempt = 1
            while f"images/{new_name}" in used_paths:
                new_name = f"{stem}_{attempt}{ext}"
                attempt += 1

            image_path = f"images/{new_name}"
            shutil.copy(os.path.join(class_dir, fname), os.path.join(images_dir, new_name))
            used_paths.add(image_path)
            records.append({"image_path": image_path, "label": label, "dataset": dataset_name})

In [10]:
# DRISHTI: copy both split folders (Glaucoma -> 1, Normal -> 0).
# NOTE(review): the Train path has an extra "Images" level that the Test path lacks —
# confirm against the extracted archive layout.
for drishti_split in ("DRISHTI/Train/Images", "DRISHTI/Test"):
    copy_images("DRISHTI", os.path.join(base_path, drishti_split), {"Glaucoma": 1, "Normal": 0})

In [11]:
# RIMONE: copy the Train and Test folders (Glaucoma -> 1, Normal -> 0).
for rimone_split in ("RIMONE/Train", "RIMONE/Test"):
    copy_images("RIMONE", os.path.join(base_path, rimone_split), {"Glaucoma": 1, "Normal": 0})

In [12]:
# ACRIMA: copy the Train and Test folders (Glaucoma -> 1, Normal -> 0).
for acrima_split in ("ACRIMA/Train", "ACRIMA/Test"):
    copy_images("ACRIMA", os.path.join(base_path, acrima_split), {"Glaucoma": 1, "Normal": 0})

In [13]:
# REFUGE: copy the Train folder (Glaucoma -> 1, Normal -> 0).
# NOTE(review): only "REFUGE/Train" is copied, and the archive extracted earlier is named
# "REFUGE_preprocessed.zip" — confirm the extracted folder name and whether other splits exist.
copy_images("REFUGE", os.path.join(base_path, "REFUGE/Train"), {"Glaucoma": 1, "Normal": 0})

In [14]:
# EyePACS: copy the train, test and validation folders; the class folders are
# named "RG" (label 1) and "NRG" (label 0).
eyepacs_classes = {"RG": 1, "NRG": 0}
for split_name in ("train", "test", "validation"):
    copy_images(
        "EYEPACS",
        os.path.join(base_path, f"eyepac-light-v2-512-jpg/{split_name}"),
        eyepacs_classes,
    )

In [15]:
# Write one CSV row per copied image next to the "images" folder.
labels_csv = os.path.join(output_dir, "labels.csv")

# Naming the columns explicitly keeps the frame well-formed when metadata is empty;
# otherwise the summary below would fail with a KeyError on df['dataset'].
df = pd.DataFrame(metadata, columns=["image_path", "label", "dataset"])

# Several rows pointing at the same file mean a later copy overwrote an earlier one
# (or a copy cell was run twice); report it rather than writing it out unnoticed.
duplicate_rows = int(df["image_path"].duplicated().sum())
if duplicate_rows:
    print(f"WARNING: {duplicate_rows} rows share an image_path with an earlier row")

df.to_csv(labels_csv, index=False)
print(f"✅ Unified dataset created: {len(df)} images from {df['dataset'].nunique()} datasets")

✅ Unified dataset created: 11580 images from 5 datasets


In [16]:
# Zip the contents of output_dir into "unified_dataset.zip". The base name is relative, so
# the archive is created in the current working directory; the call returns the archive path.
shutil.make_archive("unified_dataset", 'zip', output_dir)

'/content/unified_dataset.zip'

In [17]:
# Move the archive from the current working directory into the Drive project folder.
drive_archive = os.path.join(drive_path, "unified_dataset.zip")
shutil.move("unified_dataset.zip", drive_archive)
print(f"✅ Unified dataset zipped and moved to Drive: {drive_path}")

✅ Unified dataset zipped and moved to Drive: /content/drive/MyDrive/GlaucomaProject
