In [13]:
from google.colab import drive
import os
import shutil
import pandas as pd
import random

# Mount Google Drive so the raw dataset is reachable from the Colab VM.
drive.mount('/content/drive')

# Input (Drive) and output (local VM disk) locations.
# Adjust RAW_PATH to match your own Drive folder structure.
RAW_PATH = "/content/drive/MyDrive/pdi/dataset/raw/skin-cancer-mnist-ham10000"
OUT_PATH = "/content/dataset/ham10000"

# Sub-folders of RAW_PATH that are searched for the .jpg files.
IMG_DIRS = ["HAM10000_images_part_1", "HAM10000_images_part_2"]

# Metadata CSV; the code below reads its 'dx' column here and its
# 'image_id' column when building the split.
META = os.path.join(RAW_PATH, "HAM10000_metadata.csv")

# Report whether the metadata file is where we expect it.
meta_found = os.path.exists(META)
print(f"Arquivo existe: {meta_found}")

# Few-shot parameters: number of classes, training images per class (shots)
# and test images per class.
N_CLASSES, N_SHOTS, N_TEST = 5, 5, 15

# Fixed seed so the shuffle used for sampling is reproducible.
random.seed(42)

# Load the metadata and keep only the N_CLASSES most frequent diagnoses
# (avoids rare classes).
df = pd.read_csv(META)
class_counts = df['dx'].value_counts()
classes = class_counts.index[:N_CLASSES]

print("Classes usadas:", classes.tolist())

# Create the <OUT_PATH>/<split>/<class> folders, starting from empty split
# folders. `exist_ok=True` alone would keep images (and class folders) left by
# an earlier run with a different seed or different N_SHOTS / N_TEST /
# N_CLASSES, silently inflating the per-class counts. Only the generated
# train/ and test/ trees are removed; anything else under OUT_PATH is kept.
for split in ['train', 'test']:
    shutil.rmtree(os.path.join(OUT_PATH, split), ignore_errors=True)
    for c in classes:
        os.makedirs(os.path.join(OUT_PATH, split, c), exist_ok=True)


def _find_image(image_id):
    """Return the path of `<image_id>.jpg` in the first IMG_DIRS folder that
    contains it, or None when no folder has it."""
    for d in IMG_DIRS:
        candidate = os.path.join(RAW_PATH, d, image_id + ".jpg")
        if os.path.exists(candidate):
            return candidate
    return None


def _copy_images(image_ids, dest_dir):
    """Copy every image in `image_ids` into `dest_dir`.

    Returns the list of image ids whose .jpg was not found in any IMG_DIRS
    folder, so the caller can report them instead of silently ending up with
    fewer images than requested.
    """
    missing = []
    for image_id in image_ids:
        src = _find_image(image_id)
        if src is None:
            missing.append(image_id)
        else:
            shutil.copy(src, dest_dir)
    return missing


# Per-class sampling: shuffle, then take N_SHOTS for train and the next N_TEST
# for test.
needed = N_SHOTS + N_TEST
for c in classes:
    class_rows = df[df['dx'] == c]

    # HAM10000 metadata groups images by lesion_id and one lesion can have
    # several images. Keeping one image per lesion prevents the same lesion
    # from appearing in both train and test (evaluation leakage). The guard
    # keeps the cell working on metadata files without that column.
    if 'lesion_id' in class_rows.columns:
        class_rows = class_rows.drop_duplicates(subset='lesion_id')

    imgs = class_rows['image_id'].tolist()
    random.shuffle(imgs)

    if len(imgs) < needed:
        print(f"Aviso: classe '{c}' tem apenas {len(imgs)} imagens "
              f"(necessárias: {needed}).")

    train_imgs = imgs[:N_SHOTS]
    test_imgs = imgs[N_SHOTS:needed]

    missing = _copy_images(train_imgs, os.path.join(OUT_PATH, 'train', c))
    missing += _copy_images(test_imgs, os.path.join(OUT_PATH, 'test', c))
    if missing:
        print(f"Aviso: classe '{c}': {len(missing)} imagens não encontradas "
              f"em {IMG_DIRS}: {missing}")

print("Separação concluída!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Arquivo existe: True
Classes usadas: ['nv', 'mel', 'bkl', 'bcc', 'akiec']
Separação concluída!


In [14]:
import os

def count_images_per_class(split_dir):
    """Count the entries inside each class sub-folder of `split_dir`.

    Args:
        split_dir: Path of a split folder (e.g. ".../train") whose
            sub-folders are the classes.

    Returns:
        A dict mapping class folder name to the number of entries in it,
        inserted in sorted class-name order so the report is stable across
        runs (os.listdir order is arbitrary). Entries of `split_dir` that are
        not directories are skipped. Returns an empty dict when `split_dir`
        does not exist.
    """
    if not os.path.isdir(split_dir):
        return {}

    counts = {}
    for classe in sorted(os.listdir(split_dir)):
        class_dir = os.path.join(split_dir, classe)
        # Skip stray files; calling os.listdir on them would raise.
        if os.path.isdir(class_dir):
            counts[classe] = len(os.listdir(class_dir))
    return counts


# Must point at the same folder the split was written to (OUT_PATH).
DATASET_DIR = "/content/dataset/ham10000"

for split in ['train', 'test']:
    print(f"\n=== {split.upper()} ===")
    split_dir = os.path.join(DATASET_DIR, split)
    counts = count_images_per_class(split_dir)
    if not counts:
        # Make an empty or missing split visible instead of printing nothing.
        print(f"Nenhuma classe encontrada em {split_dir}")
    for classe, n_imgs in counts.items():
        print(f"{classe}: {n_imgs} imagens")


=== TRAIN ===
bcc: 5 imagens
bkl: 5 imagens
nv: 5 imagens
mel: 5 imagens
akiec: 5 imagens

=== TEST ===
bcc: 15 imagens
bkl: 15 imagens
nv: 15 imagens
mel: 15 imagens
akiec: 15 imagens
