In [None]:
# python
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Dataset layout: the metadata CSV and both image folders sit under data/ISIC.
isic_root = os.path.join('data', 'ISIC')
meta_csv = os.path.join(isic_root, 'HAM10000_metadata.csv')
image_dirs = [
    os.path.join(isic_root, part_name)
    for part_name in ('HAM10000_images_part_1', 'HAM10000_images_part_2')
]

# Fail fast if the expected inputs are not on disk, before any loading starts.
if not os.path.isfile(meta_csv):
    raise FileNotFoundError(f"Не найден файл метаданных: {meta_csv}")

# Folders are checked in list order; the first absent one is reported.
absent_dir = next((p for p in image_dirs if not os.path.isdir(p)), None)
if absent_dir is not None:
    raise FileNotFoundError(f"Не найдена папка с изображениями: {absent_dir}")

# Columns the code below reads: `image_id` is matched against image file
# names, `dx` is used as the class label.
required_cols = {'image_id', 'dx'}

df = pd.read_csv(meta_csv)

# Stop early with an explicit message when the CSV lacks a needed column.
missing = required_cols.difference(df.columns)
if missing:
    raise ValueError(f"В метаданных отсутствуют столбцы: {missing}")

# Index every JPEG in the image folders by its file stem (the file name
# without extension); `image_id` values are looked up in this mapping below.
path_by_id = {}
for d in image_dirs:
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # deterministic if two files in one folder share a stem (for example,
    # names that differ only in extension case).
    for fname in sorted(os.listdir(d)):
        if fname.lower().endswith('.jpg'):
            img_id = os.path.splitext(fname)[0]
            path_by_id[img_id] = os.path.join(d, fname)

df['path'] = df['image_id'].map(path_by_id)

# Rows whose image is not on disk cannot be used. They are still dropped, but
# the count is reported so that a wrong folder or a partial download does not
# shrink the dataset unnoticed.
n_total = len(df)
n_missing = int(df['path'].isna().sum())
if n_missing:
    print(
        f"Предупреждение: для {n_missing} из {n_total} записей метаданных "
        f"не найдено изображение, они пропущены"
    )
df = df.dropna(subset=['path']).reset_index(drop=True)

# With nothing left there is nothing to split; fail here with a clear message
# rather than later inside the splitting code.
if df.empty:
    raise ValueError(
        f"Ни для одной записи метаданных не найдено изображение в папках: {image_dirs}"
    )

# Features are the image paths; labels are the `dx` values.
X = list(df['path'])
y = list(df['dx'])

# 80/20 split that keeps the label proportions in both parts; the fixed seed
# makes the split reproducible.
# NOTE(review): the split is done per image. If the metadata holds several
# images of the same lesion, they could land in both parts - confirm whether
# a group-aware split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Two-column frames (path, label) describing each part of the split.
train_df = pd.DataFrame(dict(path=X_train, label=y_train))
test_df = pd.DataFrame(dict(path=X_test, label=y_test))

# The split lists are stored next to the dataset itself.
train_csv = os.path.join(isic_root, 'split_train.csv')
test_csv = os.path.join(isic_root, 'split_test.csv')

# Write each frame without the index column, train first and then test.
for frame, target in ((train_df, train_csv), (test_df, test_csv)):
    frame.to_csv(target, index=False)

print(f"Сохранены списки:\n  {train_csv}\n  {test_csv}")