In [3]:
import os
import shutil
import random

# Source images and the per-split output directories (single dummy class
# folder "class_1" under each split).
source_dir = 'DDI/images'
base_dir = 'DDI_data'
train_dir = os.path.join(base_dir, 'train/class_1')
valid_dir = os.path.join(base_dir, 'valid/class_1')
test_dir = os.path.join(base_dir, 'test/class_1')

# Named `split_dir` rather than `dir` so the builtin `dir()` is not shadowed
# for the rest of the notebook session.
for split_dir in [train_dir, valid_dir, test_dir]:
    os.makedirs(split_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort first and shuffle
# with a fixed seed: re-running the cell then yields the same split instead
# of scattering a different partition over the files already copied.
SPLIT_SEED = 42
files = sorted(f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)))
random.Random(SPLIT_SEED).shuffle(files)

# Split files: first 80% -> train, next 10% -> valid, remainder (~10%) -> test
train_split = int(0.8 * len(files))
valid_split = int(0.9 * len(files))
train_files = files[:train_split]
valid_files = files[train_split:valid_split]
test_files = files[valid_split:]

# The three slices must partition the file list exactly.
assert len(train_files) + len(valid_files) + len(test_files) == len(files)

def copy_files(files, destination, source=None):
    """Copy the named files from a source directory into ``destination``.

    Args:
        files: Iterable of file names (not full paths) located in the
            source directory.
        destination: Directory (or path) handed to ``shutil.copy`` as the
            copy target.
        source: Directory containing ``files``. When omitted, the
            module-level ``source_dir`` is used, which keeps existing
            two-argument calls working unchanged.

    Raises:
        FileNotFoundError: Propagated from ``shutil.copy`` if a source file
            is missing.
    """
    if source is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        source = source_dir
    for f in files:
        shutil.copy(os.path.join(source, f), destination)

# Populate each split directory from its file list (train, then valid, then
# test), and report completion once all three are done.
for split_files, split_target in (
    (train_files, train_dir),
    (valid_files, valid_dir),
    (test_files, test_dir),
):
    copy_files(split_files, split_target)

print("Files have been split and copied.")

Files have been split and copied.


In [4]:
# Sanity check on the shuffled listing: overall count plus a five-name sample.
print(f"Total files found: {len(files)}")
print(f"Some file names: {files[:5]}")

Total files found: 656
Some file names: ['000143.png', '000158.png', '000619.png', '000383.png', '000080.png']


In [5]:
# Sanity check on the training subset.
# NOTE(review): the label reads "Total files found" but the number printed is
# len(train_files), i.e. only the training slice - consider relabelling.
train_preview = train_files[:5]
print("Total files found: " + str(len(train_files)))
print("Some file names: " + str(train_preview))


Total files found: 524
Some file names: ['000143.png', '000158.png', '000619.png', '000383.png', '000080.png']


In [6]:
# Copy the training file list into train_dir via copy_files.
# NOTE(review): the identical call already appears in the splitting cell
# above - presumably a manual re-run; confirm whether this cell is still needed.
copy_files(train_files, train_dir)

In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import shutil

data_folder = 'DDI/images'
# os.path.join with a single argument returns it unchanged, so the path
# components are joined explicitly.
metadata_file = os.path.join('DDI', 'ddi_metadata.csv')
output_folder = 'DDI_data'

# Read metadata
metadata = pd.read_csv(metadata_file)

# Fail fast if the CSV lacks the columns the copy step reads from each row.
missing_columns = {'DDI_file', 'malignant'} - set(metadata.columns)
if missing_columns:
    raise KeyError(f"{metadata_file} is missing required columns: {sorted(missing_columns)}")

# Split the dataset 80/10/10 (train/val/test). The split is seeded but not
# stratified, so class balance may differ between subsets.
train_val, test = train_test_split(metadata, test_size=0.1, random_state=42)
train, val = train_test_split(train_val, test_size=1/9, random_state=42)  # 1/9 of 90% is 10% of the total

# The three subsets must partition the metadata rows exactly.
assert len(train) + len(val) + len(test) == len(metadata)

# Function to create folders and subfolders
def create_folders(path):
    """Create the ``<subset>/<category>`` directory tree under ``path``.

    Subsets are train, val and test; categories are malignant and benign.
    Directories that already exist are left untouched.
    """
    subsets = ('train', 'val', 'test')
    categories = ('malignant', 'benign')
    targets = [
        os.path.join(path, subset_name, category_name)
        for subset_name in subsets
        for category_name in categories
    ]
    for target in targets:
        os.makedirs(target, exist_ok=True)

# Build the train/val/test x malignant/benign directory tree under output_folder.
create_folders(output_folder)

def copy_files(df, subset):
    """Copy each image listed in ``df`` into its class folder for ``subset``.

    For every row, the file named by the ``DDI_file`` column is copied from
    the module-level ``data_folder`` to
    ``output_folder/<subset>/<malignant|benign>/``; the class folder is
    chosen by the truthiness of the row's ``malignant`` value.
    """
    for _, record in df.iterrows():
        image_name = record['DDI_file']
        if record['malignant']:
            label_dir = 'malignant'
        else:
            label_dir = 'benign'
        origin = os.path.join(data_folder, image_name)
        target = os.path.join(output_folder, subset, label_dir, image_name)
        shutil.copy(origin, target)

# Copy each subset's images into its folder tree: train, then val, then test.
for subset_frame, subset_name in ((train, 'train'), (val, 'val'), (test, 'test')):
    copy_files(subset_frame, subset_name)

print("Dataset successfully split and copied.")

Dataset successfully split and copied.


#latest

In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import shutil

# Define the paths
data_folder = 'DDI/images'
# os.path.join with a single argument returns it unchanged, so the path
# components are joined explicitly.
metadata_file = os.path.join('DDI', 'ddi_metadata.csv')
output_folder = 'DDI_data'

# Read the metadata
metadata = pd.read_csv(metadata_file)

# Fail fast if the CSV lacks the columns this cell reads further down
# (file name and label for copying, label and skin tone for stratification).
missing_columns = {'DDI_file', 'malignant', 'skin_tone'} - set(metadata.columns)
if missing_columns:
    raise KeyError(f"{metadata_file} is missing required columns: {sorted(missing_columns)}")

# Ensure the output folder and subfolders structure
def create_folders(path, clean=False):
    """Create the ``<subset>/<category>`` directory tree under ``path``.

    Args:
        path: Root folder that receives the train/val/test subfolders, each
            holding a malignant and a benign folder.
        clean: When True, an existing ``<subset>/<category>`` folder is
            deleted and recreated empty. Defaults to False, which leaves
            existing folders and their contents untouched.
    """
    for subset in ['train', 'val', 'test']:
        for category in ['malignant', 'benign']:
            target = os.path.join(path, subset, category)
            if clean and os.path.isdir(target):
                shutil.rmtree(target)
            os.makedirs(target, exist_ok=True)

# Start from empty class folders: images left behind by a previous run with a
# different split would otherwise stay on disk and end up in more than one of
# train/val/test, leaking evaluation data into training.
create_folders(output_folder, clean=True)

import pandas as pd
from sklearn.model_selection import train_test_split
# Stratify jointly on the label and the skin tone: combine the 'malignant'
# and 'skin_tone' columns into one key, e.g. "<malignant>_<skin_tone>".
metadata['composite_label'] = metadata['malignant'].astype(str) + "_" + metadata['skin_tone'].astype(str)

# Now use the composite_label for stratification:
# 60% train, then the remaining 40% is halved into val and test (20% each).
train, temp_df = train_test_split(metadata, test_size=0.4, random_state=42, stratify=metadata['composite_label'])
val, test = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['composite_label'])

# The three subsets must partition the metadata rows with no overlap.
assert len(train) + len(val) + len(test) == len(metadata)
assert set(train.index).isdisjoint(val.index)
assert set(train.index).isdisjoint(test.index)
assert set(val.index).isdisjoint(test.index)

# Function to copy files to their respective folders based on classification
def copy_files(df, subset):
    """Copy the images listed in ``df`` into ``output_folder/<subset>/<class>/``.

    Each row supplies a file name (``DDI_file``) looked up under the
    module-level ``data_folder``; a truthy ``malignant`` value routes the
    file to the malignant folder, anything else to benign.
    """
    class_folder = {True: 'malignant', False: 'benign'}
    for _, entry in df.iterrows():
        name = entry['DDI_file']
        chosen = class_folder[bool(entry['malignant'])]
        shutil.copy(
            os.path.join(data_folder, name),
            os.path.join(output_folder, subset, chosen, name),
        )

# Copy every subset into its folder tree, in the order train -> val -> test.
subset_frames = {'train': train, 'val': val, 'test': test}
for subset_name, subset_frame in subset_frames.items():
    copy_files(subset_frame, subset_name)

print("Dataset successfully split and copied with stratification.")


Dataset successfully split and copied with stratification.


# Create HAM10000 data

In [1]:
import pandas as pd
import os
import shutil

directory_path = 'ISIC-images/'

metadata_path = os.path.join(directory_path, 'metadata.csv')
metadata = pd.read_csv(metadata_path)

# Destination subfolder for each recognised label
benign_path = os.path.join(directory_path, 'benign')
malignant_path = os.path.join(directory_path, 'malignant')
label_dirs = {'benign': benign_path, 'malignant': malignant_path}

# exist_ok=True replaces the separate "check, then create" steps.
for label_dir in label_dirs.values():
    os.makedirs(label_dir, exist_ok=True)

# Loop through the rows in the DataFrame
for _, row in metadata.iterrows():
    # Get the image file name and the label.
    # The file name is built with an upper-case '.JPG' extension.
    image_filename = row['isic_id'] + '.JPG'
    label = row['benign_malignant']

    # Set the source path for the image
    source_path = os.path.join(directory_path, image_filename)
    # None when the label is neither 'benign' nor 'malignant'
    destination_dir = label_dirs.get(label)

    if os.path.exists(source_path):
        if destination_dir is None:
            continue  # Skip if label is neither benign nor malignant

        # Move the image to the appropriate folder
        destination_path = os.path.join(destination_dir, image_filename)
        shutil.move(source_path, destination_path)
        print(f'Moved {image_filename} to {destination_path}')
    elif destination_dir is not None and os.path.exists(os.path.join(destination_dir, image_filename)):
        # Already moved by an earlier run of this cell: re-running should be
        # a quiet no-op rather than a warning for every image.
        continue
    else:
        print(f'Warning: {source_path} does not exist and was not moved.')

print('Image reorganization complete.')


Moved ISIC_0024306.JPG to ISIC-images/benign/ISIC_0024306.JPG
Moved ISIC_0024307.JPG to ISIC-images/benign/ISIC_0024307.JPG
Moved ISIC_0024308.JPG to ISIC-images/benign/ISIC_0024308.JPG
Moved ISIC_0024309.JPG to ISIC-images/benign/ISIC_0024309.JPG
Moved ISIC_0024310.JPG to ISIC-images/malignant/ISIC_0024310.JPG
Moved ISIC_0024311.JPG to ISIC-images/benign/ISIC_0024311.JPG
Moved ISIC_0024313.JPG to ISIC-images/malignant/ISIC_0024313.JPG
Moved ISIC_0024314.JPG to ISIC-images/benign/ISIC_0024314.JPG
Moved ISIC_0024315.JPG to ISIC-images/malignant/ISIC_0024315.JPG
Moved ISIC_0024316.JPG to ISIC-images/benign/ISIC_0024316.JPG
Moved ISIC_0024317.JPG to ISIC-images/benign/ISIC_0024317.JPG
Moved ISIC_0024319.JPG to ISIC-images/benign/ISIC_0024319.JPG
Moved ISIC_0024320.JPG to ISIC-images/benign/ISIC_0024320.JPG
Moved ISIC_0024321.JPG to ISIC-images/benign/ISIC_0024321.JPG
Moved ISIC_0024322.JPG to ISIC-images/benign/ISIC_0024322.JPG
Moved ISIC_0024323.JPG to ISIC-images/malignant/ISIC_0024323.