In [17]:
import shutil
import os

In [18]:
categories = ['Benign cases', 'Malignant cases', 'Normal cases']

In [19]:
def count_files(directory, directory_key):
    file_count = sum([len(files) for r, d, files in os.walk(directory)])
    print('Number of files in {} directory = {}'.format(directory_key, file_count))
    return file_count

def copy_normal_test_files(dest, chest_ctscan_images_dataset_src):
    dispositions = ['test']
    dest_target = os.path.join(dest, 'Normal cases')
    chest_ctscan_count = count_files(chest_ctscan_images_dataset_src, 'chest_ctscan')
    
    print('Copying over normal test disposition files')
    normal_index = 5000
    for disposition in dispositions:
        chest_ctscan_normal_file_path = os.path.join(chest_ctscan_images_dataset_src, disposition)
        normal_file_path = os.path.join(chest_ctscan_normal_file_path, 'normal')
            
        for file in os.listdir(normal_file_path):
            dest_filename = os.path.join(dest_target,'Normal case ({}).png'.format(normal_index))
            normal_index = normal_index + 1
            shutil.copy(os.path.join(normal_file_path, file), dest_filename)
                
    count_files(dest, 'destination')

def copy_normal_files(dest, chest_ctscan_images_dataset_src):
    dispositions = ['train', 'valid']
    dest_target = os.path.join(dest, 'Normal cases')
    chest_ctscan_count = count_files(chest_ctscan_images_dataset_src, 'chest_ctscan')
    
    print('Copying over normal train, valid disposition files')
    normal_index = 5000
    for disposition in dispositions:
        chest_ctscan_normal_file_path = os.path.join(chest_ctscan_images_dataset_src, disposition)
        normal_file_path = os.path.join(chest_ctscan_normal_file_path, 'normal')
            
        for file in os.listdir(normal_file_path):
            dest_filename = os.path.join(dest_target,'Normal case ({}).png'.format(normal_index))
            normal_index = normal_index + 1
            shutil.copy(os.path.join(normal_file_path, file), dest_filename)
                
    count_files(dest, 'destination')
    
def copy_malignant_files(dest, chest_ctscan_images_dataset_src):
    # Unfortunatly the subdirectory names are not consistent for test, train and valid...
    dispositions = [os.path.join('train', 'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib'),
                    os.path.join('train', 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa'),
                    os.path.join('train', 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa'),
                    os.path.join('valid', 'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib'),
                    os.path.join('valid', 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa'),
                    os.path.join('valid', 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa')]
    dest_target = os.path.join(dest, 'Malignant cases')
    print('Copying over malignant disposition files')
    normal_index = 5000
    for disposition in dispositions:
        malignant_file_path = os.path.join(chest_ctscan_images_dataset_src, disposition)
            
        for file in os.listdir(malignant_file_path):
            dest_filename = os.path.join(dest_target,'Malignant case ({}).png'.format(normal_index))
            normal_index = normal_index + 1
            shutil.copy(os.path.join(malignant_file_path, file), dest_filename)
                
    print('{} files for train, valid malignant'.format(count_files(dest, 'destination')))


def copy_malignant_test_files(dest, chest_ctscan_images_dataset_src):
    # Unfortunatly the subdirectory names are not consistent for test, train and valid...
    dispositions = [os.path.join('test', 'adenocarcinoma'), 
                    os.path.join('test', 'large.cell.carcinoma'),
                    os.path.join('test', 'squamous.cell.carcinoma')]
    dest_target = os.path.join(dest, 'Malignant cases')
    print('Copying over malignant disposition files')
    normal_index = 5000
    for disposition in dispositions:
        malignant_file_path = os.path.join(chest_ctscan_images_dataset_src, disposition)
            
        for file in os.listdir(malignant_file_path):
            dest_filename = os.path.join(dest_target,'Malignant case ({}).png'.format(normal_index))
            normal_index = normal_index + 1
            shutil.copy(os.path.join(malignant_file_path, file), dest_filename)
                
    print('{} files for test malignant '.format(count_files(dest, 'destination')))
    

In [20]:
# Source path 
src = r'../../data/cancer-detection-model/The IQ-OTHNCCD lung cancer dataset'
    
# Destination path 
dest = r'../../data/cancer-detection-model/MERGED-REVISED The IQ-OTHNCCD lung cancer dataset'
dest_test = r'../../data/cancer-detection-model/TEST lung cancer dataset'
 
# Directory of images to be copied into MERGED directory
chest_ctscan_images_dataset_src =r'../../data/cancer-detection-model/Chest CT-Scan images Dataset'
    
if (os.path.exists(dest)):
    print('ERROR Output directory {} already exists and should be deleted'.format(dest))
else:
    src_file_count = count_files(src, 'source')
    
    # Copy the content of 
    # source to destination 
    destination = shutil.copytree(src, dest) 

    dest_file_count = count_files(dest, 'destination')
    
    if (src_file_count != dest_file_count):
        print("Error file count from source and destination differs")
    else:
        print('Copy succeeded for {}...'.format(dest))
        
        # Copy normal files to MERGED/Normal Cases directory
        copy_normal_files(dest, chest_ctscan_images_dataset_src)
        
        # Copy mailigannt files to MERGED/Malignant case directory
        copy_malignant_files(dest, chest_ctscan_images_dataset_src)

if (os.path.exists(dest_test)):
    print('ERROR Output directory {} already exists and should be deleted'.format(dest_test))
else:
    os.mkdir(dest_test)
    for category in categories:
        dest = os.path.join(dest_test, category)
        os.mkdir(dest)       

        if (category == 'Benign'):
            continue
        elif (category == 'Normal'):
            copy_normal_test_files(dest_test, chest_ctscan_images_dataset_src)
        else:
            # Copy mailigannt files to MERGED/Malignant case directory
            copy_malignant_test_files(dest_test, chest_ctscan_images_dataset_src)

        print('Copy succeeded for {}...'.format(dest))
        dest_file_count = count_files(dest_test, 'destination')
        

    

Number of files in source directory = 1098
Number of files in destination directory = 1098
Copy succeeded for ../../data/cancer-detection-model/MERGED-REVISED The IQ-OTHNCCD lung cancer dataset...
Number of files in chest_ctscan directory = 1000
Copying over normal train, valid disposition files
Number of files in destination directory = 1259
Copying over malignant disposition files
Number of files in destination directory = 1783
1783 files for train, valid malignant
Copying over malignant disposition files


FileNotFoundError: [Errno 2] No such file or directory: '../../data/cancer-detection-model/TEST lung cancer dataset\\Malignant cases\\Malignant case (5000).png'