In [1]:
import os

# Dataset root whose immediate sub-folders are counted
path = 'kaggle_3m'

# Everything directly under the dataset root (files and folders alike)
entries = os.listdir(path)

# Keep a running tally of the entries that are themselves directories
directory_count = 0
for entry in entries:
    if os.path.isdir(os.path.join(path, entry)):
        directory_count += 1

print(f"There are {directory_count} directories in {path}")


There are 110 directories in kaggle_3m


In [9]:
import os
import random
import shutil

# Path to the 'kaggle_3m' directory
source_directory = 'kaggle_3m'

# The 'Train' directory lives in the current working directory,
# next to (not inside) 'kaggle_3m'
train_directory = 'Train'

# Fixed seed so the 80/20 selection is identical on every run and machine
split_seed = 42

# This cell moves data, so it must only run once against a fresh dataset.
# Running it again would move 80% of the *remaining* folders into 'Train'
# and contaminate the held-out 20%, so fail before touching anything.
if os.path.isdir(train_directory) and os.listdir(train_directory):
    raise RuntimeError(
        f"{train_directory} is not empty; the split appears to have been run already. "
        f"Restore {source_directory} and remove {train_directory} before re-running."
    )

# Ensure the 'Train' directory exists
os.makedirs(train_directory, exist_ok=True)

# List all sub-directories of 'kaggle_3m'. os.listdir() returns entries in
# arbitrary, filesystem-dependent order, so sort first and then shuffle with
# a fixed seed to obtain a reproducible selection.
all_folders = sorted(
    d for d in os.listdir(source_directory)
    if os.path.isdir(os.path.join(source_directory, d))
)
random.Random(split_seed).shuffle(all_folders)
print(len(all_folders))

# Calculate 80% of these folders
cutoff = int(len(all_folders) * 0.8)

# Move the selected 80% of folders to the 'Train' directory
for folder in all_folders[:cutoff]:
    shutil.move(os.path.join(source_directory, folder), os.path.join(train_directory, folder))

# Flatten: move every file from the sub-folders up into 'Train', then remove
# the emptied sub-folders. Walking bottom-up guarantees a folder's contents
# are handled before the folder itself is removed.
for root, dirs, files in os.walk(train_directory, topdown=False):
    # Files already sitting directly in 'Train' are where they belong
    if root != train_directory:
        for file in files:
            # Passing the directory as destination makes shutil.move raise
            # on a name collision instead of silently overwriting a file
            shutil.move(os.path.join(root, file), train_directory)
    for name in dirs:
        os.rmdir(os.path.join(root, name))  # Only succeeds on empty directories

print(f"Moved {cutoff} folders to {train_directory} and reorganized all files within them.")


110
Moved 88 folders to Train and reorganized all files within them.


In [10]:
import os

# Path where the files are located (the flat 'Train' directory)
train_directory = 'Train'

# Suffix that distinguishes a mask file from its image
mask_suffix = '_mask.tif'

# Collect all .tif files. os.listdir() order is arbitrary, so sort to make
# the im_NNNN numbering reproducible across runs and machines.
all_tif_files = sorted(f for f in os.listdir(train_directory) if f.endswith('.tif'))

# Set copy for constant-time "does the mask exist?" checks inside the loop
tif_file_set = set(all_tif_files)

# Initialize a counter for naming files
counter = 1

# Process files for renaming
for file in all_tif_files:
    # Masks are renamed together with their image, never on their own
    if file.endswith(mask_suffix):
        continue

    # Determine the base name without the extension
    base_name = os.path.splitext(file)[0]
    mask_name = base_name + mask_suffix

    # Images without a corresponding mask are left untouched
    if mask_name not in tif_file_set:
        continue

    # Create new file names
    new_base_name = f'im_{counter:04d}.tif'
    new_mask_name = f'im_{counter:04d}_mask.tif'

    # Rename the image file, then the mask file
    for old_name, new_name in ((file, new_base_name), (mask_name, new_mask_name)):
        if old_name == new_name:
            # Already carries its final name (e.g. the cell was run before)
            continue
        new_path = os.path.join(train_directory, new_name)
        # os.rename silently replaces an existing file on POSIX; refuse
        # rather than destroy data left behind by a partial earlier run
        if os.path.exists(new_path):
            raise FileExistsError(f"Refusing to overwrite existing file: {new_path}")
        os.rename(os.path.join(train_directory, old_name), new_path)

    # Increment the counter after renaming a pair
    counter += 1

print(f"Renamed {counter - 1} file pairs in {train_directory}.")


Renamed 3205 file pairs in Train.


In [11]:
# Folder that currently holds the flat list of image and mask .tif files
base_directory = 'Train'

# Destination sub-folders: images go to 'imagesTr', masks go to 'labelsTr'
images_tr_directory = os.path.join(base_directory, 'imagesTr')
labels_tr_directory = os.path.join(base_directory, 'labelsTr')

# Create both destinations up front (no error if they already exist)
for target_directory in (images_tr_directory, labels_tr_directory):
    os.makedirs(target_directory, exist_ok=True)

# Route every .tif file to the folder matching its kind
for file in os.listdir(base_directory):
    if not file.endswith('.tif'):
        # Skips non-tif entries, including the two destination folders
        continue

    # A name containing '_mask.tif' marks a mask; anything else is an image
    if '_mask.tif' in file:
        destination_directory = labels_tr_directory
    else:
        destination_directory = images_tr_directory

    shutil.move(os.path.join(base_directory, file), os.path.join(destination_directory, file))

print(f"Files have been moved to {images_tr_directory} and {labels_tr_directory} respectively.")


Files have been moved to Train/imagesTr and Train/labelsTr respectively.


In [12]:
import os

# Folder containing the mask files to be renamed
labels_tr_directory = 'Train/labelsTr'

# Drop the '_mask' marker from every mask file name
for file in os.listdir(labels_tr_directory):
    if not file.endswith('_mask.tif'):
        # Only files still carrying the mask suffix are touched
        continue

    # New name is the old one with '_mask' removed
    new_file_name = file.replace('_mask', '')

    old_path = os.path.join(labels_tr_directory, file)
    new_path = os.path.join(labels_tr_directory, new_file_name)
    os.rename(old_path, new_path)

print(f"Files in {labels_tr_directory} have been renamed.")


Files in Train/labelsTr have been renamed.


In [13]:
import os

# Path to the 'imagesTr' directory
images_tr_directory = 'Train/imagesTr'

# Extension of the files to rename and the suffix inserted before it
tif_extension = '.tif'
channel_suffix = '_0000'

# Rename files in 'imagesTr' by adding '_0000' before '.tif'
for file in os.listdir(images_tr_directory):
    if not file.endswith(tif_extension):
        continue

    # Skip files that already carry the suffix so that re-running this cell
    # does not produce names such as 'im_0001_0000_0000.tif'
    if file.endswith(channel_suffix + tif_extension):
        continue

    # Construct new file name by inserting '_0000' before '.tif'
    new_file_name = file[:-len(tif_extension)] + channel_suffix + tif_extension

    # Rename the file
    os.rename(os.path.join(images_tr_directory, file), os.path.join(images_tr_directory, new_file_name))

print(f"Files in {images_tr_directory} have been renamed.")

Files in Train/imagesTr have been renamed.


In [16]:
import os

# 'imagesTr' folder of the Dataset001_Brain dataset
# NOTE(review): no visible cell copies 'Train/imagesTr' to this location;
# presumably that step was done outside the notebook. Confirm.
path = 'nnUNet/nnUNet_raw/Dataset001_Brain/imagesTr'

# Every entry in the folder is counted; no file/directory filtering is applied
entries = os.listdir(path)
entry_count = len(entries)
print(entry_count)

3205


In [17]:
import os

# 'labelsTr' folder of the Dataset001_Brain dataset
path = 'nnUNet/nnUNet_raw/Dataset001_Brain/labelsTr'

# Every entry in the folder is counted; no file/directory filtering is applied
entries = os.listdir(path)
entry_count = len(entries)
print(entry_count)

3205


In [19]:
import os
import numpy as np
import imageio

# Path to the 'labelsTr' directory: this cell rewrites the segmentation
# masks in place, not the images
labels_tr_directory = 'nnUNet/nnUNet_raw/Dataset001_Brain/labelsTr'

# Process each .tif file in the directory
for file_name in os.listdir(labels_tr_directory):
    if not file_name.endswith('.tif'):
        continue

    file_path = os.path.join(labels_tr_directory, file_name)

    # Load the mask through the explicit v2 API; the top-level
    # imageio.imread is deprecated and emits a warning on every call
    mask = np.asarray(imageio.v2.imread(file_path))

    # Binarize: 0 stays 0, anything greater than 0 becomes 1
    binary_mask = (mask > 0).astype(np.uint8)

    # Overwrite the original file with the binarized mask
    imageio.v2.imwrite(file_path, binary_mask)

print("All images have been binarized: 0 remains 0, values > 0 are set to 1.")



  image = imageio.imread(file_path)


All images have been binarized: 0 remains 0, values > 0 are set to 1.
