.mat files and save them as PNG

In [3]:
import os
import numpy as np
import scipy.io
import h5py
import cv2
from tqdm import tqdm

# Folder holding the raw .mat files, plus the two folders that receive the
# exported PNG images and PNG masks.
input_path = r'D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\mat_data'
output_img_path = r'D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\processed\images'
output_mask_path = r'D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\processed\masks'

# Both destination folders must exist before anything is written into them.
for out_dir in (output_img_path, output_mask_path):
    os.makedirs(out_dir, exist_ok=True)

# Side length, in pixels, used when resizing images and masks further below.
IMG_SIZE = 256

# Names of all entries directly inside the input folder that end in ".mat".
mat_files = list(filter(lambda name: name.endswith('.mat'), os.listdir(input_path)))
print(f"Found {len(mat_files)} .mat files")

def load_mat_file(path):
    """Read the image and tumour mask stored under ``cjdata`` in a .mat file.

    ``scipy.io.loadmat`` is tried first. If that raises
    ``NotImplementedError``, the file is opened with ``h5py`` instead and
    both arrays are transposed after reading.

    Args:
        path: Location of the .mat file.

    Returns:
        An ``(image, mask)`` tuple taken from the ``image`` and ``tumorMask``
        fields of ``cjdata``.
    """
    try:
        # Non-HDF5 .mat layout: scipy exposes ``cjdata`` as a struct array,
        # hence the [0][0] unwrapping of each field.
        record = scipy.io.loadmat(path)['cjdata']
        return record['image'][0][0], record['tumorMask'][0][0]
    except NotImplementedError:
        # MATLAB v7.3 files are HDF5 containers, so read them through h5py.
        with h5py.File(path, 'r') as h5_file:
            group = h5_file['cjdata']
            image = np.array(group['image']).T
            mask = np.array(group['tumorMask']).T
        return image, mask

# Convert every .mat sample into two PNG files of IMG_SIZE x IMG_SIZE pixels:
# an 8-bit intensity image and a mask image whose pixels are 0 or 255.
# A failure on one file is reported and the loop carries on with the next one.
for file in tqdm(mat_files, desc="Processing"):
    try:
        full_path = os.path.join(input_path, file)
        image, mask = load_mat_file(full_path)

        # Stretch the intensities to the full 0-255 range, then store as 8-bit.
        image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
        image = image.astype(np.uint8)

        # Collapse the mask to {0, 1} so the later "* 255" cannot wrap around
        # in uint8 if a mask ever holds a value greater than 1.
        mask = np.ascontiguousarray(np.asarray(mask) != 0, dtype=np.uint8)

        # Resize. The mask holds labels, not intensities, so it uses
        # nearest-neighbour sampling instead of the default bilinear filter,
        # which would blend foreground and background along the boundary.
        image_resized = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        mask_resized = cv2.resize(
            mask, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST
        )

        # Save image and mask under the same base name in their own folders.
        base_name = os.path.splitext(file)[0]
        image_filename = os.path.join(output_img_path, base_name + ".png")
        mask_filename = os.path.join(output_mask_path, base_name + ".png")

        # cv2.imwrite reports failure through its return value rather than an
        # exception, so turn a False result into an error that gets reported.
        if not cv2.imwrite(image_filename, image_resized):
            raise IOError(f"could not write {image_filename}")
        if not cv2.imwrite(mask_filename, mask_resized * 255):
            raise IOError(f"could not write {mask_filename}")

    except Exception as e:
        # Deliberately best-effort: report the file and keep going.
        print(f"Failed to process {file}: {e}")


Found 3064 .mat files


Processing: 100%|██████████| 3064/3064 [00:31<00:00, 97.19it/s] 


Dataset splitting

In [4]:
import os
import shutil
import random

# Split the exported image/mask pairs into train, val and test folders.
# Files are copied (not moved), so the source folders stay untouched.

# Update this to your actual base path
base_dir = r'D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\processed'
images_dir = os.path.join(base_dir, 'images')
masks_dir = os.path.join(base_dir, 'masks')

# Output path
split_base_dir = os.path.join(base_dir, 'split')
splits = ['train', 'val', 'test']
split_ratio = {'train': 0.7, 'val': 0.2, 'test': 0.1}

# Fixed seed so that re-running this cell reproduces exactly the same split.
# Without it, each run would copy a different assignment into the same
# folders and a sample could end up in more than one split.
SPLIT_SEED = 42

# Create necessary directories
for split in splits:
    os.makedirs(os.path.join(split_base_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(split_base_dir, split, 'masks'), exist_ok=True)

# Get all image files. Sorted first because os.listdir returns entries in
# arbitrary order, which would make the seeded shuffle non-reproducible.
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.png'))

# Every image needs a mask with the same file name. Check this before any
# copying so a missing mask cannot leave a half-written split behind.
missing_masks = [
    f for f in image_files if not os.path.isfile(os.path.join(masks_dir, f))
]
if missing_masks:
    raise FileNotFoundError(
        f"{len(missing_masks)} image(s) have no matching mask, e.g. {missing_masks[0]}"
    )

# A private generator keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(image_files)

# Calculate split indexes; the test split receives whatever is left over
# after the train and val counts have been rounded down.
total = len(image_files)
train_end = int(split_ratio['train'] * total)
val_end = train_end + int(split_ratio['val'] * total)

split_files = {
    'train': image_files[:train_end],
    'val': image_files[train_end:val_end],
    'test': image_files[val_end:]
}

# Copy files
for split, files in split_files.items():
    dst_images_dir = os.path.join(split_base_dir, split, 'images')
    dst_masks_dir = os.path.join(split_base_dir, split, 'masks')

    # Files left over from an earlier, different split would silently mix
    # samples between splits, so make them visible.
    stale = set(os.listdir(dst_images_dir)) - set(files)
    if stale:
        print(
            f"⚠️ {len(stale)} file(s) already in {dst_images_dir} are not part of "
            f"the current '{split}' split; remove them to avoid leakage."
        )

    for file in files:
        shutil.copy(os.path.join(images_dir, file), os.path.join(dst_images_dir, file))
        shutil.copy(os.path.join(masks_dir, file), os.path.join(dst_masks_dir, file))

print("✅ Data successfully split into train, val, and test sets.")


✅ Data successfully split into train, val, and test sets.
