In [1]:
import os
from scipy.io import loadmat
import numpy as np
import h5py
from PIL import Image

# Input folders, one per published part of the dataset; each holds .mat files.
# They are processed in exactly this order.
dataset_dirs = [
    'brainTumorDataPublic_1-766',
    'brainTumorDataPublic_767-1532',
    'brainTumorDataPublic_1533-2298',
    'brainTumorDataPublic_2299-3064',
]
# Individual names are kept alongside the list for code that refers to a
# single part directly.
dataset_dir1, dataset_dir2, dataset_dir3, dataset_dir4 = dataset_dirs

# Root folder that receives one sub-folder of PNG images per tumor class.
output_dir = 'tumor_classification'

# Maps the integer label stored in each .mat file to the name of the output
# sub-folder for that tumor type. Labels are numbered from 1, in this order.
tumor_classes = dict(
    enumerate(("meningioma", "glioma", "pituitary_tumor"), start=1)
)

# Make sure every per-class output folder exists before any image is written.
# exist_ok=True lets the cell be re-run without failing on existing folders.
for class_name in tumor_classes.values():
    os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

def _to_uint8(image):
    """Linearly rescale *image* to the 0-255 range and return it as uint8.

    The arithmetic is done in float64 so that subtracting the minimum cannot
    wrap around when the input has a narrow integer dtype. An image whose
    pixels all share one value has no range to stretch and is returned as
    all zeros instead of dividing by zero.
    """
    pixels = np.asarray(image, dtype=np.float64)
    lowest = pixels.min()
    span = pixels.max() - lowest
    if span == 0:
        return np.zeros(pixels.shape, dtype=np.uint8)
    return ((pixels - lowest) / span * 255).astype(np.uint8)


# Convert every .mat file into a PNG stored under its tumor-class folder.
# counter is a running index appended to each file name, so several images
# with the same PID never overwrite one another.
counter = 0
for dataset_dir in dataset_dirs:
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # counter-to-file assignment (and thus the file names) reproducible.
    for mat_file in sorted(os.listdir(dataset_dir)):
        if not mat_file.endswith('.mat'):
            continue
        filepath = os.path.join(dataset_dir, mat_file)

        # Copy everything needed into memory, then close the file before the
        # normalisation and PNG encoding.
        with h5py.File(filepath, 'r') as f:
            cjdata = f['cjdata']
            # Take the single stored element explicitly; calling int() on a
            # one-element array is deprecated in recent NumPy releases.
            label = int(np.array(cjdata['label']).flat[0])
            # PID is read as an array of character codes.
            PID = ''.join(
                chr(int(code)) for code in np.array(cjdata['PID']).flatten()
            )
            # NOTE(review): h5py presumably exposes MATLAB arrays with their
            # axes reversed, so the saved picture may be transposed relative
            # to MATLAB's view - confirm whether `.T` is wanted here.
            image = np.array(cjdata['image'])

        try:
            tumor_name = tumor_classes[label]
        except KeyError:
            # Same exception type as a plain lookup, but naming the file.
            raise KeyError(
                f"unexpected tumor label {label} in {filepath}"
            ) from None

        # Normalize and save image
        normalized_image = _to_uint8(image)
        class_dir = os.path.join(output_dir, tumor_name)
        image_path = os.path.join(class_dir, f"{PID}_{counter}.png")
        Image.fromarray(normalized_image).save(image_path)
        counter += 1

print(f"Images successfully saved in {output_dir}")


Images successfully saved in tumor_classification
