# Import necessary libraries


In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import shutil
from pathlib import Path

### Create directories

- Loops through `base_dirs` and creates the specified directories if they don't already exist.
- Uses `Path.mkdir()` to create the directory and ensure parent directories are created if needed.
- Prints a confirmation message for every directory in the list, whether it was newly created or already existed.


In [2]:
# Folders (relative to the current working directory) that later cells write into.
base_dirs = ['dataset_processed']

for dir_path in base_dirs:
    # exist_ok=True makes this cell safe to re-run; parents=True allows nested entries.
    target = Path(dir_path)
    target.mkdir(parents=True, exist_ok=True)
    print(f"Created directory: {dir_path}")

Created directory: dataset_processed


# MPIImageProcessor Class

- **`__init__`**: Initializes the image processor, taking source and output directories as arguments. Creates the output directory if it doesn't exist.
  
- **`process_all_patients`**: Loops through each patient folder in the source directory and processes images for each patient. It extracts patient ID and condition from the folder name, then calls `process_patient_images`.

- **`process_patient_images`**: Processes all images for a given patient: the `AC Display` image, plus any QPS and QGS images identified by keywords in their file names. It also records the patient's ID and condition as metadata.

- **`extract_views`**: Extracts different views (SA, VLA, HLA) from the AC Display image and saves them as separate files.

- **`process_image_qps`**: Processes QPS images to extract views for stress, rest, and reversibility, and saves them. It also extracts a dashboard from the QPS image.

- **`process_image_qgc`**: Processes QGS images (the method and its output files use the name `qgc`) to extract a single view and a dashboard, then saves them.

- **`save_metadata`**: Saves the collected metadata (patient ID and condition) to `patient_metadata.csv` in the output directory.


In [7]:
class MPIImageProcessor:
    """Crop fixed-position panels out of exported MPI images, one output folder per patient.

    Every sub-folder of ``source_dir`` is treated as one patient. The first
    whitespace-separated word of the folder name is the patient ID and the text
    inside the first pair of parentheses (if any) is the condition. Crops are
    written as BMP files to ``output_dir/<patient_id>/`` and the collected
    (patient_id, condition) pairs are written to ``patient_metadata.csv``.

    The crop boxes below are hard-coded pixel coordinates, so the exports are
    assumed to share one fixed layout -- TODO confirm against the dataset.
    """

    # File extensions treated as images when scanning a patient folder.
    IMAGE_EXTENSIONS = ('.bmp', '.png', '.jpg', '.jpeg')

    # Crop boxes keyed by output-file suffix: columns x1:x2, rows y1:y2.
    DISPLAY_REGIONS = {
        'SA': {'x1': 17, 'x2': 1023, 'y1': 82, 'y2': 309},
        'VLA': {'x1': 17, 'x2': 1023, 'y1': 352, 'y2': 566},
        'HLA': {'x1': 17, 'x2': 1023, 'y1': 608, 'y2': 819},
    }
    QPS_REGIONS = {
        'qps_stress': {'x1': 362, 'x2': 660, 'y1': 19, 'y2': 309},
        'qps_rest': {'x1': 361, 'x2': 659, 'y1': 325, 'y2': 614},
        'qps_reversibility': {'x1': 361, 'x2': 666, 'y1': 631, 'y2': 918},
        'qps_dashboard': {'x1': 963, 'x2': 1253, 'y1': 45, 'y2': 563},
    }
    QGC_REGIONS = {
        'qgc': {'x1': 360, 'x2': 954, 'y1': 0, 'y2': 648},
        'qgc_dashboard': {'x1': 963, 'x2': 1253, 'y1': 45, 'y2': 401},
    }

    def __init__(self, source_dir, output_dir):
        """Initialize the MPI Image Processor.

        Args:
            source_dir: Directory containing one sub-folder per patient
            output_dir: Directory where processed images will be saved
                (created immediately if it does not exist)
        """
        self.source_dir = source_dir
        self.output_dir = output_dir
        # One {'patient_id', 'condition'} dict per processed patient folder.
        self.metadata = []

        os.makedirs(self.output_dir, exist_ok=True)

    @staticmethod
    def _has_token(file_name, token):
        """Return True if ``token`` appears in ``file_name`` as a whole word.

        Words are runs of letters/digits; everything else (spaces, dots,
        underscores, dashes) is a separator. The comparison ignores case.
        A plain substring test would also accept 'NAC' when looking for 'AC'.
        """
        normalized = ''.join(ch if ch.isalnum() else ' ' for ch in file_name.upper())
        return token.upper() in normalized.split()

    @staticmethod
    def _crop_region(image, box):
        """Return ``image[y1:y2, x1:x2]``, or None when that slice is empty.

        NumPy slicing never raises for out-of-range bounds; it silently returns
        an empty array, which ``cv2.imwrite`` refuses to encode. A slice that is
        only partly outside the image is returned clipped, unchanged from the
        previous behaviour.
        """
        view = image[box['y1']:box['y2'], box['x1']:box['x2']]
        return view if view.size else None

    def _save_regions(self, image, regions, patient_id, image_type):
        """Crop every box in ``regions`` and write it to the patient's folder.

        Args:
            image: Decoded image array to crop from
            regions: Mapping of file suffix -> crop box
            patient_id: Patient ID (name of the output sub-folder)
            image_type: Prefix of each output file name

        Empty crops and failed writes are reported and skipped so one bad image
        does not abort the whole run.
        """
        output_folder = os.path.join(self.output_dir, patient_id)
        os.makedirs(output_folder, exist_ok=True)

        for suffix, box in regions.items():
            view = self._crop_region(image, box)
            if view is None:
                print(f'Error: Missing data for patient {patient_id} '
                      f'({image_type}_{suffix}: crop region lies outside the image)')
                continue

            out_path = os.path.join(output_folder, f'{image_type}_{suffix}.bmp')
            # imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(out_path, view):
                print(f'Error: Could not write image {out_path}')

    def process_all_patients(self):
        """Process all patient folders in the source directory, then save the metadata CSV.

        Folders are visited in sorted order so the CSV rows are reproducible.
        Entries of ``source_dir`` that are not directories are ignored.
        """
        for folder in sorted(os.listdir(self.source_dir)):
            folder_path = os.path.join(self.source_dir, folder)
            if not os.path.isdir(folder_path):
                continue

            name_parts = folder.split()
            if not name_parts:
                # Whitespace-only folder name: no patient ID can be derived.
                continue

            # Extract patient ID and condition from the folder name.
            patient_id = name_parts[0]
            condition = folder.split('(')[1].split(')')[0] if '(' in folder else 'Unknown'

            os.makedirs(os.path.join(self.output_dir, patient_id), exist_ok=True)

            self.process_patient_images(folder_path, patient_id, condition)

        self.save_metadata()

    def process_patient_images(self, folder_path, patient_id, condition):
        """Process all images for a specific patient and record the patient's metadata.

        Args:
            folder_path: Path to the patient's folder
            patient_id: Patient ID
            condition: Patient's medical condition
        """
        # The display image is looked up by its exact expected name.
        ac_display_path = os.path.join(folder_path, f"{patient_id} AC Display.bmp")
        if os.path.exists(ac_display_path):
            self.extract_views(ac_display_path, patient_id, 'AC')

        # The remaining images are recognised by keywords in the file name.
        for file in sorted(os.listdir(folder_path)):
            lower_file = file.lower()
            if not lower_file.endswith(self.IMAGE_EXTENSIONS):
                continue
            if 'clear' not in lower_file:
                # Every handled variant requires the 'clear' keyword.
                continue

            file_path = os.path.join(folder_path, file)
            upper_file = file.upper()
            if ('QPS' in upper_file or 'QPC' in upper_file) and self._has_token(file, 'AC'):
                self.process_image_qps(file_path, patient_id, 'AC')
            elif 'QGS' in upper_file and 'stress' in lower_file:
                self.process_image_qgc(file_path, patient_id, 'Stress')
            elif 'QGS' in upper_file and 'rest' in lower_file:
                self.process_image_qgc(file_path, patient_id, 'Rest')

        self.metadata.append({'patient_id': patient_id, 'condition': condition})

    def extract_views(self, image_path, patient_id, image_type):
        """Extract SA, VLA, and HLA views from a Display image.

        Args:
            image_path: Path to the image file
            patient_id: Patient ID
            image_type: Type of image (e.g., 'AC'); used as the file-name prefix
        """
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}")
            return

        self._save_regions(img, self.DISPLAY_REGIONS, patient_id, image_type)

    def process_image_qps(self, image_path, patient_id, image_type):
        """Extract the stress, rest, reversibility and dashboard crops from a QPS image.

        Args:
            image_path: Path to the QPS image
            patient_id: Patient ID
            image_type: Type of image; used as the file-name prefix
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error reading image: {image_path}")
            return

        self._save_regions(image, self.QPS_REGIONS, patient_id, image_type)

    def process_image_qgc(self, image_path, patient_id, image_type):
        """Extract the main view and the dashboard crop from a QGS image.

        The caller selects files containing 'QGS'; outputs are named '*_qgc*'.

        Args:
            image_path: Path to the image
            patient_id: Patient ID
            image_type: Type of image (e.g., 'Stress', 'Rest'); used as the file-name prefix
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error reading image: {image_path}")
            return

        self._save_regions(image, self.QGC_REGIONS, patient_id, image_type)

    def save_metadata(self):
        """Save collected metadata to ``patient_metadata.csv`` in the output directory.

        The columns are given explicitly so the header row is written even when
        no patient was processed and the file can always be read back.
        """
        metadata_df = pd.DataFrame(self.metadata, columns=['patient_id', 'condition'])
        metadata_df.to_csv(os.path.join(self.output_dir, 'patient_metadata.csv'), index=False)

# Process MPI Images

- Initializes the `MPIImageProcessor` with source and output directories.
- Processes all patient images and saves the results to the output directory.
- Prints a confirmation message when processing is complete.


In [8]:
# Kaggle locations: the input dataset and the folder the crops are written to.
source_dir = "/kaggle/input/mpi-images/MPI Images SELECTED"
output_dir = "/kaggle/working/dataset_processed"

# Crop every patient's images; the metadata CSV is written at the end of the run.
processor = MPIImageProcessor(source_dir=source_dir, output_dir=output_dir)
processor.process_all_patients()

print(f"Processing complete. Results saved to {output_dir}")

Processing complete. Results saved to /kaggle/working/dataset_processed


# Zip Folder

- Zips the contents of the `folder_to_zip` directory. `output_zip` is the archive's base name, so the archive is written to `output_zip` with `.zip` appended.
- Prints a confirmation message when done.


In [9]:
# Directory whose contents go into the archive.
folder_to_zip = "/kaggle/working/dataset_processed"
# Archive base name: make_archive appends the ".zip" extension itself.
output_zip = "/kaggle/working/dataset_processed"

shutil.make_archive(base_name=output_zip, format='zip', root_dir=folder_to_zip)

print(f"Zipped folder saved as {output_zip}.zip")

Zipped folder saved as /kaggle/working/dataset_processed.zip
