In [None]:
import os
import argparse
import numpy as np
import nibabel as nib
from PIL import Image
from pathlib import Path
import logging

In [None]:
# Set up logging
# Configure logging output: INFO level and above, each line formatted as
# "timestamp - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the processing functions defined below.
logger = logging.getLogger(__name__)

def normalize_voxel_values(data, background_value=-250):
    """
    Linearly rescale voxel intensities onto the 8-bit range [0, 255].

    Values below ``background_value`` are first raised to it. The minimum of
    the clipped data then maps to 0 and its maximum to 255.

    Args:
        data: numpy array of voxel intensities (callers in this file pass a
            single 2D layer)
        background_value: Lower clipping bound (default: -250)

    Returns:
        uint8 array with the same shape as ``data``; all zeros when the
        clipped data holds a single constant value
    """
    clipped = np.clip(data, background_value, None)
    lowest, highest = clipped.min(), clipped.max()

    # A constant image has no intensity range; return zeros instead of
    # dividing by zero.
    if highest == lowest:
        return np.zeros_like(clipped, dtype=np.uint8)

    scaled = ((clipped - lowest) / (highest - lowest)) * 255
    # The uint8 cast drops the fractional part.
    return scaled.astype(np.uint8)

def find_max_non_background_layer(data, background_value=-250):
    """
    Locate the layer along axis 0 that holds the most non-background voxels.

    A voxel counts as non-background when it differs from
    ``background_value``. When several layers tie, the lowest index wins
    (``np.argmax`` returns the first maximum).

    Args:
        data: 3D numpy array
        background_value: Value regarded as background

    Returns:
        Index of the layer with the highest non-background voxel count
    """
    # One count per layer, in layer order
    counts_per_layer = [
        np.sum(data[layer_idx, :, :] != background_value)
        for layer_idx in range(data.shape[0])
    ]

    best_layer = np.argmax(counts_per_layer)
    logger.info(f"Max non-background layer: {best_layer} with {counts_per_layer[best_layer]} non-background voxels")

    return best_layer

def extract_layers(data, center_layer, n, background_value=-250):
    """
    Collect the center layer and its neighbours at distances n and 2n.

    Offsets that fall outside the volume are skipped with a warning.

    Args:
        data: 3D numpy array, sliced along axis 0
        center_layer: Index of the center layer
        n: Distance between the center and its neighbouring layers
        background_value: Background value (not used by this function)

    Returns:
        Dictionary mapping "<name>_<index>" keys (index zero-padded to three
        digits) to the corresponding 2D layer arrays
    """
    last_index = data.shape[0] - 1

    # (name, offset) pairs, in extraction order
    targets = (
        ('center', 0),
        ('plus_n', n),
        ('plus_2n', 2 * n),
        ('minus_n', -n),
        ('minus_2n', -2 * n),
    )

    extracted = {}
    for label, shift in targets:
        idx = center_layer + shift

        # Skip neighbours that fall off either end of the volume
        if idx < 0 or idx > last_index:
            logger.warning(f"Layer {idx} ({label}) is out of bounds [0, {last_index}], skipping")
            continue

        extracted[f"{label}_{idx:03d}"] = data[idx, :, :]
        logger.info(f"Extracted layer {idx} ({label})")

    return extracted

def save_layer_as_png(layer_data, output_path):
    """
    Write a grayscale 2D layer to disk as an RGB image.

    The layer is copied into three identical channels so the saved file has
    the (height, width, 3) layout of an RGB picture.

    Args:
        layer_data: 2D numpy array (normalized to 0-255)
        output_path: Output file path
    """
    # Add a trailing channel axis, then repeat it three times (R = G = B)
    with_channel_axis = np.expand_dims(layer_data, axis=-1)
    rgb_pixels = np.repeat(with_channel_axis, 3, axis=-1)

    Image.fromarray(rgb_pixels, mode='RGB').save(output_path)

def process_nifti_file(nifti_path, output_dir, n, background_value=-250):
    """
    Process a single NIfTI file and extract layers

    Loads the volume, finds the layer with the most non-background voxels,
    extracts that layer plus its neighbours at distances n and 2n, normalizes
    each layer and saves it as a PNG in ``output_dir``.

    Any exception is logged (with traceback) and swallowed, so one bad file
    does not abort a whole dataset run.

    Args:
        nifti_path: Path to .nii.gz file
        output_dir: Output directory for this file's layers
        n: Distance parameter
        background_value: Background value
    """
    try:
        logger.info(f"Processing: {nifti_path}")

        # Load NIfTI file
        nifti_img = nib.load(nifti_path)
        data = nifti_img.get_fdata()

        logger.info(f"Data shape: {data.shape}")
        logger.info(f"Data range: [{data.min():.2f}, {data.max():.2f}]")

        # Find layer with maximum non-background voxels
        center_layer = find_max_non_background_layer(data, background_value)

        # Extract layers
        layers = extract_layers(data, center_layer, n, background_value)

        # Path.stem drops the final suffix (".gz"); strip only a *trailing*
        # ".nii" so a ".nii" elsewhere in the name is preserved.
        filename_base = Path(nifti_path).stem
        if filename_base.endswith('.nii'):
            filename_base = filename_base[:-len('.nii')]

        # Process and save each layer
        for layer_name, layer_data in layers.items():
            # Normalize the layer
            normalized_layer = normalize_voxel_values(layer_data, background_value)

            # Create output filename
            output_filename = f"{filename_base}_{layer_name}.png"
            output_path = os.path.join(output_dir, output_filename)

            # Save as PNG
            save_layer_as_png(normalized_layer, output_path)
            logger.info(f"Saved: {output_path}")

    except Exception as e:
        # Best-effort per file: keep going, but record the traceback so the
        # failure can be diagnosed.
        logger.exception(f"Error processing {nifti_path}: {str(e)}")

def process_dataset(input_dataset_dir, output_dataset_dir, n, background_value=-250):
    """
    Walk a <split>/<class>/*.nii.gz dataset tree and process every volume.

    Only split folders named train/val/test and class folders named 0/1 are
    visited. The same <split>/<class> layout is created under the output
    directory.

    Args:
        input_dataset_dir: Input dataset directory
        output_dataset_dir: Output dataset directory
        n: Distance parameter
        background_value: Background value
    """
    source_root = Path(input_dataset_dir)
    target_root = Path(output_dataset_dir)

    for split_dir in source_root.iterdir():
        # Ignore anything that is not a recognised split folder
        if not split_dir.is_dir() or split_dir.name not in ['train', 'val', 'test']:
            continue
        logger.info(f"Processing split: {split_dir.name}")

        for class_dir in split_dir.iterdir():
            # Ignore anything that is not a recognised class folder
            if not class_dir.is_dir() or class_dir.name not in ['0', '1']:
                continue
            logger.info(f"Processing class: {class_dir.name}")

            # Mirror the split/class layout on the output side
            destination = target_root / split_dir.name / class_dir.name
            destination.mkdir(parents=True, exist_ok=True)

            volumes = list(class_dir.glob('*.nii.gz'))
            logger.info(f"Found {len(volumes)} NIfTI files")

            for volume in volumes:
                process_nifti_file(volume, destination, n, background_value)

# Extract layers around the most informative layer (center, center±n, center±2n)

In [None]:
# Run configuration for the center-layer extraction (edit before running)
input_dataset = r'C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_3D'  # Replace with actual input dataset path
output_dataset = r'C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_2D'  # Replace with actual output dataset path
n = 5  # Replace with actual distance parameter (n)
background_value = -250

logger.info(f"Input dataset: {input_dataset}")
logger.info(f"Output dataset: {output_dataset}")
logger.info(f"Distance parameter (n): {n}")
logger.info(f"Background value: {background_value}")

# Validate input directory. Only run the pipeline when it exists: otherwise
# process_dataset would raise while listing the missing directory.
if not os.path.isdir(input_dataset):
    logger.error(f"Input dataset directory does not exist: {input_dataset}")
else:
    # Process the dataset
    process_dataset(input_dataset, output_dataset, n, background_value)

    logger.info("Processing completed!")

# Extract the top N most informative layers with a minimum spacing between them

In [None]:
# 3D NIfTI to 2D PNG Layer Extractor - Top N Informative Layers
# Extracts the N most informative layers with minimum spacing between them

import os
import numpy as np
import nibabel as nib
from PIL import Image
from pathlib import Path
import logging

# Set up logging for Jupyter
# NOTE(review): if logging was already configured earlier in the session
# (e.g. by the first cell), this call presumably has no effect, because
# basicConfig only configures a root logger that has no handlers yet —
# confirm whether the shorter format is actually applied.
logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')
# Module-level logger; the functions in this cell report progress with print().
logger = logging.getLogger(__name__)

# =============================================================================
# CONFIGURATION - Modify these parameters
# =============================================================================

# Where the 3D volumes are read from and where the 2D PNG layers are written
INPUT_DATASET_DIR = r'C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_3D'  # Update this path
OUTPUT_DATASET_DIR = r'C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_2D'  # Update this path

# Layer-selection settings
N_LAYERS = 10             # How many of the most informative layers to keep per volume
MIN_SPACING = 5           # Smallest allowed index distance between two kept layers (k parameter)
BACKGROUND_VALUE = -250   # Voxel value treated as background

# Echo the active settings, one per line
print(
    "Configuration:",
    f"Input dataset: {INPUT_DATASET_DIR}",
    f"Output dataset: {OUTPUT_DATASET_DIR}",
    f"Number of layers to extract: {N_LAYERS}",
    f"Minimum spacing between layers: {MIN_SPACING}",
    f"Background value: {BACKGROUND_VALUE}",
    sep="\n",
)

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def normalize_voxel_values(data, background_value=-250):
    """
    Map voxel intensities onto [0, 255] with a linear min-max stretch.

    Intensities below ``background_value`` are clipped up to it before the
    stretch. The stretch uses the minimum and maximum of the clipped data.

    Args:
        data: numpy array of voxel intensities (callers in this file pass a
            single 2D layer)
        background_value: Lower clipping bound (default: -250)

    Returns:
        uint8 array shaped like ``data``; all zeros if the clipped data is
        constant
    """
    bounded = np.clip(data, background_value, None)
    floor = bounded.min()
    ceiling = bounded.max()

    if ceiling != floor:
        # Stretch to [0, 255]; the uint8 cast drops the fractional part
        stretched = ((bounded - floor) / (ceiling - floor)) * 255
        return stretched.astype(np.uint8)

    # Constant input: nothing to stretch, and the range would be zero
    return np.zeros_like(bounded, dtype=np.uint8)

def calculate_layer_informativeness(data, background_value=-250):
    """
    Score every layer along axis 0 by its share of non-background voxels.

    A voxel counts as non-background when it is strictly greater than
    ``background_value``. The score is that count divided by the number of
    voxels in one layer. Other metrics (variance, edge content, texture)
    could be combined into the score later.

    Args:
        data: 3D numpy array
        background_value: Background value to exclude

    Returns:
        List of tuples: (layer_index, informativeness_score)
    """
    voxels_per_layer = data.shape[1] * data.shape[2]

    # Count the above-background voxels of all layers in one pass by
    # reducing over every axis except the layer axis.
    non_layer_axes = tuple(range(1, data.ndim))
    foreground_counts = (data > background_value).sum(axis=non_layer_axes)

    ratios = foreground_counts / voxels_per_layer
    return list(enumerate(ratios))

def select_top_layers_with_spacing(layer_scores, n_layers, min_spacing):
    """
    Select top N layers ensuring minimum spacing between them

    Layers are visited greedily from highest to lowest score. A layer is kept
    only if its index is at least ``min_spacing`` away from every layer kept
    so far. Selection stops once ``n_layers`` layers have been kept.

    Args:
        layer_scores: List of tuples (layer_index, score)
        n_layers: Number of layers to select; zero or a negative value
            selects nothing
        min_spacing: Minimum spacing between selected layers

    Returns:
        List of tuples: (layer_index, score) for selected layers, sorted by
        layer index. May hold fewer than ``n_layers`` entries when the
        spacing constraint cannot be met.
    """
    # Without this guard the loop below would keep one layer before it ever
    # checks the requested count.
    if n_layers <= 0:
        return []

    # Sort by score in descending order
    sorted_layers = sorted(layer_scores, key=lambda x: x[1], reverse=True)

    selected_layers = []

    print(f"  Selecting {n_layers} layers with minimum spacing of {min_spacing}...")

    for layer_idx, score in sorted_layers:
        # Skip layers that sit too close to one that is already selected
        too_close = any(
            abs(layer_idx - selected_idx) < min_spacing
            for selected_idx, _ in selected_layers
        )
        if too_close:
            continue

        selected_layers.append((layer_idx, score))
        print(f"    Selected layer {layer_idx} (score: {score:.4f})")

        if len(selected_layers) >= n_layers:
            break

    # Sort selected layers by index for consistent naming
    selected_layers.sort(key=lambda x: x[0])

    if len(selected_layers) < n_layers:
        print(f"    ⚠️ Only found {len(selected_layers)} layers that satisfy spacing constraint")

    return selected_layers

def extract_selected_layers(data, selected_layers):
    """
    Slice the selected layers out of the volume.

    Each key has the form "layer_<index>_rank_<position>_score_<score>".
    The "rank" is the 1-based position of the entry in ``selected_layers``
    as given, not a ranking by score.

    Args:
        data: 3D numpy array, sliced along axis 0
        selected_layers: List of tuples (layer_index, score)

    Returns:
        Dictionary mapping the generated keys to their 2D layer arrays
    """
    extracted = {}

    for position, (layer_idx, score) in enumerate(selected_layers, start=1):
        key = f"layer_{layer_idx:03d}_rank_{position:02d}_score_{score:.3f}"
        extracted[key] = data[layer_idx, :, :]
        print(f"  Extracted layer {layer_idx} (rank {position}, score: {score:.4f})")

    return extracted

def save_layer_as_png(layer_data, output_path):
    """
    Store a single-channel 2D layer as a three-channel (RGB) image file.

    All three channels carry the same values, so the picture looks grayscale.

    Args:
        layer_data: 2D numpy array (normalized to 0-255)
        output_path: Output file path
    """
    # Same plane three times along a new last axis: R = G = B
    rgb_pixels = np.stack((layer_data,) * 3, axis=-1)

    picture = Image.fromarray(rgb_pixels, mode='RGB')
    picture.save(output_path)

def process_nifti_file(nifti_path, output_dir, n_layers, min_spacing, background_value=-250):
    """
    Process a single NIfTI file and extract top N informative layers with spacing

    Loads the volume, scores every layer, selects up to ``n_layers`` layers
    that respect ``min_spacing``, normalizes each one and saves it as a PNG
    in ``output_dir``.

    Any exception is reported on stdout and swallowed, so one bad file does
    not abort a whole dataset run.

    Args:
        nifti_path: Path to .nii.gz file
        output_dir: Output directory for this file's layers
        n_layers: Number of layers to extract
        min_spacing: Minimum spacing between layers
        background_value: Background value

    Returns:
        Number of layers saved; 0 when no layer was selected or an error
        occurred
    """
    try:
        print(f"\nProcessing: {nifti_path}")

        # Load NIfTI file
        nifti_img = nib.load(nifti_path)
        data = nifti_img.get_fdata()

        print(f"  Data shape: {data.shape}")
        print(f"  Data range: [{data.min():.2f}, {data.max():.2f}]")

        # Calculate informativeness for all layers
        layer_scores = calculate_layer_informativeness(data, background_value)

        # Select top N layers with spacing constraint
        selected_layers = select_top_layers_with_spacing(layer_scores, n_layers, min_spacing)

        if not selected_layers:
            print(f"  ⚠️ No suitable layers found")
            return 0

        # Extract selected layers
        layers = extract_selected_layers(data, selected_layers)

        # Path.stem drops the final suffix (".gz"); strip only a *trailing*
        # ".nii" so a ".nii" elsewhere in the name is preserved.
        filename_base = Path(nifti_path).stem
        if filename_base.endswith('.nii'):
            filename_base = filename_base[:-len('.nii')]

        # Process and save each layer
        saved_count = 0

        for layer_name, layer_data in layers.items():
            # Normalize the layer
            normalized_layer = normalize_voxel_values(layer_data, background_value)

            # Create output filename
            output_filename = f"{filename_base}_{layer_name}.png"
            output_path = os.path.join(output_dir, output_filename)

            # Save as PNG
            save_layer_as_png(normalized_layer, output_path)
            saved_count += 1

        print(f"  ✅ Saved {saved_count} layers")
        return saved_count

    except Exception as e:
        # Best-effort per file: report the failure and let the caller go on
        print(f"  ❌ Error processing {nifti_path}: {str(e)}")
        return 0

def process_dataset(input_dataset_dir, output_dataset_dir, n_layers, min_spacing, background_value=-250):
    """
    Walk a <split>/<class>/*.nii.gz dataset tree, process every volume and
    collect statistics.

    Only split folders named train/val/test and class folders named 0/1 are
    visited. The same <split>/<class> layout is created under the output
    directory. A file counts as processed when at least one layer was saved
    for it.

    Args:
        input_dataset_dir: Input dataset directory
        output_dataset_dir: Output dataset directory
        n_layers: Number of layers to extract per file
        min_spacing: Minimum spacing between layers
        background_value: Background value

    Returns:
        Dictionary with processing statistics: overall totals plus, under
        'splits', per-split and per-class counters
    """
    source_root = Path(input_dataset_dir)
    target_root = Path(output_dataset_dir)

    stats = dict(total_files=0, processed_files=0, total_layers_saved=0, splits={})

    for split_dir in source_root.iterdir():
        # Ignore anything that is not a recognised split folder
        if not split_dir.is_dir() or split_dir.name not in ['train', 'val', 'test']:
            continue

        print(f"\n{'='*50}")
        print(f"Processing split: {split_dir.name}")
        print(f"{'='*50}")

        per_class = {}
        stats['splits'][split_dir.name] = {'classes': per_class}

        for class_dir in split_dir.iterdir():
            # Ignore anything that is not a recognised class folder
            if not class_dir.is_dir() or class_dir.name not in ['0', '1']:
                continue

            print(f"\n📁 Processing class: {class_dir.name}")

            # Mirror the split/class layout on the output side
            destination = target_root / split_dir.name / class_dir.name
            destination.mkdir(parents=True, exist_ok=True)

            volumes = list(class_dir.glob('*.nii.gz'))
            print(f"Found {len(volumes)} NIfTI files")

            class_stats = {
                'total_files': len(volumes),
                'processed_files': 0,
                'total_layers': 0
            }

            for volume in volumes:
                stats['total_files'] += 1
                saved = process_nifti_file(volume, destination, n_layers, min_spacing, background_value)

                # Files that saved nothing do not count as processed
                if saved <= 0:
                    continue

                stats['processed_files'] += 1
                stats['total_layers_saved'] += saved
                class_stats['processed_files'] += 1
                class_stats['total_layers'] += saved

            per_class[class_dir.name] = class_stats
            print(f"Class {class_dir.name} summary: {class_stats['processed_files']}/{class_stats['total_files']} files processed, {class_stats['total_layers']} layers saved")

    return stats

# =============================================================================
# MAIN PROCESSING
# =============================================================================

# Validate input directory
if not os.path.exists(INPUT_DATASET_DIR):
    print(f"❌ Input dataset directory does not exist: {INPUT_DATASET_DIR}")
    print("Please update the INPUT_DATASET_DIR variable with the correct path")
else:
    print(f"✅ Input dataset found: {INPUT_DATASET_DIR}")
    
    # Validate parameters
    if N_LAYERS <= 0:
        print(f"❌ N_LAYERS must be positive, got: {N_LAYERS}")
    elif MIN_SPACING < 0:
        print(f"❌ MIN_SPACING must be non-negative, got: {MIN_SPACING}")
    else:
        print(f"✅ Starting processing to extract {N_LAYERS} layers with {MIN_SPACING} spacing...")
        
        # Process the dataset
        print(f"\n🚀 Starting dataset processing...")
        stats = process_dataset(INPUT_DATASET_DIR, OUTPUT_DATASET_DIR, N_LAYERS, MIN_SPACING, BACKGROUND_VALUE)
        
        # Print final summary
        print(f"\n{'='*60}")
        print("📊 PROCESSING SUMMARY")
        print(f"{'='*60}")
        print(f"Total files processed: {stats['processed_files']}/{stats['total_files']}")
        print(f"Total layers extracted: {stats['total_layers_saved']}")
        # max(..., 1) keeps the averages defined when no file was processed
        print(f"Average layers per file: {stats['total_layers_saved']/max(stats['processed_files'], 1):.1f}")
        print(f"Output directory: {OUTPUT_DATASET_DIR}")
        
        for split_name, split_data in stats['splits'].items():
            print(f"\n{split_name.upper()}:")
            for class_name, class_data in split_data['classes'].items():
                avg_layers = class_data['total_layers'] / max(class_data['processed_files'], 1)
                print(f"  Class {class_name}: {class_data['processed_files']}/{class_data['total_files']} files → {class_data['total_layers']} layers (avg: {avg_layers:.1f})")
        
        # Files that failed or yielded no layers are counted in total_files
        # but not in processed_files; only claim success when there are none.
        unprocessed_files = stats['total_files'] - stats['processed_files']
        if unprocessed_files:
            print(f"\n⚠️ Processing completed, but {unprocessed_files} file(s) failed or produced no layers")
        else:
            print(f"\n✅ Processing completed successfully!")

# =============================================================================
# OPTIONAL: ANALYZE LAYER SELECTION
# =============================================================================

def analyze_layer_selection(input_dataset_dir, n_layers, min_spacing, background_value=-250,
                            max_files_per_class=3):
    """
    Analyze which layers would be selected across the dataset
    This helps you understand the distribution of informative layers

    Runs the scoring and selection steps (no files are written) on a sample
    of volumes from every train/val/test split and 0/1 class folder, then
    prints summary statistics of the selected layer indices.

    Args:
        input_dataset_dir: Input dataset directory
        n_layers: Number of layers to select per file
        min_spacing: Minimum spacing between selected layers
        background_value: Background value to exclude
        max_files_per_class: How many files to sample from each class folder
            (default: 3, for speed); None analyzes every file
    """
    input_path = Path(input_dataset_dir)
    selected_layer_indices = []
    
    print(f"\n📈 Analyzing layer selection with N={n_layers}, spacing={min_spacing}...")
    
    for split_dir in input_path.iterdir():
        if not (split_dir.is_dir() and split_dir.name in ['train', 'val', 'test']):
            continue

        for class_dir in split_dir.iterdir():
            if not (class_dir.is_dir() and class_dir.name in ['0', '1']):
                continue

            nifti_files = list(class_dir.glob('*.nii.gz'))

            # Slicing with None keeps the whole list
            for nifti_file in nifti_files[:max_files_per_class]:
                try:
                    nifti_img = nib.load(nifti_file)
                    data = nifti_img.get_fdata()

                    layer_scores = calculate_layer_informativeness(data, background_value)
                    selected_layers = select_top_layers_with_spacing(layer_scores, n_layers, min_spacing)
                except Exception as e:
                    # Best-effort analysis: report the unreadable file and
                    # move on. KeyboardInterrupt is not an Exception, so
                    # interrupting the run still works.
                    print(f"  ⚠️ Skipping {nifti_file}: {e}")
                    continue

                selected_layer_indices.extend(layer_idx for layer_idx, _ in selected_layers)
    
    if selected_layer_indices:
        selected_layer_indices = np.array(selected_layer_indices)
        print(f"Selected layer statistics:")
        print(f"  Mean position: {selected_layer_indices.mean():.1f}")
        print(f"  Median position: {np.median(selected_layer_indices):.1f}")
        print(f"  Min position: {selected_layer_indices.min()}")
        print(f"  Max position: {selected_layer_indices.max()}")
        # bincount gives occurrences per index; the last five of argsort are
        # the most frequent, reversed to list them most-frequent first
        print(f"  Most common positions: {np.bincount(selected_layer_indices).argsort()[-5:][::-1]}")

# Uncomment the line below to run the analysis
# analyze_layer_selection(INPUT_DATASET_DIR, N_LAYERS, MIN_SPACING, BACKGROUND_VALUE)

Configuration:
Input dataset: C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_3D
Output dataset: C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_2D
Number of layers to extract: 30
Minimum spacing between layers: 5
Background value: -250
✅ Input dataset found: C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_3D
✅ Starting processing to extract 30 layers with 5 spacing...

🚀 Starting dataset processing...

Processing split: test

📁 Processing class: 0
Found 16 NIfTI files

Processing: C:\Users\acer\Desktop\Project_TMJOA\Data\training_dataset_3D\test\0\50-30909 R_adjustedBG.nii.gz
  Data shape: (255, 255, 255)
  Data range: [-250.00, 1088.00]
  Selecting 30 layers with minimum spacing of 5...
    Selected layer 61 (score: 0.2076)
    Selected layer 66 (score: 0.2039)
    Selected layer 56 (score: 0.2029)
    Selected layer 71 (score: 0.1984)
    Selected layer 76 (score: 0.1906)
    Selected layer 51 (score: 0.1896)
    Selected layer 81 (score: 0.1820)
    Se

KeyboardInterrupt: 