In [3]:
import os
import nibabel as nib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import ndimage

In [4]:
# Directories that are scanned for the label volumes (labelsTr) and the
# matching image volumes (imagesTr) of the Task07_Pancreas dataset.
# NOTE(review): these are absolute, machine-specific Windows paths; they must
# be edited before the notebook can run on another machine.
labels_dir = r"F:\Conditional Latent Diffusion Model for Pancreas\Task07_Pancreas\labelsTr"
images_dir = r"F:\Conditional Latent Diffusion Model for Pancreas\Task07_Pancreas\imagesTr"

In [5]:
def compute_bounding_box_volume(mask, label_value):
    """Compute the axis-aligned bounding box of one label inside a mask.

    Parameters
    ----------
    mask : array-like
        Label array with at least one dimension. Every axis of the array is
        used, so 2-D and 3-D masks are both supported.
    label_value : scalar
        Label whose elements are boxed (compared with ``mask == label_value``).

    Returns
    -------
    bbox_volume : int
        Number of voxels inside the bounding box, or ``0`` when the label
        does not occur in ``mask``.
    bbox_dims : tuple of int
        Extent of the box along each axis, in voxels; all zeros when the
        label does not occur in ``mask``.

    Notes
    -----
    Both return values are plain Python objects (``int`` / ``tuple`` of
    ``int``) in every branch, so callers get one consistent type whether or
    not the label is present.
    """
    mask = np.asarray(mask)
    coords = np.nonzero(mask == label_value)

    # No element carries this label: report an empty box.
    if coords[0].size == 0:
        return 0, (0,) * mask.ndim

    # Inclusive extent along each axis: max index - min index + 1.
    bbox_dims = tuple(int(axis.max() - axis.min() + 1) for axis in coords)
    bbox_volume = int(np.prod(bbox_dims, dtype=np.int64))

    return bbox_volume, bbox_dims


In [6]:
def analyze_case(label_path, image_path):
    """Analyze one label volume for pancreas/tumor volumes and bounding boxes.

    Parameters
    ----------
    label_path : str
        Path of the label file to load with nibabel. Label ``1`` is counted
        as pancreas and label ``2`` as tumor.
    image_path : str
        Accepted for interface compatibility; this function does not read it.

    Returns
    -------
    dict or None
        On success, a dict with the keys ``pancreas_volume_mm3``,
        ``tumor_volume_mm3``, ``tumor_to_pancreas_ratio``,
        ``pancreas_bbox_volume_voxels``, ``tumor_bbox_volume_voxels``,
        ``pancreas_bbox_dims``, ``tumor_bbox_dims``, ``pancreas_compactness``
        and ``voxel_volume_mm3``. ``None`` if anything raised while loading
        or analyzing the file (the error is printed, not re-raised).
    """
    try:
        # Load data
        label_img = nib.load(label_path)
        label_data = label_img.get_fdata()

        # Voxel volume from the header spacing. Only the first three zooms
        # are spatial; a 4-D header would otherwise contribute its extra step
        # to the product. The product is taken in float64 rather than in the
        # header's storage precision.
        # NOTE(review): the *_mm3 keys assume the header spacing is in mm - confirm.
        spatial_zooms = label_img.header.get_zooms()[:3]
        voxel_volume = float(np.prod(np.asarray(spatial_zooms, dtype=np.float64)))

        # Voxel counts per structure
        pancreas_voxels = int(np.count_nonzero(label_data == 1))
        tumor_voxels = int(np.count_nonzero(label_data == 2))

        pancreas_volume = pancreas_voxels * voxel_volume
        tumor_volume = tumor_voxels * voxel_volume

        # Bounding boxes (in voxels)
        pancreas_bbox_vol, pancreas_bbox_dims = compute_bounding_box_volume(label_data, 1)
        tumor_bbox_vol, tumor_bbox_dims = compute_bounding_box_volume(label_data, 2)

        # Ratio falls back to 0 when there is no pancreas voxel to divide by.
        tumor_to_pancreas_ratio = tumor_volume / pancreas_volume if pancreas_volume > 0 else 0

        # Pancreas compactness (actual voxel count / bounding box voxel count)
        pancreas_compactness = pancreas_voxels / pancreas_bbox_vol if pancreas_bbox_vol > 0 else 0

        return {
            'pancreas_volume_mm3': pancreas_volume,
            'tumor_volume_mm3': tumor_volume,
            'tumor_to_pancreas_ratio': tumor_to_pancreas_ratio,
            'pancreas_bbox_volume_voxels': pancreas_bbox_vol,
            'tumor_bbox_volume_voxels': tumor_bbox_vol,
            'pancreas_bbox_dims': pancreas_bbox_dims,
            'tumor_bbox_dims': tumor_bbox_dims,
            'pancreas_compactness': pancreas_compactness,
            'voxel_volume_mm3': voxel_volume
        }
    except Exception as e:
        # Deliberately best-effort: one unreadable file is reported and
        # skipped by the caller instead of aborting the whole scan.
        print(f"Error processing {label_path}: {e}")
        return None

In [7]:
# Progress banner, then the list that collects one result dict per case
# (it is filled by the directory-scan loop and turned into `df`).
print("🔍 Analyzing all cases...")
data = []

🔍 Analyzing all cases...


In [8]:
# Tumor size-class cut-offs in mm³: below the first value a tumor is "small",
# below the second "medium", otherwise "large".
TUMOR_SMALL_MAX_MM3 = 20000
TUMOR_MEDIUM_MAX_MM3 = 125000

# Start from an empty list inside this cell so that re-running the cell
# rebuilds the table instead of appending every case a second time.
data = []

# sorted() makes the row order of `df` reproducible; os.listdir() returns
# entries in arbitrary order.
for file in sorted(os.listdir(labels_dir)):
    # Only NIfTI label files; "._*" entries are skipped as in the original filter.
    if not file.endswith(".nii.gz") or file.startswith("._"):
        continue

    label_path = os.path.join(labels_dir, file)
    image_path = os.path.join(images_dir, file)

    analysis = analyze_case(label_path, image_path)
    if analysis is None:
        # analyze_case already printed the error for this file.
        continue

    # Classify tumor size
    tumor_vol = analysis['tumor_volume_mm3']
    if tumor_vol < TUMOR_SMALL_MAX_MM3:
        size_class = "small"
    elif tumor_vol < TUMOR_MEDIUM_MAX_MM3:
        size_class = "medium"
    else:
        size_class = "large"

    data.append({
        'case': file,
        'size_class': size_class,
        **analysis
    })

# One row per successfully analyzed case.
df = pd.DataFrame(data)

In [24]:
import matplotlib

# Try to switch to the interactive Tk backend. pyplot is already imported by
# the notebook's import cell, so this is a backend *switch*; it raises
# ImportError where Tk cannot be set up (e.g. a headless kernel). In that case
# the current backend is kept and the figure is still written to disk below.
try:
    matplotlib.use('TkAgg')
except ImportError as e:
    print(f"⚠️  TkAgg backend unavailable ({e}); keeping '{matplotlib.get_backend()}'")

# This cell relies on `os`, `np` and `plt` from the notebook's import cell and
# on `df` from the analysis cell.
if df.empty:
    raise ValueError("No cases were analyzed; check labels_dir and re-run the analysis cell.")

# Fixed display order of the size classes and their tick labels.
SIZE_CLASSES = ['small', 'medium', 'large']
SIZE_CLASS_LABELS = ['Small', 'Medium', 'Large']

# Configure plotting
plt.style.use('default')
plt.rcParams['figure.facecolor'] = 'white'
plt.rcParams['axes.facecolor'] = 'white'

print("\n📊 VOLUME STATISTICS")
print("="*50)

# Overall stats
print(f"Total cases analyzed: {len(df)}")
print(f"\nSize class distribution:")
size_dist = df['size_class'].value_counts().sort_index()
print(size_dist)

# Show percentages
print(f"\nSize class percentages:")
for size_class, count in size_dist.items():
    percentage = (count / len(df)) * 100
    print(f"{size_class}: {count} cases ({percentage:.1f}%)")

# Detailed volume stats per class
print(f"\n📈 TUMOR VOLUME STATISTICS (mm³)")
tumor_stats = df.groupby('size_class')['tumor_volume_mm3'].agg([
    'count', 'mean', 'std', 'min', 'max', 'median'
]).round(2)
print(tumor_stats)

print(f"\n📈 PANCREAS VOLUME STATISTICS (mm³)")
pancreas_stats = df.groupby('size_class')['pancreas_volume_mm3'].agg([
    'count', 'mean', 'std', 'min', 'max', 'median'
]).round(2)
print(pancreas_stats)

# Print key insights. The percentage is computed from the data rather than
# typed in, so it stays correct when the case list changes.
large_count = int((df['size_class'] == 'large').sum())
large_percentage = (large_count / len(df)) * 100
print(f"\n🔍 KEY INSIGHTS:")
print(f"• Large tumors are extremely rare: only {large_count} cases ({large_percentage:.1f}%)")
print(f"• 99th percentile tumor volume: {np.percentile(df['tumor_volume_mm3'], 99):.0f} mm³")
print(f"• Largest tumor: {df['tumor_volume_mm3'].max():.0f} mm³")

# 🔹 Step 2: Distribution Plots
print(f"\n📊 Creating distribution plots...")

# Create plots directory
os.makedirs('analysis_plots', exist_ok=True)

# Configure figure with explicit DPI and size (2 rows x 3 columns of axes)
fig, axes = plt.subplots(2, 3, figsize=(20, 14), dpi=100)
fig.suptitle('Pancreas Tumor Volume Analysis', fontsize=20, y=0.98)

# Row 1: Tumor volume distributions
print("  Creating tumor volume plots...")

# Plot 1: Overall tumor volume distribution, with the size-class cut-offs marked
tumor_volumes = df['tumor_volume_mm3'].values
axes[0, 0].hist(tumor_volumes, bins=50, alpha=0.7, color='skyblue', edgecolor='black', linewidth=0.5)
axes[0, 0].set_xlabel('Tumor Volume (mm³)', fontsize=12)
axes[0, 0].set_ylabel('Frequency', fontsize=12)
axes[0, 0].set_title('Overall Tumor Volume Distribution', fontsize=14)
axes[0, 0].axvline(20000, color='red', linestyle='--', linewidth=2, label='Small-Medium (20k)')
axes[0, 0].axvline(125000, color='orange', linestyle='--', linewidth=2, label='Medium-Large (125k)')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Plot 2: Log scale distribution (+1 keeps log10 defined for zero volumes)
log_volumes = np.log10(tumor_volumes + 1)
axes[0, 1].hist(log_volumes, bins=50, alpha=0.7, color='lightgreen', edgecolor='black', linewidth=0.5)
axes[0, 1].set_xlabel('Log10(Tumor Volume + 1)', fontsize=12)
axes[0, 1].set_ylabel('Frequency', fontsize=12)
axes[0, 1].set_title('Tumor Volume Distribution (Log Scale)', fontsize=14)
axes[0, 1].grid(True, alpha=0.3)

# Plot 3: Box plot by size class.
# Tick labels are set on the axes afterwards instead of through boxplot's
# `labels=` keyword, which newer matplotlib releases deprecate (renamed to
# `tick_labels`); set_xticklabels works on old and new versions alike.
box_data = [df[df['size_class'] == sc]['tumor_volume_mm3'].values for sc in SIZE_CLASSES]
box_plot = axes[0, 2].boxplot(box_data, patch_artist=True)
axes[0, 2].set_xticklabels(SIZE_CLASS_LABELS)
colors = ['lightblue', 'orange', 'red']
for patch, color in zip(box_plot['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
axes[0, 2].set_xlabel('Size Class', fontsize=12)
axes[0, 2].set_ylabel('Tumor Volume (mm³)', fontsize=12)
axes[0, 2].set_title('Tumor Volume by Size Class', fontsize=14)
axes[0, 2].grid(True, alpha=0.3)

# Row 2: Pancreas analysis
print("  Creating pancreas analysis plots...")

# Plot 4: Pancreas volume distribution
pancreas_volumes = df['pancreas_volume_mm3'].values
axes[1, 0].hist(pancreas_volumes, bins=50, alpha=0.7, color='coral', edgecolor='black', linewidth=0.5)
axes[1, 0].set_xlabel('Pancreas Volume (mm³)', fontsize=12)
axes[1, 0].set_ylabel('Frequency', fontsize=12)
axes[1, 0].set_title('Pancreas Volume Distribution', fontsize=14)
axes[1, 0].grid(True, alpha=0.3)

# Plot 5: Tumor-to-pancreas ratio
ratios = df['tumor_to_pancreas_ratio'].values
axes[1, 1].hist(ratios, bins=50, alpha=0.7, color='purple', edgecolor='black', linewidth=0.5)
axes[1, 1].set_xlabel('Tumor/Pancreas Volume Ratio', fontsize=12)
axes[1, 1].set_ylabel('Frequency', fontsize=12)
axes[1, 1].set_title('Tumor-to-Pancreas Ratio Distribution', fontsize=14)
axes[1, 1].grid(True, alpha=0.3)

# Plot 6: Pancreas volume by tumor size class (same tick-label approach as plot 3)
pancreas_box_data = [df[df['size_class'] == sc]['pancreas_volume_mm3'].values for sc in SIZE_CLASSES]
pancreas_box_plot = axes[1, 2].boxplot(pancreas_box_data, patch_artist=True)
axes[1, 2].set_xticklabels(SIZE_CLASS_LABELS)
for patch, color in zip(pancreas_box_plot['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
axes[1, 2].set_xlabel('Tumor Size Class', fontsize=12)
axes[1, 2].set_ylabel('Pancreas Volume (mm³)', fontsize=12)
axes[1, 2].set_title('Pancreas Volume by Tumor Size Class', fontsize=14)
axes[1, 2].grid(True, alpha=0.3)

# Adjust layout and save
plt.tight_layout()
plt.subplots_adjust(top=0.93)  # Make room for suptitle

# Save the figure before showing it, so the file exists even if display fails
plt.savefig('analysis_plots/volume_distributions.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')
print("✅ Volume analysis plots saved to: analysis_plots/volume_distributions.png")

# Show plot (with fallback to the saved file)
try:
    plt.show(block=True)
    print("✅ Plots displayed successfully")
except Exception as e:
    print(f"⚠️  Display failed: {e}")
    print("📁 Check saved file: analysis_plots/volume_distributions.png")

plt.close('all')  # Clean up all figures

# Additional summary statistics
print(f"\n📈 EXTENDED STATISTICS")
print("="*50)

# Volume percentiles for better understanding
percentiles = [25, 50, 75, 90, 95, 99, 99.5]
print(f"\nTumor Volume Percentiles (mm³):")
for p in percentiles:
    vol = np.percentile(df['tumor_volume_mm3'], p)
    print(f"  {p:4.1f}th percentile: {vol:8.0f} mm³")

# Class-specific statistics
print(f"\nDetailed Class Analysis:")
for size_class in SIZE_CLASSES:
    subset = df[df['size_class'] == size_class]
    print(f"\n{size_class.upper()} TUMORS ({len(subset)} cases):")
    print(f"  Volume range: {subset['tumor_volume_mm3'].min():.0f} - {subset['tumor_volume_mm3'].max():.0f} mm³")
    print(f"  Mean ± SD: {subset['tumor_volume_mm3'].mean():.0f} ± {subset['tumor_volume_mm3'].std():.0f} mm³")
    print(f"  Median pancreas volume: {subset['pancreas_volume_mm3'].median():.0f} mm³")

# Check for the extreme outlier
max_tumor_idx = df['tumor_volume_mm3'].idxmax()
max_tumor_case = df.loc[max_tumor_idx]
max_tumor_ratio = max_tumor_case['tumor_to_pancreas_ratio']
print(f"\n🔍 EXTREME CASE ANALYSIS:")
print(f"Largest tumor: {max_tumor_case['case']}")
print(f"  Tumor volume: {max_tumor_case['tumor_volume_mm3']:.0f} mm³")
print(f"  Pancreas volume: {max_tumor_case['pancreas_volume_mm3']:.0f} mm³")
# Only claim the tumor is "larger than pancreas" when the ratio says so.
if max_tumor_ratio > 1:
    print(f"  Ratio: {max_tumor_ratio:.2f} (tumor is {max_tumor_ratio:.1f}× larger than pancreas!)")
else:
    print(f"  Ratio: {max_tumor_ratio:.2f}")


📊 VOLUME STATISTICS
Total cases analyzed: 281

Size class distribution:
size_class
large       4
medium     22
small     255
Name: count, dtype: int64

Size class percentages:
large: 4 cases (1.4%)
medium: 22 cases (7.8%)
small: 255 cases (90.7%)

📈 TUMOR VOLUME STATISTICS (mm³)
            count       mean        std        min        max     median
size_class                                                              
large           4  289937.29  294979.20  140526.37  732388.14  143417.32
medium         22   41646.78   20027.09   21251.95   91472.77   37055.49
small         255    6115.05    4327.07     412.88   19788.57    5300.94

📈 PANCREAS VOLUME STATISTICS (mm³)
            count      mean       std       min        max     median
size_class                                                           
large           4  96319.72  31003.74  60523.80  135210.30   94772.39
medium         22  97477.46  40672.02  35715.12  174249.90  100336.61
small         255  81320.77  35335.58 

  box_plot = axes[0, 2].boxplot(box_data, labels=['Small', 'Medium', 'Large'], patch_artist=True)
  pancreas_box_plot = axes[1, 2].boxplot(pancreas_box_data, labels=['Small', 'Medium', 'Large'], patch_artist=True)


✅ Volume analysis plots saved to: analysis_plots/volume_distributions.png
✅ Plots displayed successfully

📈 EXTENDED STATISTICS

Tumor Volume Percentiles (mm³):
  25.0th percentile:     2775 mm³
  50.0th percentile:     5778 mm³
  75.0th percentile:    10273 mm³
  90.0th percentile:    18901 mm³
  95.0th percentile:    38901 mm³
  99.0th percentile:   140546 mm³
  99.5th percentile:   143976 mm³

Detailed Class Analysis:

SMALL TUMORS (255 cases):
  Volume range: 413 - 19789 mm³
  Mean ± SD: 6115 ± 4327 mm³
  Median pancreas volume: 75608 mm³

MEDIUM TUMORS (22 cases):
  Volume range: 21252 - 91473 mm³
  Mean ± SD: 41647 ± 20027 mm³
  Median pancreas volume: 100337 mm³

LARGE TUMORS (4 cases):
  Volume range: 140526 - 732388 mm³
  Mean ± SD: 289937 ± 294979 mm³
  Median pancreas volume: 94772 mm³

🔍 EXTREME CASE ANALYSIS:
Largest tumor: pancreas_415.nii.gz
  Tumor volume: 732388 mm³
  Pancreas volume: 135210 mm³
  Ratio: 5.42 (tumor is 5.4× larger than pancreas!)
