In [5]:
# ============================================================
# MRI FEATURE EXTRACTION PIPELINE - FULLY CORRECTED VERSION
# Fixed: Pretrained Med3D, Proper normalization, Full embeddings
# ============================================================

import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import nibabel as nib
from radiomics.featureextractor import RadiomicsFeatureExtractor
import SimpleITK as sitk
from scipy.ndimage import zoom
import json
from datetime import datetime
import warnings
import traceback
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

warnings.filterwarnings("ignore")

# ===============================
# REPRODUCIBILITY
# ===============================
RANDOM_SEED = 42
# Seed every RNG the pipeline draws from (NumPy for shuffling/patch sampling,
# PyTorch for any stochastic layer initialisation).
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different convolution kernels from run to run,
    # which defeats the deterministic flag above; switch it off as well.
    torch.backends.cudnn.benchmark = False

# ===============================
# CONFIG
# ===============================
# Input data, output locations and the folder holding MedicalNet checkpoints.
MRI_DIR = r"C:\Users\Shahinur\Downloads\PKG_Dataset\PKG - Brain-Mets-Lung-MRI-Path-Segs_radiology_images\Brain-Mets-Lung-MRI-Path-Segs"
OUTPUT_DIR = "MRI_PYRADIOMICS_MED3D_OUTPUT"
FIGURES_DIR = f"{OUTPUT_DIR}/validation_figures"
PRETRAINED_DIR = r"D:\paper\weights\MedicalNet_pytorch_files2"  # Your pretrained weights location

# parents=True so a nested output path does not fail on a fresh machine.
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
Path(FIGURES_DIR).mkdir(parents=True, exist_ok=True)
try:
    Path(PRETRAINED_DIR).mkdir(parents=True, exist_ok=True)
except OSError as exc:
    # The weight loader copes with a missing folder (it falls back with a
    # warning), so an unreachable drive/path must not abort the whole script.
    print(f"Warning: could not create pretrained weights folder {PRETRAINED_DIR}: {exc}")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi'] = 300

print("="*80)
print("MRI PIPELINE: PyRadiomics + MedicalNet - CORRECTED VERSION")
print("FIXES: Pretrained weights, Z-score norm, Full embeddings, QC heuristics")
print("="*80)
print(f"Device: {DEVICE}")
print(f"MRI Directory: {MRI_DIR}")
print(f"Output: {OUTPUT_DIR}\n")

def log_msg(m):
    """Print a message and append it, timestamped, to ``progress.log``.

    Args:
        m: Message to emit; it is interpolated into an f-string, so any
            object with a string representation is accepted.

    File logging is best-effort: a failure to open or write the log file is
    ignored so that logging can never abort the pipeline.
    """
    print(m)
    try:
        # Explicit UTF-8: the messages contain non-ASCII symbols that the
        # platform default codec (e.g. cp1252 on Windows) may not be able to
        # encode, which previously made such lines vanish from the log.
        with open(f"{OUTPUT_DIR}/progress.log", 'a', encoding='utf-8') as f:
            f.write(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {m}\n")
    except OSError:
        # Deliberately swallowed: console output above already happened.
        pass

# ============================================================
# MED3D: 3D ResNet Architecture
# ============================================================
class BasicBlock3D(nn.Module):
    """Residual block made of two 3x3x3 convolutions, each followed by batch norm.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels of both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Sub-module names and creation order define the state_dict keys
        # that pretrained checkpoints are matched against.
        self.conv1 = nn.Conv3d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(planes, planes, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)

class Med3DResNet(nn.Module):
    """3D ResNet backbone (single input channel) used as a feature extractor.

    Args:
        model_depth: 10, 18 or 34; any other value falls back to the
            depth-10 layout.
        num_classes: Output size of the ``fc`` layer. The layer is created
            but ``forward`` does not apply it.

    NOTE(review): MedicalNet's reference ResNet appears to use dilated,
    stride-1 convolutions in layer3/layer4, whereas this variant uses
    stride 2 without dilation - confirm this is intended when loading the
    MedicalNet checkpoints.
    """

    def __init__(self, model_depth=10, num_classes=400):
        super().__init__()

        # Residual blocks per stage for each supported depth.
        stage_depths = {
            10: [1, 1, 1, 1],
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
        }
        depths = stage_depths.get(model_depth, [1, 1, 1, 1])

        # Stem
        self.inplanes = 64
        self.conv1 = nn.Conv3d(1, 64, kernel_size=7, stride=(2, 2, 2), padding=(3, 3, 3), bias=False)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

        # Residual stages
        self.layer1 = self._make_layer(BasicBlock3D, 64, depths[0])
        self.layer2 = self._make_layer(BasicBlock3D, 128, depths[1], stride=2)
        self.layer3 = self._make_layer(BasicBlock3D, 256, depths[2], stride=2)
        self.layer4 = self._make_layer(BasicBlock3D, 512, depths[3], stride=2)

        # Head
        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks producing ``planes`` channels."""
        # A 1x1x1 projection is needed whenever the shortcut would otherwise
        # disagree with the main branch in resolution or channel count.
        needs_projection = stride != 1 or self.inplanes != planes
        downsample = None
        if needs_projection:
            downsample = nn.Sequential(
                nn.Conv3d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm3d(planes)
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the globally pooled layer4 features, flattened per sample."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        return x.view(x.size(0), -1)  # Return 512-dim features

# ============================================================
# PRETRAINED WEIGHTS LOADER (FIX #1)
# ============================================================
def download_pretrained_weights(model_depth=10):
    """Locate a MedicalNet checkpoint for ``resnet_<model_depth>`` on disk.

    Despite its name this function downloads nothing: it only searches
    ``PRETRAINED_DIR``. A fixed list of conventional locations is tried
    first; if none exists, the folder is walked and the first ``.pth`` file
    whose name contains both ``resnet`` and the requested depth is used.

    Args:
        model_depth: ResNet depth the checkpoint must belong to.

    Returns:
        Path of the checkpoint file, or ``None`` when nothing suitable is found.
    """
    stem = f"resnet_{model_depth}"

    # Check common weight file names in MedicalNet
    possible_files = [
        f"{PRETRAINED_DIR}/{stem}_23dataset.pth",
        f"{PRETRAINED_DIR}/{stem}.pth",
        f"{PRETRAINED_DIR}/trail_1/{stem}_23dataset.pth",
        f"{PRETRAINED_DIR}/trail_1/{stem}.pth",
        f"{PRETRAINED_DIR}/models/{stem}_23dataset.pth",
    ]

    for weight_file in possible_files:
        if os.path.exists(weight_file):
            log_msg(f"  ‚úÖ Found pretrained weights: {weight_file}")
            return weight_file

    # If not found, list available files to help user
    log_msg(f"  ‚ö†Ô∏è Pretrained weights not found in standard locations!")
    log_msg(f"  üìÇ Searching in: {PRETRAINED_DIR}")

    if os.path.exists(PRETRAINED_DIR):
        log_msg(f"  üìã Available files:")
        # Walk once; sorting makes both the listing and the auto-detected
        # choice independent of filesystem enumeration order.
        checkpoints = sorted(
            os.path.join(root, name)
            for root, _dirs, files in os.walk(PRETRAINED_DIR)
            for name in files
            if name.endswith('.pth')
        )
        for full_path in checkpoints:
            log_msg(f"     - {full_path}")

        # Match the *requested* depth rather than a hard-coded "10".
        depth_tag = str(model_depth)
        for weight_file in checkpoints:
            name = os.path.basename(weight_file)
            if 'resnet' in name.lower() and depth_tag in name:
                log_msg(f"  ‚úÖ Auto-detected weight file: {weight_file}")
                return weight_file

    log_msg(f"  ‚ùå Could not find resnet_{model_depth} weights!")
    log_msg(f"  üí° Please check the extracted folder structure")
    return None

def load_pretrained_med3d(model, pretrained_path):
    """Copy matching tensors from a checkpoint file into ``model``.

    Args:
        model: Network whose ``state_dict`` should receive the weights.
        pretrained_path: Checkpoint path, or ``None``.

    Returns:
        ``(model, loaded)`` where ``loaded`` is ``True`` only if at least one
        checkpoint tensor was actually copied into the model. Every failure
        is logged and reported as ``False``; no exception is propagated.
    """
    if pretrained_path is None or not os.path.exists(pretrained_path):
        log_msg("  ‚ùå NO PRETRAINED WEIGHTS - USING RANDOM INITIALIZATION!")
        log_msg("  ‚ö†Ô∏è  THIS WILL PRODUCE INVALID FEATURES!")
        return model, False

    try:
        checkpoint = torch.load(pretrained_path, map_location='cpu')

        # Handle different checkpoint formats
        if 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        elif 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint

        # Remove a leading 'module.' (added by DataParallel) - only as a
        # prefix, never from the middle of a key.
        prefix = 'module.'
        cleaned = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

        # Keep tensors that exist in the model with an identical shape; the
        # classification head is skipped because num_classes differs.
        model_dict = model.state_dict()
        pretrained_dict = {
            key: value
            for key, value in cleaned.items()
            if key in model_dict
            and not key.startswith('fc.')
            and tuple(value.shape) == tuple(model_dict[key].shape)
        }

        if not pretrained_dict:
            # Nothing matched: the model is still randomly initialised, so
            # this must not be reported as a successful load.
            raise ValueError("no checkpoint tensor matches the model architecture")

        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict, strict=False)

        log_msg(f"  ‚úÖ Loaded {len(pretrained_dict)}/{len(model_dict)} pretrained layers")
        return model, True

    except Exception as e:
        log_msg(f"  ‚ùå Failed to load pretrained weights: {e}")
        log_msg(f"  ‚ö†Ô∏è  Falling back to RANDOM initialization (INVALID!)")
        return model, False

# ============================================================
# MRI DATA LOADER (FIX #2: Proper Normalization)
# ============================================================
class MRILoader:
    """Static helpers for discovering, loading and resizing NIfTI volumes."""

    # File name endings treated as NIfTI (compared case-insensitively).
    NIFTI_SUFFIXES = ('.nii', '.nii.gz')

    # (substring of the lower-cased file name, scan type); first match wins.
    _SCAN_KEYWORDS = (
        ('t1ce', 't1ce'),
        ('flair', 'flair'),
        ('whole_seg', 'seg'),
        ('core_seg', 'core'),
    )

    @staticmethod
    def load_nifti_as_sitk(path):
        """Load as SimpleITK (for PyRadiomics - no normalization).

        Returns the image, or ``None`` (after logging) if reading fails.
        """
        try:
            return sitk.ReadImage(path)
        except Exception as e:
            log_msg(f"  ‚ö†Ô∏è Failed to load {path}: {e}")
            return None

    @staticmethod
    def load_nifti_as_numpy(path):
        """Load as a float32 numpy array - NO normalization (handle downstream).

        NaN and +/-inf voxels are replaced by 0. Returns ``None`` (after
        logging) if reading fails.
        """
        try:
            nii = nib.load(path)
            data = nii.get_fdata()
            data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)
            return data.astype(np.float32)
        except Exception as e:
            log_msg(f"  ‚ö†Ô∏è Failed to load {path}: {e}")
            return None

    @staticmethod
    def get_patient_scans(patient_dir):
        """Map scan types to NIfTI paths found directly inside ``patient_dir``.

        Args:
            patient_dir: Directory holding one patient's files.

        Returns:
            Dict with any of the keys ``'t1ce'``, ``'flair'``, ``'seg'``,
            ``'core'``. Files are visited in sorted order and the first file
            matching a type is kept, so the result does not depend on the
            directory enumeration order.
        """
        scans = {}
        for file_name in sorted(os.listdir(patient_dir)):
            lowered = file_name.lower()
            # Accept compressed (.nii.gz) and upper-case extensions as well.
            if not lowered.endswith(MRILoader.NIFTI_SUFFIXES):
                continue
            for keyword, scan_type in MRILoader._SCAN_KEYWORDS:
                if keyword in lowered:
                    scans.setdefault(scan_type, os.path.join(patient_dir, file_name))
                    break
        return scans

    @staticmethod
    def resize_volume(volume, target_shape=(64, 64, 64)):
        """Resize a volume to ``target_shape`` with linear (order-1) interpolation."""
        zoom_factors = [t/s for t, s in zip(target_shape, volume.shape)]
        return zoom(volume, zoom_factors, order=1)

# ============================================================
# OPTIMIZER (FIX #3: QC Heuristics Only)
# ============================================================
class MRIOptimizer:
    """Derive a patch size and QC thresholds from a set of calibration patients.

    Results are collected in ``self.results``; the raw curves and scores
    they were derived from are kept in ``self.calibration_data`` so they can
    be plotted and saved.
    """

    def __init__(self, patient_dirs):
        """Store the calibration patient directories and empty result holders."""
        self.patient_dirs = patient_dirs
        self.results = {}
        self.calibration_data = {
            'volume_intensities': [],
            'snr_scores': [],
            'contrast_scores': [],
            'patch_size_curve': [],
            'patch_variance_curve': [],
            'normalization_means': defaultdict(list),
            'normalization_stds': defaultdict(list),
            'bootstrap_samples': [],
            'brain_volume_ratios': []
        }

    def _sample_volumes(self, max_patients=5):
        """Load the T1CE/FLAIR volumes of the first ``max_patients`` patients.

        Returns a list of ``(volume, scan_type)`` tuples; scans that are
        missing or fail to load are skipped.
        """
        loader = MRILoader()
        volumes = []

        for patient_dir in self.patient_dirs[:max_patients]:
            scans = loader.get_patient_scans(patient_dir)
            for scan_type in ['t1ce', 'flair']:
                if scan_type not in scans:
                    continue
                volume = loader.load_nifti_as_numpy(scans[scan_type])
                if volume is not None:
                    volumes.append((volume, scan_type))
        return volumes

    def optimize_patch_size(self):
        """Pick a cubic patch size from the intensity variance of random patches.

        For each candidate size, five random patches are drawn from each of
        up to three volumes and the variance of their above-threshold voxels
        is averaged. The size at which the second derivative of that curve
        is closest to zero is selected, clamped to [48, 128]. Falls back to
        64 when fewer than two volumes or fewer than three usable sizes are
        available.

        Returns:
            The selected patch edge length in voxels.
        """
        log_msg("METHOD 1: 3D Patch Size Optimization")
        volumes = self._sample_volumes(max_patients=3)

        optimal = 64
        if len(volumes) >= 2:
            patch_sizes = [32, 48, 64, 80, 96, 112, 128]
            # The 10th-percentile floor depends only on the volume, so it is
            # computed once instead of once per sampled patch.
            sampled = [(volume, np.percentile(volume, 10)) for volume, _ in volumes[:3]]

            # Sizes that actually produced a variance. A size is skipped when
            # no volume is large enough for it, so positions in `variances`
            # must be mapped back through this list, not through patch_sizes.
            evaluated_sizes = []
            variances = []

            for ps in patch_sizes:
                patch_vars = []
                for volume, floor in sampled:
                    if not all(s > ps for s in volume.shape):
                        continue
                    for _ in range(5):
                        x = np.random.randint(0, volume.shape[0] - ps)
                        y = np.random.randint(0, volume.shape[1] - ps)
                        z = np.random.randint(0, volume.shape[2] - ps)
                        patch = volume[x:x+ps, y:y+ps, z:z+ps]
                        nz = patch[patch > floor]
                        if len(nz) > 100:
                            patch_vars.append(np.var(nz))

                if patch_vars:
                    evaluated_sizes.append(ps)
                    variances.append(np.mean(patch_vars))
                    self.calibration_data['patch_size_curve'].append(ps)
                    self.calibration_data['patch_variance_curve'].append(float(np.mean(patch_vars)))

            if len(variances) >= 3:
                grad2 = np.gradient(np.gradient(variances))
                elbow_idx = int(np.argmin(np.abs(grad2)))
                optimal = evaluated_sizes[elbow_idx]

        optimal = max(48, min(optimal, 128))
        self.results['patch_size'] = {'optimal': optimal}
        log_msg(f"‚úÖ Optimal 3D patch: {optimal}¬≥")
        return optimal

    def optimize_quality_metrics(self):
        """Derive SNR and contrast QC thresholds from up to six patients.

        SNR is the mean of voxels above the median divided by the standard
        deviation of voxels below the 10th percentile; contrast is the
        standard deviation of the whole volume. The thresholds are the 20th
        (SNR) and 25th (contrast) percentiles of those scores, with defaults
        of 5.0 and 15.0 when no score could be computed.

        Returns:
            ``(snr_threshold, contrast_threshold)`` as floats.
        """
        log_msg("METHOD 2: Quality Metrics (QC heuristics only)")
        volumes = self._sample_volumes(max_patients=6)

        snrs = []
        contrasts = []

        for volume, _ in volumes:
            foreground = volume[volume > np.percentile(volume, 50)]
            background = volume[volume < np.percentile(volume, 10)]

            if len(background) > 10 and background.std() > 0:
                snr = foreground.mean() / (background.std() + 1e-8)
                snrs.append(snr)

            contrast = volume.std()
            contrasts.append(contrast)

        self.calibration_data['snr_scores'] = [float(s) for s in snrs]
        self.calibration_data['contrast_scores'] = [float(c) for c in contrasts]

        snr_th = float(np.percentile(snrs, 20)) if snrs else 5.0
        contrast_th = float(np.percentile(contrasts, 25)) if contrasts else 15.0

        self.results['qc_thresholds'] = {
            'snr': snr_th,
            'contrast': contrast_th,
            'note': 'QC heuristics - scanner specific, not for normalization'
        }

        log_msg(f"‚úÖ QC thresholds: SNR={snr_th:.2f}, Contrast={contrast_th:.2f}")
        log_msg(f"   ‚ö†Ô∏è These are QC heuristics ONLY - NOT for normalization!")
        return snr_th, contrast_th

    def save(self, output_dir):
        """Write ``optimization.json`` and ``calibration_data.json`` to ``output_dir``.

        Any error is logged rather than raised.
        """
        try:
            with open(f"{output_dir}/optimization.json", 'w') as f:
                json.dump({
                    'timestamp': datetime.now().isoformat(),
                    'seed': RANDOM_SEED,
                    'warning': 'Thresholds are QC heuristics - scanner/protocol specific',
                    **self.results
                }, f, indent=2)

            with open(f"{output_dir}/calibration_data.json", 'w') as f:
                json.dump(self.calibration_data, f, indent=2)

            log_msg(f"‚úÖ Saved optimization results")
        except Exception as e:
            log_msg(f"‚ùå Error saving: {e}")

# ============================================================
# FEATURE EXTRACTORS
# ============================================================
class PyRadiomicsExtractor:
    """Thin wrapper around PyRadiomics' ``RadiomicsFeatureExtractor``."""

    # Feature classes switched on for every extraction.
    FEATURE_CLASSES = ('firstorder', 'shape', 'glcm', 'glrlm', 'glszm', 'gldm')

    def __init__(self):
        """Create the extractor and enable the 'Original' image type and feature classes."""
        log_msg("  Initializing PyRadiomics...")
        # The file imports the class itself
        # (`from radiomics.featureextractor import RadiomicsFeatureExtractor`),
        # so the module name `featureextractor` is not bound here.
        self.extractor = RadiomicsFeatureExtractor()
        self.extractor.enableImageTypeByName('Original')
        for feature_class in self.FEATURE_CLASSES:
            self.extractor.enableFeatureClassByName(feature_class)
        log_msg("    ‚úÖ PyRadiomics ready")

    def extract(self, image_path, mask_path=None):
        """Extract radiomics features from one image.

        Args:
            image_path: Path of the image to analyse.
            mask_path: Path of a segmentation mask. When ``None`` or not an
                existing file, a mask is built from all voxels brighter than
                the image's 15th intensity percentile.

        Returns:
            Dict of feature name to float (diagnostic entries and values
            that cannot be converted to float are left out), or ``None``
            if extraction fails; the failure is logged.
        """
        try:
            image = sitk.ReadImage(image_path)

            if mask_path is None or not os.path.exists(mask_path):
                image_arr = sitk.GetArrayFromImage(image)
                threshold = np.percentile(image_arr, 15)
                mask_arr = (image_arr > threshold).astype(np.uint8)
                mask = sitk.GetImageFromArray(mask_arr)
                # Give the synthetic mask the image's geometry metadata.
                mask.CopyInformation(image)
            else:
                mask = sitk.ReadImage(mask_path)

            features = self.extractor.execute(image, mask)

            feature_dict = {}
            for key, val in features.items():
                if key.startswith('diagnostics'):
                    continue
                try:
                    feature_dict[key] = float(val)
                except (TypeError, ValueError):
                    # Non-scalar / non-numeric entries are skipped on purpose.
                    continue

            return feature_dict

        except Exception as e:
            log_msg(f"    ‚ö†Ô∏è PyRadiomics failed: {e}")
            return None

class Med3DExtractor:
    """Turn an MRI volume into a Med3D embedding plus summary statistics."""

    def __init__(self, patch_size=64, use_pretrained=True):
        """Build the depth-10 network, optionally load weights, move it to DEVICE.

        Args:
            patch_size: Edge length of the cube every volume is resized to.
            use_pretrained: Whether to look for and load a checkpoint.
        """
        log_msg("  Initializing Med3D...")
        self.patch_size = patch_size
        self.model = Med3DResNet(model_depth=10, num_classes=400)
        self.is_pretrained = False

        if use_pretrained:
            weights_path = download_pretrained_weights(model_depth=10)
            self.model, self.is_pretrained = load_pretrained_med3d(self.model, weights_path)

        self.model = self.model.to(DEVICE).eval()

        if not self.is_pretrained:
            log_msg("    ‚ö†Ô∏è‚ö†Ô∏è‚ö†Ô∏è WARNING: No pretrained weights - features INVALID!")

    def preprocess_volume(self, volume):
        """Resize to the configured cube, z-score normalise, add batch/channel axes.

        Mean and standard deviation come from the voxels above the volume's
        1st percentile when more than 100 such voxels exist, otherwise from
        the whole volume.

        Returns:
            Float tensor with two leading singleton dimensions.
        """
        cube = (self.patch_size, self.patch_size, self.patch_size)
        if volume.shape != cube:
            volume = MRILoader.resize_volume(volume, cube)

        foreground = volume > np.percentile(volume, 1)
        if foreground.sum() > 100:
            reference = volume[foreground]
        else:
            reference = volume
        volume = (volume - reference.mean()) / (reference.std() + 1e-8)

        return torch.FloatTensor(volume).unsqueeze(0).unsqueeze(0)

    def extract(self, volume):
        """Run the network on ``volume`` and describe the resulting embedding.

        Returns:
            Dict holding the full embedding as a list, its mean/std/max/min
            and L2 norm, and the ``is_pretrained`` flag; ``None`` (after
            logging) if anything fails.
        """
        try:
            batch = self.preprocess_volume(volume).to(DEVICE)

            with torch.no_grad():
                output = self.model(batch)

            embedding = output.squeeze().cpu().numpy()

            return {
                'med3d_embedding': embedding.tolist(),  # FULL 512-dim
                'med3d_mean': float(embedding.mean()),
                'med3d_std': float(embedding.std()),
                'med3d_max': float(embedding.max()),
                'med3d_min': float(embedding.min()),
                'med3d_l2norm': float(np.linalg.norm(embedding)),
                'is_pretrained': self.is_pretrained
            }

        except Exception as e:
            log_msg(f"    ‚ö†Ô∏è Med3D failed: {e}")
            return None

# ============================================================
# VALIDATION FIGURES
# ============================================================
class MRIValidationFigures:
    """Render the validation plots and the text report for one pipeline run."""

    # The embedding length matches the 512-feature output of Med3DResNet;
    # the main loop stores five scalar summaries per patient
    # (mean, std, max, min, l2norm).
    MED3D_EMBEDDING_DIM = 512
    MED3D_STAT_COUNT = 5

    def __init__(self, output_dir, figures_dir, opt_results, params, calib_data, features_df, qc_df):
        """Keep references to the run's outputs.

        Args:
            output_dir: Pipeline output directory.
            figures_dir: Directory the figures and report are written to.
            opt_results: Optimizer results dict.
            params: Dict with at least ``'patch_size'``.
            calib_data: Calibration curves/scores from the optimizer.
            features_df: Per-patient feature table.
            qc_df: Per-patient QC table with a ``'status'`` column.
        """
        self.output_dir = output_dir
        self.figures_dir = figures_dir
        self.opt_results = opt_results
        self.params = params
        self.calib_data = calib_data
        self.features_df = features_df
        self.qc_df = qc_df

    def _radiomics_feature_count(self):
        """Count feature columns whose name contains 'original' (case-insensitive)."""
        return len([c for c in self.features_df.columns if 'original' in c.lower()])

    def generate_all(self):
        """Produce all three figures and then the text report."""
        log_msg("\nGenerating validation figures...")
        self.fig01_patch_size()
        self.fig02_quality_metrics()
        self.fig03_qc_summary()
        self.save_report()

    def fig01_patch_size(self):
        """Plot patch variance against patch size and mark the selected size."""
        log_msg("[1/3] Patch Size...")
        fig, ax = plt.subplots(figsize=(12, 7))

        patch_sizes = self.calib_data.get('patch_size_curve', [])
        variances = self.calib_data.get('patch_variance_curve', [])

        if patch_sizes:
            ax.plot(patch_sizes, variances, 'o-', linewidth=3, markersize=10, color='steelblue')
            ax.axvline(self.params['patch_size'], color='red', linestyle='--', linewidth=2.5,
                      label=f'Selected: {self.params["patch_size"]}¬≥')
            # A legend is only meaningful when the labelled line exists.
            ax.legend(fontsize=11)

        ax.set_xlabel('Patch Size (voxels)', fontsize=13, fontweight='bold')
        ax.set_ylabel('Feature Variance', fontsize=13, fontweight='bold')
        ax.set_title('3D Patch Size Optimization', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(f"{self.figures_dir}/01_patch_size.png", dpi=300)
        plt.close()

    def fig02_quality_metrics(self):
        """Plot histograms of the calibration SNR and contrast scores."""
        log_msg("[2/3] Quality Metrics...")
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

        snrs = self.calib_data.get('snr_scores', [])
        if snrs:
            ax1.hist(snrs, bins=30, color='lightblue', alpha=0.7, edgecolor='black')
            ax1.set_title('SNR Distribution (QC)', fontsize=12, fontweight='bold')
            ax1.grid(True, alpha=0.3)

        contrasts = self.calib_data.get('contrast_scores', [])
        if contrasts:
            ax2.hist(contrasts, bins=30, color='lightcoral', alpha=0.7, edgecolor='black')
            ax2.set_title('Contrast Distribution (QC)', fontsize=12, fontweight='bold')
            ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(f"{self.figures_dir}/02_quality_metrics.png", dpi=300)
        plt.close()

    def fig03_qc_summary(self):
        """Plot the success/fail split and the feature count per method."""
        log_msg("[3/3] QC Summary...")
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

        success = (self.qc_df['status'] == 'ok').sum()
        fail = (self.qc_df['status'] == 'fail').sum()

        ax1.pie([success, fail], labels=[f'Success\n{success}', f'Fail\n{fail}'],
               autopct='%1.1f%%', colors=['#2ecc71', '#e74c3c'], startangle=90)
        ax1.set_title('Processing Success', fontsize=12, fontweight='bold')

        # Feature counts
        radiomics_count = self._radiomics_feature_count()
        med3d_count = self.MED3D_EMBEDDING_DIM + self.MED3D_STAT_COUNT

        ax2.bar(['PyRadiomics', 'Med3D'], [radiomics_count, med3d_count],
               color=['#3498db', '#e74c3c'], edgecolor='black', linewidth=2)
        ax2.set_ylabel('Feature Count', fontsize=11, fontweight='bold')
        ax2.set_title('Feature Methods', fontsize=12, fontweight='bold')
        ax2.grid(True, alpha=0.3, axis='y')

        plt.tight_layout()
        plt.savefig(f"{self.figures_dir}/03_qc_summary.png", dpi=300)
        plt.close()

    def save_report(self):
        """Log the validation report and write it to ``VALIDATION_REPORT.txt``."""
        total = len(self.qc_df)
        success = (self.qc_df['status'] == 'ok').sum()
        # Guard the percentage against an empty QC table.
        success_pct = success / total * 100 if total else 0.0

        report = f"""
MRI PIPELINE VALIDATION REPORT - CORRECTED VERSION

OUTPUT: {self.figures_dir}/

GENERATED FIGURES:
  01. 01_patch_size.png - Patch size optimization
  02. 02_quality_metrics.png - QC metrics (SNR, Contrast)
  03. 03_qc_summary.png - Processing summary

KEY STATISTICS:
  ‚Ä¢ Patients: {total}
  ‚Ä¢ Success: {success} ({success_pct:.1f}%)
  ‚Ä¢ PyRadiomics features: ~{self._radiomics_feature_count()}
  ‚Ä¢ Med3D features: {self.MED3D_EMBEDDING_DIM}-dim embedding + {self.MED3D_STAT_COUNT} stats

CRITICAL FIXES APPLIED:
  ‚úÖ FIX #1: Pretrained Med3D weights support
  ‚úÖ FIX #2: Z-score normalization (robust, scanner-independent)
  ‚úÖ FIX #3: Full 512-dim embeddings (not collapsed to 4 numbers)
  ‚úÖ FIX #4: QC thresholds as heuristics only (not for normalization)
  ‚úÖ FIX #5: No hardcoded intensity thresholds

IMPORTANT NOTES:
  ‚Ä¢ Download pretrained weights from: https://github.com/Tencent/MedicalNet
  ‚Ä¢ QC thresholds are scanner-specific - use for quality checks only
  ‚Ä¢ Med3D requires pretrained weights to produce valid features
  ‚Ä¢ Full 512-dim embeddings saved for downstream analysis
"""

        log_msg(report)
        # Explicit UTF-8: the report contains non-ASCII symbols that the
        # platform default codec may be unable to encode.
        with open(f"{self.figures_dir}/VALIDATION_REPORT.txt", 'w', encoding='utf-8') as f:
            f.write(report)

# ============================================================
# MAIN PIPELINE
# ============================================================
def main():
    """Run the whole pipeline: calibration, extraction, export, figures.

    Steps:
        1. Derive patch size and QC thresholds from ~15% of the patients.
        2. For every patient (calibration patients included) extract
           PyRadiomics features and a Med3D embedding from the T1CE scan,
           or the FLAIR scan when T1CE is absent.
        3. Write ``all_features.csv``, ``qc.csv`` and one embedding JSON per
           patient below ``OUTPUT_DIR``.
        4. Render the validation figures and report.

    Returns ``None``; stops early when no patient directory exists or no
    patient produced features.
    """
    # Sorted before shuffling: os.listdir order is platform dependent, so the
    # seeded shuffle is only reproducible from a stable starting order.
    patient_dirs = sorted(
        os.path.join(MRI_DIR, d) for d in os.listdir(MRI_DIR)
        if os.path.isdir(os.path.join(MRI_DIR, d))
    )

    if len(patient_dirs) == 0:
        log_msg("‚ùå No patient directories found!")
        return

    np.random.shuffle(patient_dirs)

    # 15% for calibration
    n_calib = max(1, int(0.15 * len(patient_dirs)))
    cal_dirs = patient_dirs[:n_calib]
    proc_dirs = patient_dirs

    log_msg("\n" + "="*80)
    log_msg("STEP 1: CALIBRATION")
    log_msg("="*80)
    log_msg(f"Calibration: {n_calib} patients")
    log_msg(f"Processing: {len(proc_dirs)} patients\n")

    # Optimize
    opt = MRIOptimizer(cal_dirs)
    patch_size = opt.optimize_patch_size()
    snr_th, contrast_th = opt.optimize_quality_metrics()
    opt.save(OUTPUT_DIR)

    params = {
        'patch_size': patch_size,
        'qc_snr': snr_th,
        'qc_contrast': contrast_th
    }

    with open(f"{OUTPUT_DIR}/params.json", 'w') as f:
        json.dump(params, f, indent=2)

    # Initialize extractors
    pyrad = PyRadiomicsExtractor()
    med3d = Med3DExtractor(patch_size=patch_size, use_pretrained=True)
    loader = MRILoader()

    # The embedding files are written inside the loop below, so the folder
    # has to exist *before* extraction starts; otherwise every successful
    # Med3D run raises and the patient is recorded as failed.
    Path(f"{OUTPUT_DIR}/embeddings").mkdir(parents=True, exist_ok=True)

    log_msg("\n" + "="*80)
    log_msg("STEP 2: FEATURE EXTRACTION")
    log_msg("="*80 + "\n")

    all_rows = []
    qc_rows = []

    for i, patient_dir in enumerate(proc_dirs, 1):
        try:
            patient_id = os.path.basename(patient_dir)
            log_msg(f"[{i}/{len(proc_dirs)}] {patient_id}")

            scans = loader.get_patient_scans(patient_dir)

            if not scans:
                log_msg(f"  ‚ùå No scans found")
                qc_rows.append({'patient': patient_id, 'status': 'fail', 'primary_modality': 'none'})
                continue

            # Primary modality
            primary_modality = 't1ce' if 't1ce' in scans else ('flair' if 'flair' in scans else None)

            if primary_modality is None:
                log_msg(f"  ‚ùå No T1CE or FLAIR")
                qc_rows.append({'patient': patient_id, 'status': 'fail', 'primary_modality': 'none'})
                continue

            row = {'patient': patient_id}

            # PyRadiomics
            log_msg(f"  ‚Üí PyRadiomics ({primary_modality.upper()})...")
            pyrad_feats = pyrad.extract(scans[primary_modality],
                                       scans.get('seg', scans.get('core', None)))

            if pyrad_feats:
                row.update(pyrad_feats)
                log_msg(f"    ‚úÖ {len(pyrad_feats)} features")

            # Med3D
            log_msg(f"  ‚Üí Med3D...")
            volume = loader.load_nifti_as_numpy(scans[primary_modality])

            if volume is not None:
                med3d_feats = med3d.extract(volume)

                if med3d_feats:
                    # Save summary stats in main CSV
                    row['med3d_mean'] = med3d_feats['med3d_mean']
                    row['med3d_std'] = med3d_feats['med3d_std']
                    row['med3d_max'] = med3d_feats['med3d_max']
                    row['med3d_min'] = med3d_feats['med3d_min']
                    row['med3d_l2norm'] = med3d_feats['med3d_l2norm']
                    row['med3d_pretrained'] = med3d_feats['is_pretrained']

                    # Save FULL embedding separately
                    embedding = {
                        'patient': patient_id,
                        'embedding': med3d_feats['med3d_embedding'],
                        'is_pretrained': med3d_feats['is_pretrained']
                    }

                    with open(f"{OUTPUT_DIR}/embeddings/{patient_id}_med3d.json", 'w') as f:
                        json.dump(embedding, f)

                    log_msg(f"    ‚úÖ Med3D: 512-dim embedding saved")

            if len(row) == 1:
                # Only the patient id is present: both extractors failed, so
                # this must not be counted as a successful patient.
                log_msg(f"  ‚ùå No features extracted")
                qc_rows.append({'patient': patient_id, 'status': 'fail',
                              'primary_modality': primary_modality})
                continue

            all_rows.append(row)
            qc_rows.append({'patient': patient_id, 'status': 'ok', 'primary_modality': primary_modality})

            log_msg(f"  ‚úÖ Total: {len(row)-1} features")

        except Exception as e:
            log_msg(f"  ‚ùå Error: {e}")
            traceback.print_exc()
            qc_rows.append({'patient': os.path.basename(patient_dir), 'status': 'fail',
                          'primary_modality': 'error'})
            continue

    # Save results
    log_msg("\n" + "="*80)
    log_msg("STEP 3: SAVING RESULTS")
    log_msg("="*80 + "\n")

    if all_rows:
        df = pd.DataFrame(all_rows)
        df.to_csv(f"{OUTPUT_DIR}/all_features.csv", index=False)
        log_msg(f"‚úÖ Features: {len(df)} patients √ó {len(df.columns)-1} features")
        log_msg(f"   Main CSV: {OUTPUT_DIR}/all_features.csv")
        log_msg(f"   Full embeddings: {OUTPUT_DIR}/embeddings/")
    else:
        log_msg("‚ùå No patients processed!")

    # QC - written even when nothing succeeded, because that is exactly when
    # the per-patient failure record is needed.
    qc_df = pd.DataFrame(qc_rows)
    qc_df.to_csv(f"{OUTPUT_DIR}/qc.csv", index=False)
    log_msg(f"‚úÖ QC: {OUTPUT_DIR}/qc.csv")

    if not all_rows:
        return

    # Validation figures
    log_msg("\n" + "="*80)
    log_msg("STEP 4: VALIDATION FIGURES")
    log_msg("="*80 + "\n")

    validator = MRIValidationFigures(
        OUTPUT_DIR,
        FIGURES_DIR,
        opt.results,
        params,
        opt.calibration_data,
        df,
        qc_df
    )
    validator.generate_all()

    log_msg("\n" + "="*80)
    log_msg("‚úÖ PIPELINE COMPLETE - ALL FIXES APPLIED")
    log_msg("="*80)
    log_msg("\nCRITICAL REMINDERS:")
    log_msg("  1. Download pretrained Med3D weights if not present")
    log_msg("  2. Full 512-dim embeddings saved in embeddings/")
    log_msg("  3. QC thresholds are heuristics only - not for normalization")
    log_msg("  4. Z-score normalization applied in Med3D preprocessing")
    log_msg("  5. PyRadiomics handles normalization internally")
    log_msg("\nFIXES APPLIED:")
    log_msg("  ‚úÖ Pretrained weight loading (with fallback warning)")
    log_msg("  ‚úÖ Z-score normalization (scanner-independent)")
    log_msg("  ‚úÖ Full 512-dim embeddings (not collapsed)")
    log_msg("  ‚úÖ QC thresholds as heuristics only")
    log_msg("  ‚úÖ Proper normalization handling")
    log_msg(f"\nOutput: {OUTPUT_DIR}")
    log_msg(f"Figures: {FIGURES_DIR}")
    log_msg("="*80 + "\n")

if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'radiomics.featureextractor'

In [None]:
# ============================================================
# PYRADIOMICS INSTALLATION FIX - Windows Solutions
# ============================================================

import subprocess
import sys
import platform

def run_command(cmd):
    """Run an external command and report whether it succeeded.

    Args:
        cmd: Either a single command string, which is executed through the
            shell exactly as before, or a sequence of arguments, which is
            executed directly without a shell so that no quoting of paths
            or arguments is needed.

    Returns:
        True if the command exited with status 0; False if it exited with
        a non-zero status or could not be started.
    """
    # Only plain strings need the shell; an argument sequence is passed to
    # the OS as-is, which sidesteps shell quoting problems entirely.
    use_shell = isinstance(cmd, str)
    shown = cmd if use_shell else " ".join(str(part) for part in cmd)
    print(f"Running: {shown}")

    try:
        subprocess.check_call(cmd, shell=use_shell)
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        # CalledProcessError (non-zero exit) is a SubprocessError; OSError
        # covers a missing or non-executable program; ValueError covers
        # arguments that subprocess rejects.
        print(f"‚ùå Failed: {e}")
        return False
    return True

def method_1_prebuilt_wheel():
    """Method 1: Try pre-built wheel (fastest)

    Upgrades pip, setuptools and wheel, then installs pyradiomics with
    ``--only-binary :all:``.

    Returns:
        True if every command succeeded; False as soon as one fails (the
        remaining commands are not run).
    """
    print("\n" + "="*80)
    print("METHOD 1: Installing pre-built PyRadiomics wheel")
    print("="*80 + "\n")

    # The commands are shell strings, so the interpreter path must be quoted:
    # an unquoted path containing spaces would be split into several words.
    pip = f'"{sys.executable}" -m pip'

    commands = [
        # Upgrade pip first
        f"{pip} install --upgrade pip setuptools wheel",

        # Try pre-built wheel only
        f"{pip} install --only-binary :all: pyradiomics",
    ]

    # all() stops at the first failing command, like an early return.
    return all(run_command(cmd) for cmd in commands)

def method_2_upgrade_tools():
    """Method 2: Upgrade build tools and retry

    Upgrades pip, setuptools and wheel, upgrades numpy and Cython, then
    installs pyradiomics with ``--no-cache-dir``.

    Returns:
        True if every command succeeded; False as soon as one fails (the
        remaining commands are not run).
    """
    print("\n" + "="*80)
    print("METHOD 2: Upgrading build tools and retrying")
    print("="*80 + "\n")

    # The commands are shell strings, so the interpreter path must be quoted:
    # an unquoted path containing spaces would be split into several words.
    pip = f'"{sys.executable}" -m pip'

    commands = [
        f"{pip} install --upgrade pip setuptools wheel",
        f"{pip} install --upgrade numpy Cython",
        f"{pip} install pyradiomics --no-cache-dir",
    ]

    # all() stops at the first failing command, like an early return.
    return all(run_command(cmd) for cmd in commands)

def method_3_conda():
    """Method 3: install PyRadiomics from the conda-forge channel.

    Prints a section banner and runs the conda install command through
    ``run_command``.

    Returns:
        True when every step succeeded, False at the first failing step.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("METHOD 3: Installing via conda-forge")
    print(rule + "\n")

    conda_steps = (
        "conda install -c conda-forge pyradiomics -y",
    )

    # Short-circuits on the first step that reports failure.
    return all(run_command(step) for step in conda_steps)

def method_4_from_source():
    """Method 4: Install from source with specific flags

    Upgrades pip, wheel and setuptools, installs numpy, Cython and six,
    then installs pyradiomics with ``--no-build-isolation``.

    Returns:
        True if every command succeeded; False as soon as one fails (the
        remaining commands are not run).
    """
    print("\n" + "="*80)
    print("METHOD 4: Building from source")
    print("="*80 + "\n")

    # The commands are shell strings, so the interpreter path must be quoted:
    # an unquoted path containing spaces would be split into several words.
    pip = f'"{sys.executable}" -m pip'

    commands = [
        f"{pip} install --upgrade pip wheel setuptools",
        f"{pip} install numpy Cython six",
        f"{pip} install pyradiomics --no-build-isolation",
    ]

    # all() stops at the first failing command, like an early return.
    return all(run_command(cmd) for cmd in commands)

def verify_installation():
    """Verify PyRadiomics is installed correctly

    Imports ``radiomics.featureextractor``, constructs a
    ``RadiomicsFeatureExtractor`` with default arguments and prints the
    package version.

    Returns:
        True if all of the above succeeded; False if any step raised (the
        error is printed, not propagated).
    """
    print("\n" + "="*80)
    print("VERIFYING INSTALLATION")
    print("="*80 + "\n")

    try:
        # The package may have been installed after this interpreter
        # started; refresh the import finders' caches so a freshly
        # installed package is noticed.
        import importlib
        importlib.invalidate_caches()

        from radiomics import featureextractor
        print("‚úÖ PyRadiomics imported successfully!")

        # Try to create extractor (result is not needed, only that it works)
        featureextractor.RadiomicsFeatureExtractor()
        print("‚úÖ RadiomicsFeatureExtractor created successfully!")

        # Get version
        import radiomics
        print(f"‚úÖ PyRadiomics version: {radiomics.__version__}")

        return True
    except Exception as e:
        # Deliberately broad: any failure while importing or constructing
        # the extractor means the installation is not usable.
        print(f"‚ùå Verification failed: {e}")
        return False

def main():
    """Try each PyRadiomics installation strategy until one verifies.

    Prints interpreter and platform details, then runs the install methods
    in order. The first method that both reports success and passes
    ``verify_installation`` ends the function. If none does, a list of
    manual installation options is printed.
    """
    rule = "=" * 80

    print(rule)
    print("PYRADIOMICS INSTALLATION FIX FOR WINDOWS")
    print(rule)
    print(f"\nPython: {sys.executable}")
    print(f"Platform: {platform.platform()}")
    print(f"Python version: {sys.version}")

    # Strategies are attempted in this order.
    strategies = (
        ("Pre-built wheel (recommended)", method_1_prebuilt_wheel),
        ("Upgrade tools and retry", method_2_upgrade_tools),
        ("Conda installation", method_3_conda),
        ("Build from source", method_4_from_source),
    )

    for label, install in strategies:
        # Everything for one attempt stays inside the try so that any error,
        # including one raised while printing, moves on to the next strategy.
        try:
            print("\n" + rule)
            print(f"TRYING: {label}")
            print(rule)

            if not install():
                continue
            if not verify_installation():
                continue

            print("\n" + rule)
            print(f"‚úÖ SUCCESS! PyRadiomics installed via: {label}")
            print(rule)
            return
        except Exception as err:
            print(f"Method failed with error: {err}")

    # Reached only when no strategy produced a verified installation.
    manual_help = (
        "\n" + rule,
        "‚ùå ALL METHODS FAILED",
        rule,
        "\nMANUAL SOLUTIONS:",
        "\n1. Install Visual C++ Build Tools:",
        "   https://visualstudio.microsoft.com/visual-cpp-build-tools/",
        "   Then retry: pip install pyradiomics",
        "\n2. Use Anaconda/Miniconda:",
        "   conda create -n mri python=3.9",
        "   conda activate mri",
        "   conda install -c conda-forge pyradiomics",
        "\n3. Use WSL (Windows Subsystem for Linux):",
        "   Install Ubuntu from Microsoft Store",
        "\n4. Use Docker:",
        "   docker pull radiomics/pyradiomics",
        "\n" + rule,
    )
    for line in manual_help:
        print(line)

# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

PYRADIOMICS INSTALLATION FIX FOR WINDOWS

Python: d:\paper\venv\python.exe
Platform: Windows-10-10.0.26100-SP0
Python version: 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:15:49) [MSC v.1941 64 bit (AMD64)]

TRYING: Pre-built wheel (recommended)

METHOD 1: Installing pre-built PyRadiomics wheel

Running: d:\paper\venv\python.exe -m pip install --upgrade pip setuptools wheel
Running: d:\paper\venv\python.exe -m pip install --only-binary :all: pyradiomics
❌ Failed: Command 'd:\paper\venv\python.exe -m pip install --only-binary :all: pyradiomics' returned non-zero exit status 1.

TRYING: Upgrade tools and retry

METHOD 2: Upgrading build tools and retrying

Running: d:\paper\venv\python.exe -m pip install --upgrade pip setuptools wheel
Running: d:\paper\venv\python.exe -m pip install --upgrade numpy Cython
Running: d:\paper\venv\python.exe -m pip install pyradiomics --no-cache-dir
‚ùå Failed: Command 'd:\paper\venv\python.exe -m pip install pyradiomics --no-cache-dir' retu