**IMPORTS AND TPU SET UP**

In [None]:
import subprocess
import sys

def install_required_packages(packages=None):
    """Make sure the notebook's third-party dependencies can be imported.

    Each package is probed by importing its module; pip is invoked only when
    that import fails. Installation problems are reported and skipped so that
    one failing package does not stop the remaining ones.

    Args:
        packages: Optional mapping of pip distribution name to the module name
            used to import it. When omitted, the notebook's own dependency
            set is used.
    """
    import importlib

    if packages is None:
        # The pip name and the import name differ for some distributions, so
        # the import name cannot be derived by rewriting the pip name.
        packages = {
            'pydicom': 'pydicom',
            'nibabel': 'nibabel',
            'opencv-python': 'cv2',
            'scikit-learn': 'sklearn',
        }

    for package, module_name in packages.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            print(f"Installing {package}...")
            try:
                subprocess.check_call([sys.executable, '-m', 'pip', 'install', package, '-q'])
            except (subprocess.CalledProcessError, OSError) as e:
                print(f"⚠ Failed to install {package}: {e}")
            else:
                print(f"{package} installed successfully")
        else:
            print(f"{package} already installed")

# Run the dependency check when this cell executes.
print("Checking and installing required packages...")
install_required_packages()

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pydicom
from pydicom.errors import InvalidDicomError
import nibabel as nib
import cv2
from scipy import ndimage
from tqdm.auto import tqdm
import warnings
import gc
import time
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')

# Optional TPU support: try to import the torch_xla modules used later in this
# file. TPU_AVAILABLE only records whether the import succeeded; when it fails
# the notebook falls back to GPU/CPU.
try:
    import torch_xla
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.parallel_loader as pl
    TPU_AVAILABLE = True
    print("TPU libraries loaded successfully")
except ImportError:
    TPU_AVAILABLE = False
    print("TPU libraries not available, falling back to GPU/CPU")


**TPU-OPTIMIZED CONFIGURATION AND TRAINING PIPELINE**

In [None]:
class Config:
    """Static settings shared by the data pipeline, the model and training.

    The class body runs once when the class is defined: it selects the compute
    device and prints which one was chosen.
    """

    # Input locations
    TRAIN_CSV_PATH = '/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv'
    SERIES_DIR = '/kaggle/input/rsna-intracranial-aneurysm-detection/series/'

    # Volume geometry and optimisation hyper-parameters
    TARGET_SIZE = (64, 128, 128)  # (depth, height, width)
    BATCH_SIZE = 4 if not TPU_AVAILABLE else 2
    EPOCHS = 10
    LEARNING_RATE = 1e-4
    WEIGHT_DECAY = 1e-4

    # Compute device: XLA when torch_xla was imported, otherwise CUDA if
    # present, otherwise CPU
    if not TPU_AVAILABLE:
        DEVICE = torch.device('cpu' if not torch.cuda.is_available() else 'cuda')
        print(f"Using device: {DEVICE}")
    else:
        DEVICE = xm.xla_device()
        print(f"Using TPU device: {DEVICE}")

    # Column names read from the training CSV
    ID_COL = 'SeriesInstanceUID'
    TARGET_COL = 'Aneurysm Present'

    # Debug switches: when enabled, training uses only the first
    # DEBUG_SAMPLES rows of the CSV
    DEBUG_MODE = True
    DEBUG_SAMPLES = 200

# Fixed: Proper 3D CNN Architecture for Classification Only
class ImprovedAneurysmNet(nn.Module):
    """3D CNN classifier: three conv stages, adaptive pooling and an MLP head.

    Args:
        in_channels: Number of channels in the input volume.
        num_classes: Width of the final linear layer; raw logits are returned.
        dropout_rate: Base dropout probability. Individual dropout layers use
            this value scaled by 0.5, 0.7 or 1.0.
    """

    def __init__(self, in_channels=1, num_classes=1, dropout_rate=0.3):
        super().__init__()

        # (input channels, output channels, dropout probability) per stage
        stage_specs = (
            (in_channels, 32, dropout_rate * 0.5),
            (32, 64, dropout_rate * 0.7),
            (64, 128, dropout_rate),
        )
        stacked_layers = []
        for c_in, c_out, p_drop in stage_specs:
            stacked_layers.extend(self._conv_stage(c_in, c_out, p_drop))
        # Kept as one flat Sequential so parameter names stay "features.<i>.*"
        self.features = nn.Sequential(*stacked_layers)

        # Fixed-size pooled output regardless of the input volume size
        self.adaptive_pool = nn.AdaptiveAvgPool3d((2, 4, 4))

        flat_features = 128 * 2 * 4 * 4
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.7),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(128, num_classes),
        )

        self._initialize_weights()

    @staticmethod
    def _conv_stage(c_in, c_out, p_drop):
        """Return the layers of one stage: two conv/BN/ReLU triples, pool, dropout."""
        return [
            nn.Conv3d(c_in, c_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm3d(c_out),
            nn.ReLU(inplace=True),
            nn.Conv3d(c_out, c_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm3d(c_out),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=2, stride=2),
            nn.Dropout3d(p_drop),
        ]

    def _initialize_weights(self):
        """Re-initialise conv, batch-norm and linear parameters in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv3d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm3d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, mean=0, std=0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Map a (batch, channels, D, H, W) volume to (batch, num_classes) logits."""
        pooled = self.adaptive_pool(self.features(x))
        flattened = pooled.view(pooled.shape[0], -1)
        return self.classifier(flattened)

# Fixed: Better DICOM processor
class DICOMProcessor:
    """Load a DICOM series from disk as a fixed-size, normalised 3D volume.

    Args:
        target_size: (depth, height, width) of the returned volume. Falls back
            to ``Config.TARGET_SIZE`` when omitted.
    """

    def __init__(self, target_size=None):
        self.target_size = target_size or Config.TARGET_SIZE
        self.max_slices = self.target_size[0]

    def load_dicom_series(self, series_path):
        """Read the ``.dcm`` files in ``series_path`` into one float32 volume.

        Files are ordered by their DICOM InstanceNumber (filename as
        fallback), sampled evenly down to ``max_slices``, resized to the
        target height/width, padded by repeating the last slice and finally
        normalised to [0, 1]. Files that cannot be read are skipped.

        Args:
            series_path: Directory holding the series' ``.dcm`` files.

        Returns:
            Array of shape ``target_size``. A random placeholder volume is
            returned when the directory is missing, holds no usable slices,
            or processing fails.
        """
        try:
            if not os.path.exists(series_path):
                return self._get_dummy_volume()

            dicom_files = [f for f in os.listdir(series_path) if f.endswith('.dcm')]
            if not dicom_files:
                return self._get_dummy_volume()

            # Filename order is not guaranteed to follow slice order, so sort
            # on the header's InstanceNumber before sampling.
            dicom_files = self._sort_by_instance_number(series_path, dicom_files)
            selected_files = self._evenly_spaced(dicom_files, self.max_slices)

            slices = []
            for file_name in selected_files:
                try:
                    ds = pydicom.dcmread(os.path.join(series_path, file_name), force=True)
                    if not hasattr(ds, 'pixel_array'):
                        continue
                    arr = ds.pixel_array.astype(np.float32)

                    if arr.ndim == 2:
                        frames = [arr]
                    elif arr.ndim == 3:
                        # Treated as a multi-frame file (frames on axis 0):
                        # sample across the whole stack instead of keeping
                        # only the middle frame.
                        frames = self._evenly_spaced(list(arr), self.max_slices)
                    else:
                        continue

                    resized = [self._resize_slice(frame) for frame in frames]
                except Exception:
                    # Deliberate best-effort: one unreadable file must not
                    # discard the rest of the series.
                    continue
                slices.extend(resized)

            if not slices:
                return self._get_dummy_volume()

            volume = np.stack(self._fit_depth(slices), axis=0).astype(np.float32)
            return self._preprocess_volume(volume)

        except Exception as e:
            print(f"Error processing {series_path}: {e}")
            return self._get_dummy_volume()

    def _sort_by_instance_number(self, series_path, file_names):
        """Return ``file_names`` ordered by InstanceNumber, then by filename.

        Only headers are read (``stop_before_pixels=True``). Files whose
        header cannot be read or lacks a usable InstanceNumber are placed
        after the others, in filename order.
        """
        def sort_key(file_name):
            try:
                header = pydicom.dcmread(
                    os.path.join(series_path, file_name),
                    stop_before_pixels=True,
                    force=True,
                )
                return (0, int(header.InstanceNumber), file_name)
            except Exception:
                return (1, 0, file_name)

        return sorted(file_names, key=sort_key)

    @staticmethod
    def _evenly_spaced(items, count):
        """Return at most ``count`` items picked at evenly spaced positions."""
        if len(items) <= count:
            return list(items)
        indices = np.linspace(0, len(items) - 1, count, dtype=int)
        return [items[i] for i in indices]

    def _resize_slice(self, image):
        """Resize a 2D array to the target (height, width) if it differs."""
        height, width = self.target_size[1:]
        if image.shape != (height, width):
            # cv2.resize takes the destination size as (width, height)
            image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR)
        return image

    def _fit_depth(self, slices):
        """Return exactly ``max_slices`` slices from a non-empty list.

        Longer lists are sampled evenly; shorter ones are padded by repeating
        the last slice.

        Raises:
            ValueError: If ``slices`` is empty.
        """
        if not slices:
            raise ValueError("cannot fit an empty slice list to the target depth")
        fitted = self._evenly_spaced(slices, self.max_slices)
        shortfall = self.max_slices - len(fitted)
        if shortfall > 0:
            fitted = fitted + [fitted[-1]] * shortfall
        return fitted

    def _get_dummy_volume(self):
        """Return a random float32 placeholder volume of shape ``target_size``."""
        return np.random.normal(0.5, 0.1, self.target_size).astype(np.float32)

    def _preprocess_volume(self, volume):
        """Clip to the 1st-99th percentile range and rescale to [0, 1].

        A volume whose 1st and 99th percentiles coincide becomes all zeros.
        """
        p1, p99 = np.percentile(volume, [1, 99])
        if p99 > p1:
            volume = np.clip(volume, p1, p99)
            volume = (volume - p1) / (p99 - p1)
        else:
            volume = np.zeros_like(volume)

        # Guard against rounding pushing values just outside [0, 1]
        volume = np.clip(volume, 0, 1).astype(np.float32)

        return volume

# Fixed: Improved Dataset
class AneurysmDataset(Dataset):
    """Dataset yielding one preprocessed series volume and its label per row.

    Args:
        df: DataFrame with the ``Config.ID_COL`` and ``Config.TARGET_COL``
            columns; it is copied and re-indexed.
        series_dir: Directory containing one sub-directory per series id.
        processor: Object whose ``load_dicom_series(path)`` returns the volume.
        transform: Stored on the instance.
            NOTE(review): it is never applied in ``__getitem__`` — confirm
            whether that is intended.
    """

    def __init__(self, df, series_dir, processor, transform=None):
        self.df = df.copy().reset_index(drop=True)
        self.series_dir = series_dir
        self.processor = processor
        self.transform = transform

        print(f"Dataset created with {len(self.df)} samples")
        print(f"Positive cases: {self.df[Config.TARGET_COL].sum()}")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{'volume', 'label', 'series_id'}`` for row ``idx``.

        Raises:
            IndexError: If ``idx`` is out of range.
        """
        # Looked up outside the try block so an out-of-range index raises
        # IndexError instead of being turned into a placeholder sample.
        row = self.df.iloc[idx]

        try:
            series_id = row[Config.ID_COL]
            label = float(row[Config.TARGET_COL])

            series_path = os.path.join(self.series_dir, series_id)
            volume = self.processor.load_dicom_series(series_path)

            # Add the channel dimension in front of (D, H, W)
            volume_tensor = torch.from_numpy(volume).float().unsqueeze(0)
            label_tensor = torch.tensor(label, dtype=torch.float32)

            return {
                'volume': volume_tensor,
                'label': label_tensor,
                'series_id': series_id
            }

        except Exception as e:
            # Best-effort fallback so one bad row does not abort an epoch;
            # reported because the placeholder is labelled as a negative.
            print(f"⚠ Failed to load sample {idx}: {e}; using a zero volume with label 0")
            return {
                'volume': torch.zeros((1, *Config.TARGET_SIZE), dtype=torch.float32),
                'label': torch.tensor(0.0, dtype=torch.float32),
                'series_id': f"DUMMY_{idx}"
            }

# Fixed: Proper loss function with class balancing
class BalancedBCELoss(nn.Module):
    """Binary cross-entropy on logits with an optional positive-class weight.

    Args:
        pos_weight: Tensor passed through as ``pos_weight`` to the loss, or
            ``None`` for unweighted loss. It is kept as a plain attribute, so
            the caller is responsible for placing it on the right device.
    """

    def __init__(self, pos_weight=None):
        super().__init__()
        self.pos_weight = pos_weight

    def forward(self, input, target):
        """Return the mean loss between flattened ``input`` logits and ``target``."""
        logits = input.view(-1)
        # pos_weight=None is the functional's own default, so a single call
        # covers both the weighted and the unweighted case.
        return nn.functional.binary_cross_entropy_with_logits(
            logits, target, pos_weight=self.pos_weight
        )

# Fixed: Training functions
def train_epoch(model, loader, optimizer, criterion, device, epoch):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        model: Network to train; switched to train mode.
        loader: Iterable of batches with ``'volume'`` and ``'label'`` tensors.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Callable ``(outputs, label) -> loss``.
        device: Device the batch tensors are moved to.
        epoch: Zero-based epoch index, used only in the progress-bar label.

    Returns:
        Sum of per-batch losses divided by the number of batches (0 when the
        loader is empty).
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    progress_bar = tqdm(
        loader,
        total=len(loader),
        desc=f"Training Epoch {epoch+1}",
        leave=False
    )

    for batch in progress_bar:
        volume = batch['volume'].to(device)
        label = batch['label'].to(device)

        optimizer.zero_grad()

        outputs = model(volume)
        loss = criterion(outputs, label)
        loss.backward()

        # Cap the global gradient norm before the update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        if TPU_AVAILABLE:
            # A plain DataLoader (no ParallelLoader) does not insert step
            # markers, so ask optimizer_step to issue the barrier itself.
            # This runs the pending XLA graph on every step, which makes a
            # separate periodic mark_step unnecessary.
            xm.optimizer_step(optimizer, barrier=True)
        else:
            optimizer.step()

        # Read the scalar once: each .item() call is a device-to-host sync
        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1

        progress_bar.set_postfix({
            'Loss': f'{batch_loss:.4f}',
            'Avg': f'{total_loss/num_batches:.4f}'
        })

    progress_bar.close()

    return total_loss / max(num_batches, 1)

def validate_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable of batches with ``'volume'`` and ``'label'`` tensors.
        criterion: Callable ``(outputs, label) -> loss``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy, all_preds, all_labels, all_probs)``.
        Predictions are sigmoid probabilities thresholded at 0.5; accuracy is
        0 when no predictions were collected.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    all_preds = []
    all_labels = []
    all_probs = []

    progress_bar = tqdm(
        loader,
        total=len(loader),
        desc="Validation",
        leave=False
    )

    with torch.no_grad():
        for batch in progress_bar:
            volume = batch['volume'].to(device)
            label = batch['label'].to(device)

            outputs = model(volume)
            loss = criterion(outputs, label)

            # Read the scalar once: each .item() call is a device-to-host sync
            batch_loss = loss.item()
            total_loss += batch_loss
            num_batches += 1

            probs = torch.sigmoid(outputs).cpu().numpy().reshape(-1)
            preds = (probs > 0.5).astype(int)

            all_probs.extend(probs)
            all_preds.extend(preds)
            all_labels.extend(label.cpu().numpy().reshape(-1))

            progress_bar.set_postfix({
                'Val Loss': f'{batch_loss:.4f}',
                'Avg': f'{total_loss/num_batches:.4f}'
            })

    progress_bar.close()

    if TPU_AVAILABLE:
        xm.mark_step()

    avg_loss = total_loss / max(num_batches, 1)
    accuracy = accuracy_score(all_labels, all_preds) if len(all_preds) > 0 else 0

    return avg_loss, accuracy, all_preds, all_labels, all_probs

# Fixed: Main training function
def main_training():
    """Train ``ImprovedAneurysmNet`` on the CSV/series paths from ``Config``.

    Reads the training CSV (only the first ``Config.DEBUG_SAMPLES`` rows in
    debug mode), makes a stratified 80/20 split, trains with a
    positive-class-weighted BCE loss, halves the learning rate when the
    validation loss plateaus, stops early after 5 epochs without improvement
    and writes the best checkpoint to ``tpu_aneurysm_best.pth``.

    Returns:
        Tuple ``(model, best_val_loss)``. ``model`` holds the weights of the
        last epoch that ran, which need not be the best checkpoint.
    """
    print("🧠 ANEURYSM DETECTION TRAINING")
    print("="*50)
    print(f"Device: {Config.DEVICE}")
    print(f"TPU Available: {TPU_AVAILABLE}")

    # Load data
    print("\n📊 Loading training data...")
    train_df = pd.read_csv(Config.TRAIN_CSV_PATH)

    if Config.DEBUG_MODE:
        train_df = train_df.head(Config.DEBUG_SAMPLES)
        print(f"🔍 Debug mode: using {len(train_df)} samples")

    print(f"Training samples: {len(train_df)}")
    print(f"Positive cases: {train_df[Config.TARGET_COL].sum()}")
    print(f"Negative cases: {len(train_df) - train_df[Config.TARGET_COL].sum()}")

    # Class weight for the positive class: negatives / positives
    pos_count = train_df[Config.TARGET_COL].sum()
    neg_count = len(train_df) - pos_count
    # Built from Python floats with an explicit dtype: a numpy float64 scalar
    # would otherwise yield a float64 tensor next to float32 logits.
    pos_weight = torch.tensor(
        [float(neg_count) / float(pos_count) if pos_count > 0 else 1.0],
        dtype=torch.float32,
    )
    print(f"Positive weight: {pos_weight.item():.2f}")

    # Train/validation split
    train_data, val_data = train_test_split(
        train_df, test_size=0.2, random_state=42,
        stratify=train_df[Config.TARGET_COL]
    )

    print(f"📊 Train: {len(train_data)}, Val: {len(val_data)}")

    # Create datasets
    processor = DICOMProcessor()
    train_dataset = AneurysmDataset(train_data, Config.SERIES_DIR, processor)
    val_dataset = AneurysmDataset(val_data, Config.SERIES_DIR, processor)

    # Data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        num_workers=0,
        pin_memory=False,
        drop_last=True
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=False,
        num_workers=0,
        pin_memory=False
    )

    print(f"📦 Train batches: {len(train_loader)}, Val batches: {len(val_loader)}")

    # Create model
    print("\n🤖 Creating model...")
    model = ImprovedAneurysmNet().to(Config.DEVICE)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Model parameters: {total_params:,} (trainable: {trainable_params:,})")

    # Loss function and optimizer
    criterion = BalancedBCELoss(pos_weight=pos_weight.to(Config.DEVICE))
    optimizer = optim.AdamW(
        model.parameters(),
        lr=Config.LEARNING_RATE,
        weight_decay=Config.WEIGHT_DECAY
    )

    # No `verbose` argument: it is deprecated in recent PyTorch releases, and
    # the learning rate is printed every epoch below anyway.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3
    )

    # Training loop
    best_val_loss = float('inf')
    best_val_acc = 0
    patience_counter = 0
    patience_limit = 5

    print(f"\n🚀 Starting training for {Config.EPOCHS} epochs...")

    for epoch in range(Config.EPOCHS):
        print(f"\n{'='*15} EPOCH {epoch+1}/{Config.EPOCHS} {'='*15}")

        # Training
        train_loss = train_epoch(model, train_loader, optimizer, criterion, Config.DEVICE, epoch)

        # Validation
        val_loss, val_acc, val_preds, val_labels, val_probs = validate_epoch(
            model, val_loader, criterion, Config.DEVICE
        )

        # Update scheduler
        scheduler.step(val_loss)

        print(f"\n📊 Epoch {epoch+1} Results:")
        print(f"   Train Loss: {train_loss:.4f}")
        print(f"   Val Loss: {val_loss:.4f}")
        print(f"   Val Accuracy: {val_acc:.4f}")
        print(f"   LR: {optimizer.param_groups[0]['lr']:.2e}")

        # AUC is only defined when both classes occur in the validation labels
        if len(set(val_labels)) > 1 and len(val_probs) > 0:
            try:
                auc = roc_auc_score(val_labels, val_probs)
                print(f"   Val AUC: {auc:.4f}")
            except ValueError as e:
                print(f"   Val AUC unavailable: {e}")

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_acc = val_acc
            patience_counter = 0

            # Metrics are stored as plain Python floats so the checkpoint
            # holds only tensors and builtin types.
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'train_loss': float(train_loss),
                'val_loss': float(val_loss),
                'val_accuracy': float(val_acc),
                'best_val_loss': float(best_val_loss)
            }

            if TPU_AVAILABLE:
                # xm.save moves XLA tensors to CPU before serialising
                xm.save(checkpoint, 'tpu_aneurysm_best.pth')
            else:
                torch.save(checkpoint, 'tpu_aneurysm_best.pth')
            print(f"💾 Saved best model (val_loss: {val_loss:.4f}, acc: {val_acc:.4f})")
        else:
            patience_counter += 1
            print(f"⏳ Patience: {patience_counter}/{patience_limit}")

        # Early stopping
        if patience_counter >= patience_limit:
            print(f"ℹ️ Early stopping at epoch {epoch+1}")
            break

        # Memory cleanup
        if not TPU_AVAILABLE:
            torch.cuda.empty_cache()
        gc.collect()

    print(f"\n✅ Training completed!")
    print(f"🏆 Best validation loss: {best_val_loss:.4f}")
    print(f"🏆 Best validation accuracy: {best_val_acc:.4f}")

    return model, best_val_loss

In [None]:
# RUN TRAINING
# Calls main_training(), the training entry point defined in this file.
print("Starting TPU-optimized training...")
model, best_loss = main_training()

if model is not None:
    print(f"\nTraining completed successfully!")
    print(f"Best validation loss: {best_loss:.4f}")
    # Only the best checkpoint is written by main_training()
    print("Output file: tpu_aneurysm_best.pth")
else:
    print("Training failed. Check error messages above.")

**Model Evaluation and Performance**

In [None]:
def evaluate_model(model_path='tpu_aneurysm_best.pth'):
    """Score a saved checkpoint on a random sample of rows and print metrics.

    NOTE(review): the evaluation rows are sampled from ``Config.TRAIN_CSV_PATH``,
    the same CSV used for training, so they can overlap the training split and
    the reported numbers are not a held-out estimate.

    Args:
        model_path: Checkpoint containing a ``'model_state_dict'`` entry. When
            the file does not exist, the randomly initialised model is
            evaluated instead.

    Returns:
        Dict with keys ``'accuracy'`` and ``'auc'`` (each ``None`` when it
        could not be computed) plus the ``'predictions'``, ``'probabilities'``
        and ``'true_labels'`` arrays.
    """
    print("🔍 MODEL EVALUATION")
    print("="*50)

    # Sample at most 100 rows (or a fifth of the CSV, whichever is smaller)
    train_df = pd.read_csv(Config.TRAIN_CSV_PATH)
    test_size = min(100, len(train_df) // 5)
    test_df = train_df.sample(n=test_size, random_state=123).reset_index(drop=True)

    print(f"📊 Test set: {len(test_df)} samples")
    print(f"➕ Positive cases: {test_df[Config.TARGET_COL].sum()}")

    # Build on CPU, load the weights, then move to the target device once
    model = ImprovedAneurysmNet()

    if os.path.exists(model_path):
        print(f"📁 Loading model from {model_path}")
        checkpoint = torch.load(model_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"✅ Loaded model from epoch {checkpoint.get('epoch', 'unknown')}")
        print(f"🏆 Best val loss: {checkpoint.get('best_val_loss', 'unknown')}")
    else:
        print(f"⚠️ Model file not found, using random weights")

    model = model.to(Config.DEVICE)

    # Create test dataset
    processor = DICOMProcessor()
    test_dataset = AneurysmDataset(test_df, Config.SERIES_DIR, processor)
    test_loader = DataLoader(
        test_dataset,
        batch_size=2,
        shuffle=False,
        num_workers=0,
        pin_memory=False
    )

    # Evaluation
    model.eval()
    predictions = []
    probabilities = []
    true_labels = []

    print("🔄 Running evaluation...")

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            volume = batch['volume'].to(Config.DEVICE)
            label = batch['label']

            outputs = model(volume)
            probs = torch.sigmoid(outputs).cpu().numpy().reshape(-1)
            preds = (probs > 0.5).astype(int)

            probabilities.extend(probs)
            predictions.extend(preds)
            true_labels.extend(label.numpy().reshape(-1))

    predictions = np.array(predictions)
    probabilities = np.array(probabilities)
    # Labels arrive as 0.0/1.0 floats; cast so they share a type with predictions
    true_labels = np.array(true_labels).astype(int)

    results = {
        'accuracy': None,
        'auc': None,
        'predictions': predictions,
        'probabilities': probabilities,
        'true_labels': true_labels,
    }

    if len(predictions) > 0:
        accuracy = accuracy_score(true_labels, predictions)
        results['accuracy'] = float(accuracy)

        print(f"\n📊 EVALUATION RESULTS")
        print(f"{'='*30}")
        print(f"🎯 Accuracy: {accuracy:.3f}")
        print(f"📈 Predictions made: {len(predictions)}")

        # AUC is only defined when both classes occur in the labels
        if len(np.unique(true_labels)) > 1:
            try:
                auc = roc_auc_score(true_labels, probabilities)
                results['auc'] = float(auc)
                print(f"📈 AUC-ROC: {auc:.3f}")
            except ValueError:
                print("⚠️ Could not calculate AUC")

        print(f"\n📋 Classification Report:")
        # labels=[0, 1] keeps the report aligned with the two target names
        # even when only one class shows up in the sample.
        print(classification_report(true_labels, predictions,
                                    labels=[0, 1],
                                    target_names=['No Aneurysm', 'Aneurysm'],
                                    zero_division=0))

        # Sample predictions
        print(f"\n🔎 Sample Predictions:")
        sample_size = min(10, len(predictions))
        for true_value, pred_value, prob_value in zip(
            true_labels[:sample_size],
            predictions[:sample_size],
            probabilities[:sample_size],
        ):
            status = "✅" if pred_value == true_value else "❌"
            print(f"{status} True: {int(true_value)}, "
                  f"Pred: {int(pred_value)}, "
                  f"Prob: {prob_value:.3f}")

    print(f"\n✅ Evaluation completed!")

    return results

In [None]:
print("\n" + "="*60)
print("🔍 RUNNING MODEL EVALUATION")
print("="*60)

# Calls evaluate_model(), the evaluation entry point defined in this file.
eval_results = evaluate_model()

https://symbolize.stripped_domain/r/?trace=7f4798bb7ee6,7f4798aeb04f&map= 
*** SIGTERM received by PID 10 (TID 10) on cpu 2 from PID 1; stack trace: ***
PC: @     0x7f4798bb7ee6  (unknown)  epoll_wait
    @     0x7f434efa9a01       1888  (unknown)
    @     0x7f4798aeb050  (unknown)  (unknown)
https://symbolize.stripped_domain/r/?trace=7f4798bb7ee6,7f434efa9a00,7f4798aeb04f&map= 
E0821 01:07:58.377615      10 coredump_hook.cc:247] RAW: Remote crash gathering disabled for SIGTERM.
