Progresive Training

In [1]:
# Full code with minimal changes to SimpleTrainer to use LLL (Laplace Log Likelihood) and MAE as main metrics
# and modify early stopping to consider both metrics.
# All other parts including image processing and data loading remain unchanged.

import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import json
from pathlib import Path
import warnings
import pickle

# Albumentations for medical augmentations
import albumentations as albu
from albumentations.pytorch import ToTensorV2

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    """Ensure reproducibility across all random operations"""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
TEST_DIR = DATA_DIR / "test"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("🚀 DenseNet V2 - Enhanced Medical Imaging Model")
print("=" * 60)
print(f"📱 Device: {DEVICE}")
if torch.cuda.is_available():
    print(f"🔥 GPU: {torch.cuda.get_device_name()}")
    print(f"💾 Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
print("=" * 60)

def quick_recovery():
    """Quick recovery of auto-saved data after kernel restart"""
    global train_df, A, TAB, P, train_patients, val_patients
    
    print("🔄 QUICK RECOVERY MODE")
    print("=" * 40)
    
    # Check Kaggle vs local environment
    if os.path.exists('/kaggle/working/auto_save_data'):
        auto_save_dir = "/kaggle/working/auto_save_data"
        print("🐰 Using Kaggle persistent auto-save data")
    elif os.path.exists('auto_save_data'):
        auto_save_dir = "auto_save_data"
        print("🏠 Using local auto-save data")
    else:
        print("❌ No auto-saved data found. Run full notebook first.")
        return False
    
    try:
        # Load core data
        print("📊 Loading core data...")
        train_df = pd.read_csv(f"{auto_save_dir}/train_df_backup.csv")
        
        with open(f"{auto_save_dir}/decay_coefficients_A_backup.pkl", 'rb') as f:
            A = pickle.load(f)
        
        with open(f"{auto_save_dir}/tabular_features_TAB_backup.pkl", 'rb') as f:
            TAB = pickle.load(f)
        
        with open(f"{auto_save_dir}/patient_list_P_backup.pkl", 'rb') as f:
            P = pickle.load(f)

        print(f"✅ Loaded: train_df ({train_df.shape}), A ({len(A)}), TAB ({len(TAB)}), P ({len(P)})")

        # Load splits if available
        if os.path.exists(f"{auto_save_dir}/train_patients_backup.pkl"):
            print("🔄 Loading train/val splits...")
            
            with open(f"{auto_save_dir}/train_patients_backup.pkl", 'rb') as f:
                train_patients = pickle.load(f)
            
            with open(f"{auto_save_dir}/val_patients_backup.pkl", 'rb') as f:
                val_patients = pickle.load(f)

            print(f"✅ Loaded: train_patients ({len(train_patients)}), val_patients ({len(val_patients)})")

        # Show metadata
        if os.path.exists(f"{auto_save_dir}/processing_metadata.json"):
            with open(f"{auto_save_dir}/processing_metadata.json", 'r') as f:
                metadata = json.load(f)
            print(f"📅 Data from: {metadata.get('processing_timestamp', 'Unknown')}")

        # Load model if available
        if os.path.exists(f"{auto_save_dir}/model_weights_backup.pth"):
            print("🏗️ Loading model...")
            try:
                global model
                model = WorkingDenseNetModel(tabular_dim=4).to(DEVICE)
                model.load_state_dict(torch.load(f"{auto_save_dir}/model_weights_backup.pth", map_location=DEVICE))
                print("✅ Model weights loaded")
            except:
                print("⚠️ Model loading failed (need to run model definition cells first)")

        # Show training results if available
        if os.path.exists(f"{auto_save_dir}/training_results_backup.json"):
            with open(f"{auto_save_dir}/training_results_backup.json", 'r') as f:
                results = json.load(f)
            print(f"📈 Previous training: MAE = {results.get('best_val_mae', 'N/A')}")

        print("🎉 Quick recovery complete! Core variables restored.")
        print("💡 Tip: If model loading failed, run model definition cells first, then call quick_recovery() again")
        return True
    
    except Exception as e:
        print(f"❌ Recovery failed: {e}")
        return False

print("✅ Quick recovery system ready!")
print("💡 Usage after kernel restart:")
print(" quick_recovery() # Restore all auto-saved data")
# quick_recovery()

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_tab_features(df_row):
    """Extract tabular features (returns 4 features)"""
    vector = [(df_row['Age'] - 30) / 30] 
    
    # Sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(0)
    else:
        vector.append(1)
    
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([1, 1])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 1])
    else:
        vector.extend([1, 0])
    return np.array(vector)

A = {} 
TAB = {} 
P = []

print("Calculating linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy()
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) > 1:
        c = np.vstack([weeks, np.ones(len(weeks))]).T
        try:
            a, b = np.linalg.lstsq(c, fvc, rcond=None)[0]
            A[patient] = a
            TAB[patient] = get_tab_features(sub.iloc[0])
            P.append(patient)
        except:
            A[patient] = (fvc[-1] - fvc[0]) / (weeks[-1] - weeks[0]) if len(weeks) > 1 else 0.0
            TAB[patient] = get_tab_features(sub.iloc[0])
            P.append(patient)
    else:
        A[patient] = 0.0
        TAB[patient] = get_tab_features(sub.iloc[0])
        P.append(patient)

print(f"Processed {len(P)} patients with decay coefficients")

print("💾 Auto-saving critical data...")
if os.path.exists('/kaggle/working'):
    auto_save_dir = "/kaggle/working/auto_save_data"
    print("🐰 Using Kaggle persistent storage")
else:
    auto_save_dir = "auto_save_data"
    print("🏠 Using local storage")

os.makedirs(auto_save_dir, exist_ok=True)

try:
    train_df.to_csv(f"{auto_save_dir}/train_df_backup.csv", index=False)
    
    with open(f"{auto_save_dir}/decay_coefficients_A_backup.pkl", 'wb') as f:
        pickle.dump(A, f)
    
    with open(f"{auto_save_dir}/tabular_features_TAB_backup.pkl", 'wb') as f:
        pickle.dump(TAB, f)
    
    with open(f"{auto_save_dir}/patient_list_P_backup.pkl", 'wb') as f:
        pickle.dump(P, f)
    
    metadata = {
        'processed_patients': len(P),
        'total_decay_coefficients': len(A),
        'tabular_features_dim': len(list(TAB.values())[0]) if TAB else 0,
        'processing_timestamp': str(pd.Timestamp.now())
    }
    
    with open(f"{auto_save_dir}/processing_metadata.json", 'w') as f:
        json.dump(metadata, f, indent=2)
    
    print(f"✅ Auto-saved to {auto_save_dir}/")
    print(" - train_df_backup.csv")
    print(" - decay_coefficients_A_backup.pkl") 
    print(" - tabular_features_TAB_backup.pkl")
    print(" - patient_list_P_backup.pkl")
    print(" - processing_metadata.json")
except Exception as e:
    print(f"⚠️ Auto-save failed: {e}")

class MedicalAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=15, p=0.7),
                albu.HorizontalFlip(p=0.5),
                albu.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.7),
                albu.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.7),
                albu.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
                albu.RandomGamma(gamma_limit=(80, 120), p=0.5),
                albu.GridDistortion(num_steps=5, distort_limit=0.3, p=0.3),
                albu.OpticalDistortion(distort_limit=0.3, shift_limit=0.3, p=0.3),
                albu.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])

    def __call__(self, image):
        return self.transform(image=image)['image']

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        x_cat = self.conv1(x_cat)
        return x * self.sigmoid(x_cat)

class OSICDenseNetDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train', augment=True):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augment = augment
        self.augmentor = MedicalAugmentation(augment=augment)
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 6
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        if len(available_images) > 1:
            selected_image = np.random.choice(available_images)
        else:
            selected_image = available_images[0]
        img = self.load_and_preprocess_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_and_preprocess_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img, (512, 512))
            
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading DICOM {path}: {e}")
            return np.zeros((512, 512, 3), dtype=np.uint8)

class WorkingDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=4, dropout_rate=0.4):
        super(WorkingDenseNetModel, self).__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        self.spatial_attention = SpatialAttention()
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU()
        )
        self.cross_attention = nn.MultiheadAttention(
            embed_dim=1024, num_heads=8, dropout=0.2, batch_first=True
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 512, 768),
            nn.BatchNorm1d(768),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(768, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate/2)
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
    def forward(self, images, tabular):
        batch_size = images.size(0)
        img_features = self.features(images)
        img_features = self.spatial_attention(img_features)
        img_features = F.adaptive_avg_pool2d(img_features, (1, 1))
        img_features = img_features.view(batch_size, -1)
        tab_features = self.tabular_processor(tabular)
        img_expanded = img_features.unsqueeze(1)
        tab_expanded = tab_features.unsqueeze(1)
        tab_proj = F.linear(tab_expanded, torch.randn(1024, 512).to(images.device))
        attended_img, _ = self.cross_attention(img_expanded, tab_proj, tab_proj)
        attended_img = attended_img.squeeze(1)
        combined_features = torch.cat([attended_img, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        return mean_pred.squeeze(), log_var.squeeze()

print("✅ CORRECTED Working model defined!")

print("🔄 Creating data loaders...")

patients_list = list(P)
train_patients, val_patients = train_test_split(
    patients_list,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

print(f"Train patients: {len(train_patients)}")
print(f"Validation patients: {len(val_patients)}")

train_dataset = OSICDenseNetDataset(
    patients=train_patients,
    A_dict=A,
    TAB_dict=TAB,
    data_dir=TRAIN_DIR,
    split='train',
    augment=True
)

val_dataset = OSICDenseNetDataset(
    patients=val_patients,
    A_dict=A,
    TAB_dict=TAB,
    data_dir=TRAIN_DIR,
    split='val',
    augment=False
)

train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    drop_last=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    drop_last=False
)

print(f"✅ Data loaders created!")
print(f" Train batches: {len(train_loader)}")
print(f" Val batches: {len(val_loader)}")

# ----------------- LAPLACE LOG LIKELIHOOD METRIC FUNCTION ---------------- #
def laplace_log_likelihood_metric(pred_mean, pred_log_var, targets):
    """
    pred_mean: np.ndarray or torch.tensor
    pred_log_var: np.ndarray or torch.tensor
    targets: np.ndarray or torch.tensor
    returns: LLL (higher better, so often you report minus LLL as loss)
    """
    if isinstance(pred_mean, torch.Tensor):
        pred_mean = pred_mean.detach().cpu().numpy()
    if isinstance(pred_log_var, torch.Tensor):
        pred_log_var = pred_log_var.detach().cpu().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.detach().cpu().numpy()
    
    sigma = np.sqrt(np.exp(pred_log_var))
    sigma = np.clip(sigma, a_min=1e-3, a_max=1000)  # Stabilize
    delta = np.abs(pred_mean - targets)
    score = -np.log(2 * sigma) - delta / sigma
    lll = np.mean(score)
    return lll
# ------------------------------------------------------------------------- #

class SimpleTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')  # LLL: higher better
    
    def uncertainty_loss(self, mean_pred, log_var, targets, reduction='mean'):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        loss = 0.5 * (mse_loss / var + log_var)
        if reduction == 'mean':
            return loss.mean()
        return loss.sum()
    
    def train(self, train_loader, val_loader, epochs=30, patience=8):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=4, verbose=True
        )
        patience_counter = 0
        lambda_lll = 0.8  # Weight for combining LLL loss and MAE loss

        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_mae = 0.0
            train_batches = 0
            train_lll_scores = []  # Laplace Log Likelihood
            
            for batch_idx, (images, tabular, targets, _) in enumerate(train_loader):
                try:
                    images = images.to(self.device)
                    tabular = tabular.to(self.device) 
                    targets = targets.to(self.device)
                    optimizer.zero_grad()
                    mean_pred, log_var = self.model(images, tabular)
                    loss_lll = self.uncertainty_loss(mean_pred, log_var, targets)
                    loss_mae = F.l1_loss(mean_pred, targets)
                    # Combined loss: weighted sum
                    loss = lambda_lll * loss_lll + (1 - lambda_lll) * loss_mae
                    mae = loss_mae
                    lll = laplace_log_likelihood_metric(mean_pred, log_var, targets)
                    train_lll_scores.append(lll)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                    optimizer.step()
                    train_loss += loss.item()
                    train_mae += mae.item()
                    train_batches += 1
                except Exception as e:
                    print(f"Error in training batch {batch_idx}: {e}")
                    continue
            
            self.model.eval()
            val_loss = 0.0
            val_mae = 0.0
            val_predictions = []
            val_targets = []
            val_pred_log_vars = []
            val_lll_scores = []  # Laplace Log Likelihood
            
            with torch.no_grad():
                for batch_idx, (images, tabular, targets, _) in enumerate(val_loader):
                    try:
                        images = images.to(self.device)
                        tabular = tabular.to(self.device)
                        targets = targets.to(self.device)
                        mean_pred, log_var = self.model(images, tabular)
                        loss_lll = self.uncertainty_loss(mean_pred, log_var, targets)
                        loss_mae = F.l1_loss(mean_pred, targets)
                        loss = lambda_lll * loss_lll + (1 - lambda_lll) * loss_mae
                        mae = loss_mae
                        val_loss += loss.item()
                        val_mae += mae.item()
                        val_predictions.extend(mean_pred.cpu().numpy())
                        val_targets.extend(targets.cpu().numpy())
                        val_pred_log_vars.extend(log_var.cpu().numpy())
                        lll = laplace_log_likelihood_metric(mean_pred, log_var, targets)
                        val_lll_scores.append(lll)
                    except Exception as e:
                        print(f"Error in validation batch {batch_idx}: {e}")
                        continue
            
            if train_batches > 0 and len(val_predictions) > 0:
                avg_train_loss = train_loss / train_batches
                avg_train_mae = train_mae / train_batches
                avg_val_loss = val_loss / len(val_loader)
                avg_val_mae = val_mae / len(val_loader)
                
                val_predictions = np.array(val_predictions)
                val_targets = np.array(val_targets)
                val_pred_log_vars = np.array(val_pred_log_vars)
                
                val_rmse = np.sqrt(np.mean((val_predictions - val_targets) ** 2))
                ss_res = np.sum((val_targets - val_predictions) ** 2)
                ss_tot = np.sum((val_targets - np.mean(val_targets)) ** 2)
                r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else -float('inf')
                
                avg_train_lll = np.mean(train_lll_scores) if len(train_lll_scores) > 0 else 0.
                avg_val_lll = np.mean(val_lll_scores) if len(val_lll_scores) > 0 else 0.
                overall_val_lll = laplace_log_likelihood_metric(val_predictions, val_pred_log_vars, val_targets)

                print(f"Epoch {epoch+1}/{epochs}")
                print(f"Train Loss: {avg_train_loss:.6f}, MAE: {avg_train_mae:.6f}, LLL: {avg_train_lll:.6f}")
                print(f"Val Loss: {avg_val_loss:.6f}, MAE: {avg_val_mae:.6f}, RMSE: {val_rmse:.6f}, R²: {r2:.6f}, LLL: {avg_val_lll:.6f}, Overall LLL: {overall_val_lll:.6f}")
                
                scheduler.step(avg_val_mae)  # Scheduler monitors MAE
                
                # Early stopping logic: seek to maximize LLL and minimize MAE
                if avg_val_lll > self.best_val_lll or avg_val_mae < self.best_val_mae:
                    self.best_val_lll = max(self.best_val_lll, avg_val_lll)
                    self.best_val_mae = min(self.best_val_mae, avg_val_mae)
                    torch.save(self.model.state_dict(), 'best_working_model.pth')
                    print("✅ New best model saved!")
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}")
                        break
            else:
                print(f"No valid training or validation batches in epoch {epoch+1}")
            print("-" * 60)
        return self.best_val_mae, self.best_val_lll

print("✅ Simple trainer defined!")

print("🔄 Replacing with CORRECTED working model...")

if 'model' in globals():
    del model
torch.cuda.empty_cache()

model = WorkingDenseNetModel(tabular_dim=4).to(DEVICE)
print(f"✅ Corrected model initialized!")
print(f"📊 Total parameters: {sum(p.numel() for p in model.parameters()):,}")

try:
    if 'train_loader' in globals():
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images = images.to(DEVICE)
        tabular = tabular.to(DEVICE)
        print(f"🔍 Input shapes:")
        print(f" Images: {images.shape}")
        print(f" Tabular: {tabular.shape}")
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print(f"✅ Model forward pass successful!")
        print(f" Mean prediction: {mean_pred.shape} - {mean_pred[:3]}")
        print(f" Log variance: {log_var.shape} - {log_var[:3]}")
    else:
        print("⚠️ Data loaders not found, creating dummy test...")
        dummy_images = torch.randn(2, 3, 512, 512).to(DEVICE)
        dummy_tabular = torch.randn(2, 4).to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(dummy_images, dummy_tabular)
        print(f"✅ Model forward pass successful with dummy data!")
        print(f" Mean prediction: {mean_pred.shape} - {mean_pred}")
        print(f" Log variance: {log_var.shape} - {log_var}")
except Exception as e:
    print(f"❌ Model test failed: {e}")
    import traceback
    traceback.print_exc()

if 'model' in globals():
    print("🚀 Starting training with CORRECTED model...")
    trainer = SimpleTrainer(model, DEVICE, lr=1e-4)
    best_val_mae, best_val_lll = trainer.train(
        train_loader, 
        val_loader, 
        epochs=30,
        patience=8
    )
    print(f"🎯 Training completed! Best validation MAE: {best_val_mae:.6f}, Best LLL: {best_val_lll:.6f}")
else:
    print("❌ No model found. Run previous cells first!... ")

print("Starting Progressive Training...")
best_val_mae, best_val_lll = trainer.train(
    train_loader,
    val_loader,
    epochs=40,
    patience=10
)

print(f"🎯 Progressive training completed! Best validation MAE: {best_val_mae:.6f}, Best LLL: {best_val_lll:.6f}")

# End of code


🚀 DenseNet V2 - Enhanced Medical Imaging Model
📱 Device: cuda
🔥 GPU: Tesla P100-PCIE-16GB
💾 Memory: 17.1 GB
✅ Quick recovery system ready!
💡 Usage after kernel restart:
 quick_recovery() # Restore all auto-saved data
Loaded dataset with shape: (1549, 7)
Calculating linear decay coefficients...


100%|██████████| 176/176 [00:00<00:00, 882.90it/s]


Processed 176 patients with decay coefficients
💾 Auto-saving critical data...
🐰 Using Kaggle persistent storage
✅ Auto-saved to /kaggle/working/auto_save_data/
 - train_df_backup.csv
 - decay_coefficients_A_backup.pkl
 - tabular_features_TAB_backup.pkl
 - patient_list_P_backup.pkl
 - processing_metadata.json
✅ CORRECTED Working model defined!
🔄 Creating data loaders...
Train patients: 140
Validation patients: 36
Dataset train: 138 patients with images


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


Dataset val: 36 patients with images
✅ Data loaders created!
 Train batches: 103
 Val batches: 5
✅ Simple trainer defined!
🔄 Replacing with CORRECTED working model...


100%|██████████| 30.8M/30.8M [00:00<00:00, 173MB/s]


✅ Corrected model initialized!
📊 Total parameters: 12,756,452
🔍 Input shapes:
 Images: torch.Size([8, 3, 512, 512])
 Tabular: torch.Size([8, 4])
✅ Model forward pass successful!
 Mean prediction: torch.Size([8]) - tensor([-0.4609, -0.1809, -0.1003], device='cuda:0')
 Log variance: torch.Size([8]) - tensor([ 0.1176,  0.0216, -0.1518], device='cuda:0')
🚀 Starting training with CORRECTED model...
Epoch 1/30
Train Loss: 11.720372, MAE: 4.886730, LLL: -4.667458
Val Loss: 11.632891, MAE: 5.662280, RMSE: 8.113307, R²: -0.272742, LLL: -4.719950, Overall LLL: -4.741514
✅ New best model saved!
------------------------------------------------------------
Epoch 2/30
Train Loss: 3.670003, MAE: 4.200726, LLL: -3.278303
Val Loss: 4.631873, MAE: 5.250366, RMSE: 7.581581, R²: -0.111384, LLL: -3.606204, Overall LLL: -3.623816
✅ New best model saved!
------------------------------------------------------------
Epoch 3/30
Train Loss: 2.811165, MAE: 4.089293, LLL: -3.136902
Val Loss: 3.578562, MAE: 5.11344

In [None]:
print("hi now 15")