# 13th Oct

In [1]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_1_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1083.03it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


Dataset val: 25 patients with images


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30.8M/30.8M [00:00<00:00, 169MB/s]


üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.10 GB
Epoch 1: LR=1.00e-04, Loss=44.5500
          R¬≤=-0.0548, MAE=4.5869, LLL=-5.1727
üéØ NEW BEST! R¬≤: -0.0548
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=31.4685
          R¬≤=0.0800, MAE=4.3757, LLL=-4.8889
üéØ NEW BEST! R¬≤: 0.0800
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=30.7683
          R¬≤=0.0031, MAE=4.6065, LLL=-4.9003
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=29.8601
          R¬≤=-0.0725, MAE=4.7057, LLL=-5.0710
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=28.4130
          R¬≤=-0.0204, MAE=4.5591, LLL=-4.8947
--------------------------------------------------
Epoch 6: LR=1.00e-04, Loss=26.6444
          R¬≤=0.0027, MAE=4.5835, LLL=-4.8495
--------------------------------------------

In [2]:
# R^2 NOT ABLE TO PUSH MORE THAN THIS 

In [3]:
# LAST TRY WITH LLL AS THE MAIN METRIC

In [4]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_1_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1185.09it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.9234, Val Loss=5.0917
          R¬≤=-0.1123, MAE=4.7440, RMSE=5.7397, LLL=-5.1170
üéØ NEW BEST! R¬≤: -0.1123
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.6866, Val Loss=4.4823
          R¬≤=0.2448, MAE=3.9316, RMSE=4.7294, LLL=-4.3417
üéØ NEW BEST! R¬≤: 0.2448
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5315, Val Loss=4.3381
          R¬≤=-0.0834, MAE=4.6140, RMSE=5.6645, LLL=-4.9184
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.4311, Val Loss=5.8354
          R¬≤=-0.1471, MAE=4.9009, RMSE=5.8286, LLL=-5.0718
--------------------------------------------------
Epoc

In [5]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_2_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1082.62it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04, Loss=44.3931
          R¬≤=0.0366, MAE=4.6583, LLL=-5.0808
üéØ NEW BEST! R¬≤: 0.0366
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=30.2252
          R¬≤=0.1124, MAE=4.0819, LLL=-4.5801
üéØ NEW BEST! R¬≤: 0.1124
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=29.9113
          R¬≤=0.0393, MAE=4.6941, LLL=-5.1883
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=27.7596
          R¬≤=-0.2982, MAE=5.3590, LLL=-5.8118
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=28.1790
          R¬≤=0.1421, MAE=4.3691, LLL=-4.7704
üéØ NEW BEST! R¬≤: 0.1421
--------------------------------------------------
Epoch 6: LR=1.00e-04, Lo

In [6]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_2_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1160.05it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.8574, Val Loss=5.5960
          R¬≤=-0.2365, MAE=5.0468, RMSE=6.0516, LLL=-5.4790
üéØ NEW BEST! R¬≤: -0.2365
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.6673, Val Loss=6.0457
          R¬≤=-0.1616, MAE=4.8955, RMSE=5.8654, LLL=-5.1542
üéØ NEW BEST! R¬≤: -0.1616
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5751, Val Loss=4.6113
          R¬≤=0.0155, MAE=4.6390, RMSE=5.3997, LLL=-4.8801
üéØ NEW BEST! R¬≤: 0.0155
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.3810, Val Loss=3.8305
          R¬≤=0.2895, MAE=3.8107, RMSE=4.5872, LLL=-4.1558
üéØ NEW BEST! R¬≤: 0.2895
-

In [7]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_3_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1184.58it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04, Loss=44.7973
          R¬≤=-0.0271, MAE=4.6716, LLL=-5.3058
üéØ NEW BEST! R¬≤: -0.0271
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=31.3869
          R¬≤=0.2216, MAE=3.9747, LLL=-4.6449
üéØ NEW BEST! R¬≤: 0.2216
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=28.8738
          R¬≤=0.1079, MAE=4.4628, LLL=-4.9425
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=28.4719
          R¬≤=-0.0012, MAE=4.7483, LLL=-5.2659
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=25.8545
          R¬≤=0.2243, MAE=4.1031, LLL=-4.5335
üéØ NEW BEST! R¬≤: 0.2243
--------------------------------------------------
Epoch 6: LR=1.00e-04, 

In [8]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_3_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1145.06it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.14 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.8157, Val Loss=4.9924
          R¬≤=-0.1428, MAE=4.7830, RMSE=5.8177, LLL=-5.1054
üéØ NEW BEST! R¬≤: -0.1428
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.6919, Val Loss=4.8649
          R¬≤=0.0731, MAE=4.3889, RMSE=5.2395, LLL=-4.6991
üéØ NEW BEST! R¬≤: 0.0731
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5497, Val Loss=4.6014
          R¬≤=0.0994, MAE=4.4487, RMSE=5.1647, LLL=-4.7165
üéØ NEW BEST! R¬≤: 0.0994
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.4777, Val Loss=4.3432
          R¬≤=0.1981, MAE=4.1655, RMSE=4.8735, LLL=-4.4575
üéØ NEW BEST! R¬≤: 0.1981
---

In [9]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_4_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1115.01it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.14 GB
Epoch 1: LR=1.00e-04, Loss=44.2671
          R¬≤=0.1958, MAE=4.0305, LLL=-4.5077
üéØ NEW BEST! R¬≤: 0.1958
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=30.8762
          R¬≤=-0.1627, MAE=4.7715, LLL=-5.1491
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=30.3178
          R¬≤=0.0661, MAE=4.5636, LLL=-4.9858
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=27.2203
          R¬≤=-0.0258, MAE=4.6960, LLL=-5.1812
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=25.9201
          R¬≤=0.2225, MAE=4.0389, LLL=-4.5297
üéØ NEW BEST! R¬≤: 0.2225
--------------------------------------------------
Epoch 6: LR=1.00e-04, Loss=24.8922
          R¬≤=0

In [10]:
# LLL
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_4_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1166.23it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.8504, Val Loss=5.2851
          R¬≤=-0.1432, MAE=4.8567, RMSE=5.8187, LLL=-5.1850
üéØ NEW BEST! R¬≤: -0.1432
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.6566, Val Loss=4.9946
          R¬≤=0.0021, MAE=4.5669, RMSE=5.4365, LLL=-4.8305
üéØ NEW BEST! R¬≤: 0.0021
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5233, Val Loss=4.4612
          R¬≤=0.0331, MAE=4.5442, RMSE=5.3515, LLL=-4.8250
üéØ NEW BEST! R¬≤: 0.0331
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.4749, Val Loss=4.9620
          R¬≤=-0.0022, MAE=4.7128, RMSE=5.4481, LLL=-4.9588
-----------------------------

In [11]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_5_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1214.66it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04, Loss=44.4437
          R¬≤=0.0121, MAE=4.4374, LLL=-5.0033
üéØ NEW BEST! R¬≤: 0.0121
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=31.7872
          R¬≤=0.1668, MAE=4.1306, LLL=-4.7528
üéØ NEW BEST! R¬≤: 0.1668
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=30.2944
          R¬≤=-0.1253, MAE=4.8788, LLL=-5.2693
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=27.9926
          R¬≤=-0.0448, MAE=4.7313, LLL=-5.1201
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=25.7185
          R¬≤=-0.3540, MAE=5.0493, LLL=-5.3283
--------------------------------------------------
Epoch 6: LR=1.00e-04, Loss=25.4835
          R¬≤=

In [12]:
# LLL
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_5_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1174.96it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.8683, Val Loss=4.6989
          R¬≤=-0.0900, MAE=4.6104, RMSE=5.6818, LLL=-5.0058
üéØ NEW BEST! R¬≤: -0.0900
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.6390, Val Loss=5.5061
          R¬≤=-0.1185, MAE=4.8148, RMSE=5.7556, LLL=-5.0280
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5449, Val Loss=4.4854
          R¬≤=0.0720, MAE=4.3675, RMSE=5.2426, LLL=-4.6447
üéØ NEW BEST! R¬≤: 0.0720
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.4048, Val Loss=5.3072
          R¬≤=-0.2498, MAE=4.8202, RMSE=6.0841, LLL=-5.0117
--------------------------------------------------
Epoc

In [13]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Configuration
DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5")
print("=" * 60)
print(f"üì± Device: {DEVICE}")

# Load Data
train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    """Optimized tabular features - simpler but more effective"""
    vector = []
    
    # Basic but effective features
    age = df_row['Age']
    vector.extend([
        (age - 50) / 30,  # Centered age
        age / 100,  # Scaled age
    ])
    
    # Simple sex encoding
    if df_row['Sex'] == 'Male':
        vector.append(1.0)
    else:
        vector.append(0.0)
    
    # Simple smoking status
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1, 0, 0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0, 1, 0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0, 0, 1])
    else:
        vector.extend([0, 0, 0])
    
    # FVC features
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,  # Normalized FVC
            (fvc - 2500) / 1000,  # Centered FVC
        ])
    
    # Percent predicted (approximate)
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        
        # Approximate percent predicted FVC
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112 * age) if age > 0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101 * age) if age > 0 else 0.8
            
        vector.append(min(pp_fvc, 2.0))  # Cap at 200%
    
    return np.array(vector)

def calculate_lll(actual, predicted, sigma):
    """Calculate Log Laplace Likelihood"""
    sigma = np.maximum(sigma, 1e-6)  # Avoid division by zero
    delta = np.abs(actual - predicted)
    return -np.sqrt(2) * delta / sigma - np.log(sigma * np.sqrt(2))

# Improved coefficient calculation
A = {} 
TAB = {} 
P = []

print("Calculating optimized linear decay coefficients...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient'] == patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    
    if len(weeks) >= 2:
        try:
            # Simple robust slope calculation
            if len(weeks) == 2:
                slope = (fvc[1] - fvc[0]) / (weeks[1] - weeks[0])
            else:
                # Use Theil-Sen estimator for robustness
                slopes = []
                for i in range(len(weeks)):
                    for j in range(i+1, len(weeks)):
                        if weeks[j] != weeks[i]:
                            slope = (fvc[j] - fvc[i]) / (weeks[j] - weeks[i])
                            slopes.append(slope)
                slope = np.median(slopes) if slopes else 0.0
            
            A[patient] = slope
        except:
            A[patient] = 0.0
    else:
        A[patient] = 0.0
    
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients with optimized features")

# Analyze target distribution
decay_values = np.array(list(A.values()))
print(f"Target statistics: mean={decay_values.mean():.4f}, std={decay_values.std():.4f}")
print(f"Target range: [{decay_values.min():.4f}, {decay_values.max():.4f}]")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10, p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
    
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super(OptimizedDenseNetModel, self).__init__()
        
        # DenseNet121 backbone
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        
        # Freeze early layers, unfreeze later layers
        for i, param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100  # Only unfreeze later layers
        
        # Global pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Simple but effective tabular processor
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Feature fusion
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        
        # Output heads
        self.mean_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
        self.log_var_head = nn.Sequential(
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Tanh()  # Constrain output
        )
        
        # Initialize output layers for better convergence
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in [self.mean_head, self.log_var_head]:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)
    
    def forward(self, images, tabular):
        batch_size = images.size(0)
        
        # Extract image features
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(batch_size, -1)
        
        # Process tabular data
        tab_features = self.tabular_processor(tabular)
        
        # Feature fusion
        combined_features = torch.cat([img_features, tab_features], dim=1)
        fused_features = self.fusion_layer(combined_features)
        
        # Predict mean and log variance
        mean_pred = self.mean_head(fused_features)
        log_var = self.log_var_head(fused_features)
        
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184', 'ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        
        # Prepare image paths
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower() == '.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    
    def __len__(self):
        if self.split == 'train':
            return len(self.valid_patients) * 8
        else:
            return len(self.valid_patients)
    
    def __getitem__(self, idx):
        if self.split == 'train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
            
        patient = self.valid_patients[patient_idx]
        
        # Get random image
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        
        # Load and preprocess image
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        
        # Get tabular features
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        
        # Get target (clipped to reasonable range)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        
        return img_tensor, tab_features, target, patient
    
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            
            if len(img.shape) == 3:
                img = img[img.shape[0]//2]
            
            img = cv2.resize(img, (384, 384))
            
            # Normalize
            img_min, img_max = img.min(), img.max()
            if img_max > img_min:
                img = (img - img_min) / (img_max - img_min) * 255
            else:
                img = np.zeros_like(img)
            
            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            img = clahe.apply(img.astype(np.uint8))
            
            # Convert to 3-channel
            img = np.stack([img, img, img], axis=2).astype(np.uint8)
            
            return img
            
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384, 384, 3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
        
    def uncertainty_loss(self, mean_pred, log_var, targets):
        var = torch.exp(log_var)
        mse_loss = (mean_pred - targets) ** 2
        return 0.5 * (mse_loss / var + log_var).mean()
    
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        
        patience_counter = 0
        
        for epoch in range(epochs):
            # Training
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                
                # Combined loss
                mse_loss = F.mse_loss(mean_pred, targets)
                uncertainty_loss = self.uncertainty_loss(mean_pred, log_var, targets)
                
                # Start with more MSE focus, transition to uncertainty
                if epoch < 20:
                    loss = 0.7 * mse_loss + 0.3 * uncertainty_loss
                else:
                    loss = 0.3 * mse_loss + 0.7 * uncertainty_loss
                
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                
                train_loss += loss.item()
                train_batches += 1
            
            # Validation - FIXED: Handle scalar predictions properly
            self.model.eval()
            val_predictions, val_targets, val_log_vars = [], [], []
            
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    
                    # Convert to numpy properly (handle both scalar and tensor cases)
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    
                    # Ensure we have arrays, not scalars
                    if mean_pred_np.ndim == 0:  # scalar
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:  # array
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                
                # Calculate metrics
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                
                avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}, Loss={avg_train_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, LLL={avg_lll:.4f}")
                
                # Update scheduler
                scheduler.step(r2)
                
                # Save best model
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_MAE_6_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                
                print("-" * 50)
        
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    
    # Simple stratified split
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    
    # Get tabular dimension
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # Create datasets
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    
    # Data loaders - ensure batch size > 1 to avoid scalar issues
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    
    # Initialize model
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test forward pass
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        
        print(f"‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    
    # Train model
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    
    print(f"\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    
    return best_r2, best_mae, best_lll

if __name__ == "__main__":
    final_r2, final_mae, final_lll = optimized_main()

üöÄ OPTIMIZED OSIC Model - Targeting R¬≤ > 0.5
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating optimized linear decay coefficients...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1163.68it/s]

Processed 176 patients with optimized features
Target statistics: mean=-4.8107, std=6.7150
Target range: [-39.0741, 11.1389]
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04, Loss=44.4466
          R¬≤=-0.0879, MAE=4.7355, LLL=-5.4512
üéØ NEW BEST! R¬≤: -0.0879
--------------------------------------------------
Epoch 2: LR=1.00e-04, Loss=31.6251
          R¬≤=0.0341, MAE=4.4529, LLL=-5.0508
üéØ NEW BEST! R¬≤: 0.0341
--------------------------------------------------
Epoch 3: LR=1.00e-04, Loss=30.3141
          R¬≤=0.0676, MAE=4.0962, LLL=-4.4645
üéØ NEW BEST! R¬≤: 0.0676
--------------------------------------------------
Epoch 4: LR=1.00e-04, Loss=28.8795
          R¬≤=-0.1188, MAE=4.9008, LLL=-5.4449
--------------------------------------------------
Epoch 5: LR=1.00e-04, Loss=26.3504
          R¬≤=0.1368, MAE=4.1628, LLL=-4.6197
üéØ NEW BEST! R¬≤: 0.1368
----------------------------------------------

In [14]:
# LLL
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import random
from tqdm import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from pathlib import Path
import albumentations as albu
from albumentations.pytorch import ToTensorV2
import warnings

warnings.filterwarnings('ignore')

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

DATA_DIR = Path("../input/osic-pulmonary-fibrosis-progression")
TRAIN_DIR = DATA_DIR / "train"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("üöÄ Optimized OSIC Model - LLL as Main Loss")
print("="*60)
print(f"üì± Device: {DEVICE}")

train_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
print(f"Loaded dataset with shape: {train_df.shape}")

def get_optimized_tab_features(df_row):
    vector = []
    age = df_row['Age']
    vector.extend([
        (age - 50)/30,
        age / 100,
    ])
    vector.append(1.0 if df_row['Sex']=='Male' else 0.0)
    smoking_status = df_row['SmokingStatus']
    if smoking_status == 'Never smoked':
        vector.extend([1,0,0])
    elif smoking_status == 'Ex-smoker':
        vector.extend([0,1,0])
    elif smoking_status == 'Currently smokes':
        vector.extend([0,0,1])
    else:
        vector.extend([0,0,0])
    if 'FVC' in df_row:
        fvc = df_row['FVC']
        vector.extend([
            fvc / 3000,
            (fvc - 2500)/1000,
        ])
    if 'FVC' in df_row and 'Age' in df_row:
        fvc = df_row['FVC']
        age = df_row['Age']
        sex = df_row['Sex']
        if sex == 'Male':
            pp_fvc = fvc / (27.63 - 0.112*age) if age>0 else 0.8
        else:
            pp_fvc = fvc / (21.78 - 0.101*age) if age>0 else 0.8
        vector.append(min(pp_fvc, 2.0))
    return np.array(vector)

def calculate_lll_loss(mean_pred, log_var, targets):
    # Numerically stable programmatic LLL negative for loss minimization
    var = torch.exp(log_var)
    delta = torch.abs(mean_pred - targets)
    lll = - ( - torch.sqrt(torch.tensor(2.0)) * delta / (var.sqrt() + 1e-6) - torch.log(var.sqrt() * torch.sqrt(torch.tensor(2.0))) )
    return lll.mean()

def calculate_lll(actual, predicted, sigma):
    sigma = np.maximum(sigma, 1e-6)
    delta = np.abs(actual - predicted)
    return -np.sqrt(2)*delta/sigma - np.log(sigma*np.sqrt(2))

A = {}
TAB = {}
P = []

print("Calculating decays ...")
for patient in tqdm(train_df['Patient'].unique()):
    sub = train_df[train_df['Patient']==patient].copy().sort_values('Weeks')
    fvc = sub['FVC'].values
    weeks = sub['Weeks'].values
    if len(weeks) >=2:
        try:
            if len(weeks)==2:
                slope = (fvc[1]-fvc[0])/(weeks[1]-weeks[0])
            else:
                slopes=[]
                for i in range(len(weeks)):
                    for j in range(i+1,len(weeks)):
                        if weeks[j]!=weeks[i]:
                            slopes.append((fvc[j]-fvc[i])/(weeks[j]-weeks[i]))
                slope = np.median(slopes) if slopes else 0.0
            A[patient] = slope
        except:
            A[patient]=0.0
    else:
        A[patient]=0.0
    TAB[patient] = get_optimized_tab_features(sub.iloc[0])
    P.append(patient)

print(f"Processed {len(P)} patients.")

class OptimizedAugmentation:
    def __init__(self, augment=True):
        if augment:
            self.transform = albu.Compose([
                albu.Rotate(limit=10,p=0.5),
                albu.HorizontalFlip(p=0.4),
                albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.6),
                albu.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.4),
                albu.GaussNoise(var_limit=(5.0,20.0), p=0.3),
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
        else:
            self.transform = albu.Compose([
                albu.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
                ToTensorV2()
            ])
    def __call__(self, image):
        return self.transform(image=image)['image']

class OptimizedDenseNetModel(nn.Module):
    def __init__(self, tabular_dim=10, dropout_rate=0.2):
        super().__init__()
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.features = densenet.features
        for i,param in enumerate(self.features.parameters()):
            param.requires_grad = i > 100
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.tabular_processor = nn.Sequential(
            nn.Linear(tabular_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(128,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.fusion_layer = nn.Sequential(
            nn.Linear(1024 + 256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(512,256), nn.BatchNorm1d(256), nn.ReLU(),
        )
        self.mean_head = nn.Sequential(
            nn.Linear(256,128), nn.ReLU(),
            nn.Linear(128,64), nn.ReLU(),
            nn.Linear(64,1)
        )
        self.log_var_head = nn.Sequential(
            nn.Linear(256,32), nn.ReLU(),
            nn.Linear(32,1), nn.Tanh()
        )
        self._initialize_weights()
    def _initialize_weights(self):
        for m in [self.mean_head,self.log_var_head]:
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias,0.0)
    def forward(self, images, tabular):
        b = images.size(0)
        img_features = self.features(images)
        img_features = self.global_pool(img_features).view(b,-1)
        tab_features = self.tabular_processor(tabular)
        combined = torch.cat([img_features, tab_features], dim=1)
        fused = self.fusion_layer(combined)
        mean_pred = self.mean_head(fused)
        log_var = self.log_var_head(fused)
        return mean_pred.squeeze(), log_var.squeeze()

class OptimizedOSICDataset(Dataset):
    def __init__(self, patients, A_dict, TAB_dict, data_dir, split='train'):
        self.patients = [p for p in patients if p not in ['ID00011637202177653955184','ID00052637202186188008618']]
        self.A_dict = A_dict
        self.TAB_dict = TAB_dict
        self.data_dir = Path(data_dir)
        self.split = split
        self.augmentor = OptimizedAugmentation(augment=(split=='train'))
        self.patient_images = {}
        for patient in self.patients:
            patient_dir = self.data_dir / patient
            if patient_dir.exists():
                image_files = [f for f in patient_dir.iterdir() if f.suffix.lower()=='.dcm']
                if image_files:
                    self.patient_images[patient] = image_files
        self.valid_patients = [p for p in self.patients if p in self.patient_images]
        print(f"Dataset {split}: {len(self.valid_patients)} patients with images")
    def __len__(self):
        if self.split=='train':
            return len(self.valid_patients)*8
        else:
            return len(self.valid_patients)
    def __getitem__(self, idx):
        if self.split=='train':
            patient_idx = idx % len(self.valid_patients)
        else:
            patient_idx = idx
        patient = self.valid_patients[patient_idx]
        available_images = self.patient_images[patient]
        selected_image = random.choice(available_images) if available_images else available_images[0]
        img = self.load_dicom(selected_image)
        img_tensor = self.augmentor(img)
        tab_features = torch.tensor(self.TAB_dict[patient], dtype=torch.float32)
        target = torch.tensor(self.A_dict[patient], dtype=torch.float32)
        return img_tensor, tab_features, target, patient
    def load_dicom(self, path):
        try:
            dcm = pydicom.dcmread(str(path))
            img = dcm.pixel_array.astype(np.float32)
            if len(img.shape)==3:
                img = img[img.shape[0]//2]
            img = cv2.resize(img,(384,384))
            img_min,img_max = img.min(), img.max()
            if img_max>img_min:
                img = (img-img_min)/(img_max-img_min)*255
            else:
                img = np.zeros_like(img)
            clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))
            img = np.stack([img,img,img],axis=2).astype(np.uint8)
            return img
        except Exception as e:
            print(f"Error loading {path}: {e}")
            return np.zeros((384,384,3), dtype=np.uint8)

class OptimizedTrainer:
    def __init__(self, model, device, lr=1e-4):
        self.model = model
        self.device = device
        self.lr = lr
        self.best_val_r2 = -float('inf')
        self.best_val_mae = float('inf')
        self.best_val_lll = -float('inf')
    def train(self, train_loader, val_loader, epochs=50):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
        patience_counter = 0
        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            train_batches = 0
            for images, tabular, targets, _ in train_loader:
                images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                optimizer.zero_grad()
                mean_pred, log_var = self.model(images, tabular)
                # Use negative LLL as loss
                var = torch.exp(log_var)
                delta = torch.abs(mean_pred - targets)
                # Calculate negative log likelihood loss (Laplace)
                loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                loss = loss.mean()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                train_loss += loss.item()
                train_batches += 1
            avg_train_loss = train_loss / train_batches if train_batches > 0 else 0
            self.model.eval()
            val_loss_sum = 0.0
            val_batches = 0
            val_predictions, val_targets, val_log_vars = [], [], []
            with torch.no_grad():
                for images, tabular, targets, _ in val_loader:
                    images, tabular, targets = images.to(self.device), tabular.to(self.device), targets.to(self.device)
                    mean_pred, log_var = self.model(images, tabular)
                    var = torch.exp(log_var)
                    delta = torch.abs(mean_pred - targets)
                    val_loss = torch.sqrt(torch.tensor(2.0)) * delta / (torch.sqrt(var) + 1e-6) + torch.log(torch.sqrt(var) * torch.sqrt(torch.tensor(2.0)) + 1e-6)
                    val_loss = val_loss.mean()
                    val_loss_sum += val_loss.item()
                    val_batches += 1
                    mean_pred_np = mean_pred.cpu().numpy()
                    log_var_np = log_var.cpu().numpy()
                    targets_np = targets.cpu().numpy()
                    if mean_pred_np.ndim == 0:
                        val_predictions.append(mean_pred_np.item())
                        val_log_vars.append(log_var_np.item())
                        val_targets.append(targets_np.item())
                    else:
                        val_predictions.extend(mean_pred_np.tolist())
                        val_log_vars.extend(log_var_np.tolist())
                        val_targets.extend(targets_np.tolist())
            avg_val_loss = val_loss_sum / val_batches if val_batches > 0 else 0
            if len(val_predictions) > 0:
                val_pred_np = np.array(val_predictions)
                val_target_np = np.array(val_targets)
                val_log_var_np = np.array(val_log_vars)
                val_sigma_np = np.exp(val_log_var_np / 2)
                r2 = r2_score(val_target_np, val_pred_np)
                mae = np.mean(np.abs(val_pred_np - val_target_np))
                rmse = np.sqrt(np.mean((val_pred_np - val_target_np) ** 2))
                lll_values = calculate_lll(val_target_np, val_pred_np, val_sigma_np)
                avg_lll = np.mean(lll_values)
                current_lr = optimizer.param_groups[0]['lr']
                print(f"Epoch {epoch+1}: LR={current_lr:.2e}")
                print(f"          Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")
                print(f"          R¬≤={r2:.4f}, MAE={mae:.4f}, RMSE={rmse:.4f}, LLL={avg_lll:.4f}")
                scheduler.step(r2)
                if r2 > self.best_val_r2:
                    self.best_val_r2 = r2
                    self.best_val_mae = mae
                    self.best_val_lll = avg_lll
                    torch.save(self.model.state_dict(), 'Oct_14_best_LLL_6_optimized_model.pth')
                    print(f"üéØ NEW BEST! R¬≤: {r2:.4f}")
                    patience_counter = 0
                else:
                    patience_counter += 1
                if patience_counter >= 10:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
                print("-"*50)
        return self.best_val_r2, self.best_val_mae, self.best_val_lll

def optimized_main():
    print("üîÑ Creating optimized data loaders...")
    patients_list = list(P)
    decay_values = [A[patient] for patient in patients_list]
    decay_bins = pd.cut(decay_values, bins=4, labels=False)
    train_patients, val_patients = train_test_split(
        patients_list, test_size=0.15, random_state=42, stratify=decay_bins
    )
    print(f"Train: {len(train_patients)}, Val: {len(val_patients)}")
    tabular_dim = len(TAB[train_patients[0]])
    print(f"Tabular feature dimension: {tabular_dim}")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    train_dataset = OptimizedOSICDataset(train_patients, A, TAB, TRAIN_DIR, 'train')
    val_dataset = OptimizedOSICDataset(val_patients, A, TAB, TRAIN_DIR, 'val')
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)
    model = OptimizedDenseNetModel(tabular_dim=tabular_dim).to(DEVICE)
    print(f"üìä Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        test_batch = next(iter(train_loader))
        images, tabular, targets, _ = test_batch
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        with torch.no_grad():
            mean_pred, log_var = model(images, tabular)
        print("‚úÖ Model forward pass successful!")
        print(f"Output shapes - Mean: {mean_pred.shape}, Log Var: {log_var.shape}")
        print(f"üíæ GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    except Exception as e:
        print(f"‚ùå Model test failed: {e}")
        return
    trainer = OptimizedTrainer(model, DEVICE, lr=1e-4)
    best_r2, best_mae, best_lll = trainer.train(train_loader, val_loader, epochs=50)
    print("\nüî• FINAL RESULTS:")
    print(f"Best R¬≤ = {best_r2:.4f}")
    print(f"Best MAE = {best_mae:.4f}")
    print(f"Best LLL = {best_lll:.4f}")
    return best_r2, best_mae, best_lll

if __name__=="__main__":
    final_r2, final_mae, final_lll = optimized_main()


üöÄ Optimized OSIC Model - LLL as Main Loss
üì± Device: cuda
Loaded dataset with shape: (1549, 7)
Calculating decays ...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 176/176 [00:00<00:00, 1176.32it/s]

Processed 176 patients.
üîÑ Creating optimized data loaders...
Train: 149, Val: 27
Tabular feature dimension: 9





Dataset train: 149 patients with images
Dataset val: 25 patients with images
üìä Model parameters: 7,827,138
‚úÖ Model forward pass successful!
Output shapes - Mean: torch.Size([8]), Log Var: torch.Size([8])
üíæ GPU memory: 0.11 GB
Epoch 1: LR=1.00e-04
          Train Loss=5.8783, Val Loss=4.8014
          R¬≤=-0.0665, MAE=4.5679, RMSE=5.6201, LLL=-4.8577
üéØ NEW BEST! R¬≤: -0.0665
--------------------------------------------------
Epoch 2: LR=1.00e-04
          Train Loss=4.7018, Val Loss=4.6248
          R¬≤=0.1047, MAE=4.2391, RMSE=5.1495, LLL=-4.6033
üéØ NEW BEST! R¬≤: 0.1047
--------------------------------------------------
Epoch 3: LR=1.00e-04
          Train Loss=4.5227, Val Loss=4.0017
          R¬≤=0.1232, MAE=4.0894, RMSE=5.0958, LLL=-4.4151
üéØ NEW BEST! R¬≤: 0.1232
--------------------------------------------------
Epoch 4: LR=1.00e-04
          Train Loss=4.4788, Val Loss=4.4055
          R¬≤=0.1043, MAE=4.4388, RMSE=5.1505, LLL=-4.6828
------------------------------