# In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, roc_curve, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import albumentations as A
from albumentations.pytorch import ToTensorV2
import gc
import warnings
warnings.filterwarnings('ignore')

# ================== CONFIGURATION ==================
# Target label names. CheXpertDataset emits one label per entry, in this order,
# and the logistic-regression classifiers are stored keyed by these names.
DISEASE_LABELS = [
    'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 
    'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 
    'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
    'Pleural Other', 'Fracture', 'Support Devices'
]

# Optimized hyperparameters
IMAGE_SIZE = 224  # side length (pixels) every image is resized to in CheXpertDataset
BATCH_SIZE = 32  # batch size passed to the DataLoader
GRADIENT_ACCUMULATION = 2  # number of batches whose gradients are accumulated per optimizer step
EPOCHS = 16  # not referenced in the visible code; presumably the epoch count passed to training - TODO confirm
LEARNING_RATE = 1e-4  # LR for the fusion/classification heads; backbones train at 0.1x this value
TRAIN_SPLIT = 0.85  # not referenced in the visible code; presumably consumed by prepare_data - TODO confirm
USE_ALL_TRAIN = True  # together with MEMORY_SAFE_MODE: merge train and val frames, then re-split 90/10
LABEL_SMOOTHING = 0.1  # training targets of 1.0 / 0.0 become 1 - s/2 / s/2 in CheXpertDataset

# Model selection
USE_ENSEMBLE = True  # not referenced in the visible code - TODO confirm where it is read
MODEL_TYPE = 'ensemble'  # presumably forwarded as MemorySafeEnsembleLRSystem(model_type=...) - TODO confirm

# Memory optimization flags
MEMORY_SAFE_MODE = True  # enables chunked-to-disk feature extraction and periodic cache clearing
CLEAR_CACHE_FREQ = 20  # clear_memory() is called every this many batches
ENABLE_CHECKPOINTING = True  # not referenced in the visible code - TODO confirm
MAX_SAMPLES_PER_CHUNK = 50000  # a feature chunk is written to disk once it holds at least this many samples
FEATURE_SAVE_CHUNKS = True  # not referenced in the visible code - TODO confirm
DELETE_INTERMEDIATE = True  # not referenced in the visible code - TODO confirm

# ================== MEMORY UTILITIES ==================

def clear_memory():
    """Run the garbage collector and, if CUDA is present, release cached GPU memory."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    # Hand cached blocks back to the driver, then wait for pending kernels.
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

def print_memory_stats():
    """Print allocated and reserved CUDA memory in GB; does nothing without CUDA."""
    if not torch.cuda.is_available():
        return
    bytes_per_gb = 1024**3
    allocated = torch.cuda.memory_allocated() / bytes_per_gb
    reserved = torch.cuda.memory_reserved() / bytes_per_gb
    print(f"GPU Memory - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB")

# ================== ASYMMETRIC LOSS (FIXED) ==================

class AsymmetricLoss(nn.Module):
    """Asymmetric Loss for imbalanced multi-label classification.

    Positive and negative targets get separate focusing exponents
    (``gamma_pos`` / ``gamma_neg``), and the negative-class probability is
    shifted up by ``clip`` before the log so that easy negatives contribute
    little or nothing.

    Args:
        gamma_neg: focusing exponent applied to the negative term.
        gamma_pos: focusing exponent applied to the positive term.
        clip: margin added to the negative probability (capped at 1);
            ``None`` or ``0`` disables it.
        eps: lower bound applied to probabilities before ``log``.
    """
    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8):
        super().__init__()
        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.eps = eps

    def forward(self, x, y):
        """Return the mean asymmetric loss.

        Args:
            x: raw logits.
            y: targets with the same shape as ``x`` (hard or smoothed).

        Returns:
            Scalar float32 tensor: the loss averaged over all elements.
        """
        # Compute in float32. Under mixed precision the logits arrive as
        # float16, where the default eps of 1e-8 rounds to 0, so the clamp
        # below would not prevent log(0) = -inf.
        x = x.float()
        y = y.float()

        # Probabilities
        xs_pos = torch.sigmoid(x)
        xs_neg = 1 - xs_pos

        # Asymmetric clipping (probability shifting on the negative side)
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic cross-entropy terms
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))

        # Asymmetric focusing: down-weight well-classified elements
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            pt = xs_pos * y + xs_neg * (1 - y)
            one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
            one_sided_w = torch.pow(1 - pt, one_sided_gamma)
            los_pos = los_pos * one_sided_w
            los_neg = los_neg * one_sided_w

        loss = -los_pos - los_neg

        return loss.mean()

# ================== FEATURE EXTRACTORS (UNCHANGED) ==================

class DenseNetExtractor(nn.Module):
    """DenseNet-121 backbone (classifier removed) followed by a projection head."""
    def __init__(self, num_features=512):
        super().__init__()

        # Pretrained backbone with its classifier swapped for a pass-through
        backbone = models.densenet121(pretrained=True)
        backbone_width = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        self.densenet = backbone

        # Projection head: Linear -> BatchNorm -> ReLU -> Dropout
        head_layers = [
            nn.Linear(backbone_width, num_features),
            nn.BatchNorm1d(num_features),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
        ]
        self.projection = nn.Sequential(*head_layers)

        self.num_features = num_features

    def forward(self, x):
        """Return the projected backbone features for a batch of images."""
        return self.projection(self.densenet(x))

class ResNetExtractor(nn.Module):
    """ResNet-50 backbone (fc layer removed) followed by a projection head."""
    def __init__(self, num_features=512):
        super().__init__()

        # Pretrained backbone with its final fc swapped for a pass-through
        backbone = models.resnet50(pretrained=True)
        backbone_width = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.resnet = backbone

        # Projection head: Linear -> BatchNorm -> ReLU -> Dropout
        head_layers = [
            nn.Linear(backbone_width, num_features),
            nn.BatchNorm1d(num_features),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
        ]
        self.projection = nn.Sequential(*head_layers)

        self.num_features = num_features

    def forward(self, x):
        """Return the projected backbone features for a batch of images."""
        return self.projection(self.resnet(x))

class EfficientNetExtractor(nn.Module):
    """EfficientNet-B0 backbone (classifier removed) followed by a projection head."""
    def __init__(self, num_features=512):
        super().__init__()

        # Pretrained backbone; the width is read from the Linear at classifier[1]
        backbone = models.efficientnet_b0(pretrained=True)
        backbone_width = backbone.classifier[1].in_features
        backbone.classifier = nn.Identity()
        self.efficientnet = backbone

        # Projection head: Linear -> BatchNorm -> ReLU -> Dropout
        head_layers = [
            nn.Linear(backbone_width, num_features),
            nn.BatchNorm1d(num_features),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
        ]
        self.projection = nn.Sequential(*head_layers)

        self.num_features = num_features

    def forward(self, x):
        """Return the projected backbone features for a batch of images."""
        return self.projection(self.efficientnet(x))

# ================== ENSEMBLE FEATURE EXTRACTOR ==================

class EnsembleFeatureExtractor(nn.Module):
    """Ensemble of DenseNet, ResNet, and EfficientNet feature extractors.

    Each backbone produces a 512-dimensional vector; the three vectors are
    fused into a single ``num_features``-dimensional output.

    Args:
        num_features: width of the fused output vector.
        fusion_method: ``'concat'`` (concatenate the three vectors and pass
            them through a two-layer MLP) or ``'attention'`` (softmax-weighted
            sum with three learned scalars, then one projection layer).

    Raises:
        ValueError: if ``fusion_method`` is not one of the supported values.
    """

    _FUSION_METHODS = ('concat', 'attention')

    def __init__(self, num_features=1024, fusion_method='concat'):
        super().__init__()

        # Fail fast: an unknown method would otherwise leave self.fusion
        # undefined and only surface as an UnboundLocalError in forward().
        if fusion_method not in self._FUSION_METHODS:
            raise ValueError(
                f"Unknown fusion method: {fusion_method!r} "
                f"(expected one of {self._FUSION_METHODS})"
            )

        self.fusion_method = fusion_method

        # Initialize individual extractors
        self.densenet = DenseNetExtractor(num_features=512)
        self.resnet = ResNetExtractor(num_features=512)
        self.efficientnet = EfficientNetExtractor(num_features=512)

        # Freeze early layers to save memory
        self._freeze_early_layers()

        # Feature fusion layer
        if fusion_method == 'concat':
            self.fusion = nn.Sequential(
                nn.Linear(512 * 3, 2048),
                nn.BatchNorm1d(2048),
                nn.ReLU(inplace=True),
                nn.Dropout(0.4),
                nn.Linear(2048, num_features),
                nn.BatchNorm1d(num_features),
                nn.ReLU(inplace=True),
                nn.Dropout(0.3)
            )
        else:  # 'attention'
            # Uniform initial weights; softmax is applied in forward()
            self.attention_weights = nn.Parameter(torch.ones(3) / 3)
            self.fusion = nn.Sequential(
                nn.Linear(512, num_features),
                nn.BatchNorm1d(num_features),
                nn.ReLU(inplace=True),
                nn.Dropout(0.3)
            )

        self.num_features = num_features

    def _freeze_early_layers(self):
        """Disable gradients for the early layers of each backbone."""
        # DenseNet: everything in `features` except denseblock3 / denseblock4
        for name, param in self.densenet.densenet.features.named_parameters():
            if 'denseblock3' not in name and 'denseblock4' not in name:
                param.requires_grad = False

        # ResNet: everything except layer3 / layer4
        for name, param in self.resnet.resnet.named_parameters():
            if 'layer3' not in name and 'layer4' not in name:
                param.requires_grad = False

        # EfficientNet: keep parameters whose name contains '6' or '7'.
        # NOTE(review): this is a substring match on the whole parameter name,
        # presumably meant to select feature stages 6 and 7 - confirm that no
        # other name component can contain those digits, and whether the
        # later stages are meant to stay frozen.
        for name, param in self.efficientnet.efficientnet.features.named_parameters():
            if not any(x in name for x in ['6', '7']):
                param.requires_grad = False

    def forward(self, x):
        """Return the fused feature vector for a batch of images."""
        # Extract features from each model
        densenet_feat = self.densenet(x)
        resnet_feat = self.resnet(x)
        efficientnet_feat = self.efficientnet(x)

        if self.fusion_method == 'concat':
            # Concatenate features along the feature axis
            combined = torch.cat([densenet_feat, resnet_feat, efficientnet_feat], dim=1)
        elif self.fusion_method == 'attention':
            # Weighted average with learned attention
            weights = F.softmax(self.attention_weights, dim=0)
            combined = weights[0] * densenet_feat + weights[1] * resnet_feat + weights[2] * efficientnet_feat
        else:
            # Only reachable if fusion_method was changed after construction
            raise ValueError(f"Unknown fusion method: {self.fusion_method!r}")

        return self.fusion(combined)

# ================== DATASET WITH FIXED LABELS ==================

class CheXpertDataset(Dataset):
    """Dataset yielding ``(image, labels, idx)`` for each dataframe row.

    Args:
        dataframe: table with a ``'Path'`` column and, optionally, one column
            per entry of ``DISEASE_LABELS``.
        image_dir: directory that image paths are resolved against.
        transform: optional callable invoked as ``transform(image=...)`` whose
            result provides the transformed image under the ``'image'`` key.
        training: when True, uncertain labels are mapped per disease and
            label smoothing is applied; when False, uncertain labels become 0.
    """
    def __init__(self, dataframe, image_dir, transform=None, training=True):
        self.dataframe = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform
        self.training = training

    def __len__(self):
        return len(self.dataframe)

    def _load_image(self, full_path):
        """Read a grayscale image and apply CLAHE; return None if unreadable."""
        if not os.path.exists(full_path):
            return None

        image = None
        try:
            image = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)

            # CLAHE preprocessing
            if image is not None:
                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
                image = clahe.apply(image)
        except cv2.error:
            # Best effort: keep whatever was loaded so far (possibly None, in
            # which case the caller substitutes a placeholder). Only OpenCV
            # failures are tolerated, so interrupts and unrelated errors
            # still propagate.
            pass
        return image

    def __getitem__(self, idx):
        """Return the preprocessed image, its float32 label vector, and ``idx``."""
        row = self.dataframe.iloc[idx]

        # Build path: drop the dataset-root prefix if the CSV includes it
        path = row['Path']
        if 'CheXpert-v1.0-small' in path:
            relative_path = path.replace('CheXpert-v1.0-small/', '')
        else:
            relative_path = path

        full_path = os.path.join(self.image_dir, relative_path)

        # Read image
        image = self._load_image(full_path)

        # Create a uniform mid-gray placeholder if the image is missing/unreadable
        if image is None:
            image = np.ones((IMAGE_SIZE, IMAGE_SIZE), dtype=np.uint8) * 128

        # Resize
        if image.shape[0] != IMAGE_SIZE or image.shape[1] != IMAGE_SIZE:
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_LANCZOS4)

        # Convert to RGB (three identical channels)
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Apply transforms
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        # Build the label vector, one entry per disease in DISEASE_LABELS order
        labels = []
        for disease in DISEASE_LABELS:
            if disease not in row:
                # Column absent from the dataframe: treat as negative
                labels.append(0.0)
                continue

            label = row[disease]
            if pd.isna(label):
                label = 0.0
            elif label == -1:
                # Uncertain label: positive for selected diseases during
                # training, negative otherwise
                if self.training and disease in ['Atelectasis', 'Edema', 'Pleural Effusion']:
                    label = 1.0
                else:
                    label = 0.0
            else:
                label = float(label)

            # Apply label smoothing ONLY during neural network training
            if self.training and LABEL_SMOOTHING > 0 and label in [0.0, 1.0]:
                if label == 1.0:
                    label = 1.0 - LABEL_SMOOTHING/2
                else:
                    label = LABEL_SMOOTHING/2

            labels.append(label)

        labels = np.array(labels, dtype=np.float32)

        return image, labels, idx

# ================== MEMORY-SAFE TRAINING SYSTEM ==================

class MemorySafeEnsembleLRSystem:
    """Memory-optimized Ensemble + LR System"""
    def __init__(self, device='cuda', num_features=1024, model_type='ensemble'):
        self.device = device
        self.num_features = num_features
        self.model_type = model_type
        
        # Initialize feature extractor
        if model_type == 'ensemble':
            self.feature_extractor = EnsembleFeatureExtractor(
                num_features=num_features, 
                fusion_method='concat'
            ).to(device)
        elif model_type == 'densenet':
            self.feature_extractor = DenseNetExtractor(num_features=num_features).to(device)
        elif model_type == 'resnet':
            self.feature_extractor = ResNetExtractor(num_features=num_features).to(device)
        elif model_type == 'efficientnet':
            self.feature_extractor = EfficientNetExtractor(num_features=num_features).to(device)
        else:
            raise ValueError(f"Unknown model type: {model_type}")
        
        self.scaler = StandardScaler()
        self.classifiers = {}
    
    def extract_features_memory_safe(self, dataloader, phase='train', save_to_disk=True):
        """Put the extractor in eval mode and run chunked or in-memory extraction.

        Returns the list of chunk file names when ``save_to_disk`` and
        ``MEMORY_SAFE_MODE`` are both truthy, otherwise a
        ``(features, labels)`` pair of arrays.
        """
        self.feature_extractor.eval()

        use_chunks = save_to_disk and MEMORY_SAFE_MODE
        extract = (
            self._extract_features_to_chunks if use_chunks
            else self._extract_features_standard
        )
        return extract(dataloader, phase)
    
    def _extract_features_to_chunks(self, dataloader, phase):
        """Extract features batch by batch, flushing them to ``.npz`` chunk files.

        A chunk is written whenever at least ``MAX_SAMPLES_PER_CHUNK`` samples
        are buffered; any remainder is written as a final chunk. Returns the
        list of chunk file names in the order they were written.
        """
        print(f"\nExtracting {phase} features in memory-safe mode...")

        saved_chunks = []
        pending_features, pending_labels = [], []
        pending_rows = 0  # running count of buffered samples
        chunk_idx = 0

        with torch.no_grad():
            progress = tqdm(dataloader, desc=f'Extracting {phase} features')
            for step, (images, labels, _) in enumerate(progress):
                device_features = self.feature_extractor(images.to(self.device))

                # Copy to host right away and drop the GPU tensor
                host_features = device_features.cpu().numpy()
                del device_features

                pending_features.append(host_features)
                pending_labels.append(labels.numpy())
                pending_rows += host_features.shape[0]

                # Periodic cache clearing, with a memory report every 100 batches
                if step % CLEAR_CACHE_FREQ == 0:
                    clear_memory()
                    if step % 100 == 0:
                        print_memory_stats()

                if pending_rows < MAX_SAMPLES_PER_CHUNK:
                    continue

                # Buffer is full: stack, write, and reset
                chunk_feat_array = np.vstack(pending_features)
                chunk_label_array = np.vstack(pending_labels)

                chunk_file = f'{phase}_features_chunk_{chunk_idx}.npz'
                np.savez_compressed(chunk_file,
                                    features=chunk_feat_array,
                                    labels=chunk_label_array)
                saved_chunks.append(chunk_file)

                print(f"Saved chunk {chunk_idx} with {chunk_feat_array.shape[0]} samples")

                pending_features, pending_labels = [], []
                pending_rows = 0
                chunk_idx += 1

                del chunk_feat_array, chunk_label_array
                clear_memory()

        # Write whatever is left over after the last full chunk
        if pending_features:
            chunk_feat_array = np.vstack(pending_features)
            chunk_label_array = np.vstack(pending_labels)

            chunk_file = f'{phase}_features_chunk_{chunk_idx}.npz'
            np.savez_compressed(chunk_file,
                                features=chunk_feat_array,
                                labels=chunk_label_array)
            saved_chunks.append(chunk_file)

            print(f"Saved final chunk {chunk_idx} with {chunk_feat_array.shape[0]} samples")

        return saved_chunks
    
    def _extract_features_standard(self, dataloader, phase):
        """Extract all features in memory and return ``(features, labels)`` arrays."""
        feature_batches, label_batches = [], []

        with torch.no_grad():
            progress = tqdm(dataloader, desc=f'Extracting {phase} features')
            for step, (images, labels, _) in enumerate(progress):
                batch_features = self.feature_extractor(images.to(self.device))

                feature_batches.append(batch_features.cpu().numpy())
                label_batches.append(labels.numpy())

                # Clear caches on every 30th batch (including the first)
                if step % 30 != 0:
                    continue
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        return np.vstack(feature_batches), np.vstack(label_batches)
    
    def train_lr_classifiers_from_chunks(self, train_chunks, val_chunks=None):
        """Train one logistic-regression (SGD) classifier per disease from chunk files.

        The scaler is fitted incrementally over all chunks, then each disease
        gets an ``SGDClassifier`` trained with ``partial_fit`` using balanced
        per-class sample weights. Diseases with fewer than 10 positive samples
        are skipped. Trained models are stored in ``self.classifiers``.

        Args:
            train_chunks: ``.npz`` file names, each holding ``features`` and
                ``labels`` arrays.
            val_chunks: accepted for interface compatibility; not used.
        """
        import re

        import sklearn
        from sklearn.linear_model import SGDClassifier

        print("\nTraining Logistic Regression Classifiers from chunks...")

        # The logistic loss was renamed from 'log' to 'log_loss' in
        # scikit-learn 1.1 and the old name was removed in 1.3. Pick the name
        # the installed version accepts; otherwise every fit would fail and be
        # swallowed by the per-disease error handler below.
        version_parts = tuple(int(p) for p in re.findall(r'\d+', sklearn.__version__)[:2])
        logistic_loss = 'log_loss' if version_parts >= (1, 1) else 'log'

        # Single pass over the chunks: fit the scaler and collect the
        # binarized labels once instead of re-reading them for every disease.
        print("Fitting StandardScaler on all chunks...")
        label_blocks = []
        for chunk_file in train_chunks:
            with np.load(chunk_file) as data:
                self.scaler.partial_fit(data['features'])
                label_blocks.append(data['labels'] > 0.5)
            clear_memory()

        if label_blocks:
            all_labels_binary = np.vstack(label_blocks)
        else:
            all_labels_binary = np.zeros((0, len(DISEASE_LABELS)), dtype=bool)
        del label_blocks

        # Train classifier for each disease
        for disease_idx, disease in enumerate(DISEASE_LABELS):
            print(f'\nTraining LR for {disease}...')

            all_y = all_labels_binary[:, disease_idx].astype(int)

            if all_y.sum() < 10:
                print(f'Too few positive samples for {disease}, skipping...')
                continue

            # Balanced class weights: n_samples / (2 * n_class)
            n_samples = len(all_y)
            n_pos = all_y.sum()
            n_neg = n_samples - n_pos

            pos_weight = n_samples / (2.0 * n_pos) if n_pos > 0 else 1.0
            neg_weight = n_samples / (2.0 * n_neg) if n_neg > 0 else 1.0

            print(f"{disease} - Pos samples: {n_pos}/{n_samples} ({n_pos/n_samples*100:.1f}%), "
                  f"Weights: pos={pos_weight:.2f}, neg={neg_weight:.2f}")

            try:
                lr = SGDClassifier(
                    loss=logistic_loss,
                    penalty='l2',
                    alpha=0.01,
                    max_iter=1000,
                    random_state=42,
                    n_jobs=-1
                )

                # Train on chunks with manual sample weights
                for epoch in range(2):  # Multiple passes over data
                    for chunk_file in train_chunks:
                        with np.load(chunk_file) as data:
                            features = data['features']
                            labels_binary = (data['labels'][:, disease_idx] > 0.5).astype(int)

                        # Per-sample weight chosen by the sample's class
                        sample_weights = np.where(labels_binary == 1, pos_weight, neg_weight)

                        # Scale features with the scaler fitted above
                        features_scaled = self.scaler.transform(features)

                        lr.partial_fit(features_scaled, labels_binary,
                                       classes=[0, 1], sample_weight=sample_weights)

                        del features, labels_binary, features_scaled, sample_weights
                        clear_memory()

                self.classifiers[disease] = lr
                print(f'{disease} - Training completed')

            except Exception as e:
                # Best effort: report the failure and continue with the next disease
                print(f"Error training LR for {disease}: {e}")
    
    def train_lr_classifiers(self, train_features, train_labels):
        """Fit the scaler and one balanced LogisticRegression per disease in memory.

        Labels are binarized at 0.5; diseases with fewer than 10 positives are
        skipped. Fitted models are stored in ``self.classifiers``.
        """
        print("\nTraining Logistic Regression Classifiers...")

        # Standardize once; reused for every disease
        scaled_features = self.scaler.fit_transform(train_features)

        lr_settings = dict(
            penalty='l2',
            C=1.0,
            max_iter=1000,
            class_weight='balanced',
            random_state=42,
            solver='lbfgs',
            n_jobs=-1,
        )

        for column, disease in enumerate(DISEASE_LABELS):
            print(f'\nTraining LR for {disease}...')

            # Threshold (possibly smoothed) labels at 0.5
            targets = (train_labels[:, column] > 0.5).astype(int)

            if targets.sum() < 10:
                print(f'Too few positive samples for {disease}, skipping...')
                continue

            try:
                model = LogisticRegression(**lr_settings)
                model.fit(scaled_features, targets)

                # AUC on the training data itself
                positive_scores = model.predict_proba(scaled_features)[:, 1]
                train_auc = roc_auc_score(targets, positive_scores)

                self.classifiers[disease] = model
                print(f'{disease} - Train AUC: {train_auc:.3f}')

            except Exception as e:
                print(f"Error training LR for {disease}: {e}")
    
    def predict_from_chunks(self, test_chunks):
        """Predict positive-class probabilities for every disease, chunk by chunk.

        Returns a dict mapping each disease name to a 1-D array covering all
        chunks in order; diseases without a trained classifier get zeros.
        """
        print("\nMaking predictions from chunks...")

        per_disease_parts = {disease: [] for disease in DISEASE_LABELS}

        for chunk_file in test_chunks:
            data = np.load(chunk_file)
            features = data['features']

            # Scale features with the fitted scaler
            features_scaled = self.scaler.transform(features)

            for disease, parts in per_disease_parts.items():
                model = self.classifiers.get(disease)
                if model is None:
                    parts.append(np.zeros(len(features)))
                else:
                    parts.append(model.predict_proba(features_scaled)[:, 1])

            del data, features, features_scaled
            clear_memory()

        # Join the per-chunk pieces; an empty chunk list yields empty arrays
        return {
            disease: (np.concatenate(parts) if parts else np.array([]))
            for disease, parts in per_disease_parts.items()
        }
    
    def predict(self, test_features):
        """Return a dict of positive-class probabilities per disease.

        Diseases without a trained classifier map to an all-zero array.
        """
        scaled = self.scaler.transform(test_features)
        n_rows = len(test_features)

        return {
            disease: (
                self.classifiers[disease].predict_proba(scaled)[:, 1]
                if disease in self.classifiers
                else np.zeros(n_rows)
            )
            for disease in DISEASE_LABELS
        }
    
    def train_feature_extractor(self, train_loader, val_loader, epochs=3):
        """Fine-tune the feature extractor with a temporary classification head.

        Trains with AsymmetricLoss, AdamW, cosine LR annealing, mixed
        precision and gradient accumulation. After every epoch the model is
        validated; the checkpoint with the best validation AUC is written to
        ``best_<model_type>_model.pth`` and restored at the end.

        Args:
            train_loader: yields ``(images, labels, idx)`` training batches.
            val_loader: yields ``(images, labels, idx)`` validation batches.
            epochs: number of training epochs.
        """
        print(f"\nTraining {self.model_type.upper()} Feature Extractor...")

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(self.num_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, len(DISEASE_LABELS))
        ).to(self.device)

        # Loss function
        criterion = AsymmetricLoss(gamma_neg=4, gamma_pos=1, clip=0.05)

        # Optimizer: backbones at a tenth of the head learning rate
        if self.model_type == 'ensemble':
            params = [
                {'params': self.feature_extractor.densenet.parameters(), 'lr': LEARNING_RATE * 0.1},
                {'params': self.feature_extractor.resnet.parameters(), 'lr': LEARNING_RATE * 0.1},
                {'params': self.feature_extractor.efficientnet.parameters(), 'lr': LEARNING_RATE * 0.1},
                {'params': self.feature_extractor.fusion.parameters(), 'lr': LEARNING_RATE},
                {'params': self.classifier.parameters(), 'lr': LEARNING_RATE}
            ]
        else:
            params = [
                {'params': self.feature_extractor.parameters(), 'lr': LEARNING_RATE * 0.1},
                {'params': self.classifier.parameters(), 'lr': LEARNING_RATE}
            ]

        optimizer = torch.optim.AdamW(params, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

        # Mixed precision
        scaler = torch.cuda.amp.GradScaler()

        checkpoint_path = f'best_{self.model_type}_model.pth'
        # Start below any real AUC so the first finite validation result is
        # always checkpointed, and remember whether this run wrote a file so a
        # stale or missing checkpoint is never loaded at the end.
        best_val_auc = float('-inf')
        checkpoint_saved = False

        clip_params = list(self.feature_extractor.parameters()) + list(self.classifier.parameters())
        num_batches = len(train_loader)

        for epoch in range(epochs):
            # Training
            self.feature_extractor.train()
            self.classifier.train()

            train_loss = 0
            train_preds = []
            train_labels = []

            optimizer.zero_grad()

            pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs} [Train]')
            for batch_idx, (images, labels, _) in enumerate(pbar):
                images = images.to(self.device)
                labels = labels.to(self.device)

                # Mixed precision training
                with torch.cuda.amp.autocast():
                    features = self.feature_extractor(images)
                    outputs = self.classifier(features)
                    loss = criterion(outputs, labels)

                loss = loss / GRADIENT_ACCUMULATION

                # Backward
                scaler.scale(loss).backward()

                # Step after every full accumulation group, and also on the
                # last batch so a trailing partial group is applied instead of
                # leaking its gradients into the next epoch.
                is_last_batch = (batch_idx + 1) == num_batches
                if (batch_idx + 1) % GRADIENT_ACCUMULATION == 0 or is_last_batch:
                    scaler.unscale_(optimizer)
                    torch.nn.utils.clip_grad_norm_(clip_params, max_norm=1.0)
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()

                train_loss += loss.item() * GRADIENT_ACCUMULATION

                with torch.no_grad():
                    train_preds.append(torch.sigmoid(outputs).cpu().numpy())
                    train_labels.append(labels.cpu().numpy())

                pbar.set_postfix({'loss': loss.item() * GRADIENT_ACCUMULATION})

                # Memory management
                if MEMORY_SAFE_MODE and batch_idx % CLEAR_CACHE_FREQ == 0:
                    clear_memory()

            # Calculate training metrics - convert to binary for AUC calculation
            train_preds = np.vstack(train_preds)
            train_labels = np.vstack(train_labels)
            train_labels_binary = (train_labels > 0.5).astype(int)
            train_auc = self.calculate_multi_label_auc(train_labels_binary, train_preds)

            # Validation
            val_loss, val_auc = self.validate(val_loader, criterion)

            avg_train_loss = train_loss / len(train_loader)

            print(f'Epoch {epoch+1}: Train Loss={avg_train_loss:.4f}, Train AUC={train_auc:.4f}, '
                  f'Val Loss={val_loss:.4f}, Val AUC={val_auc:.4f}')

            # Save best model
            if val_auc > best_val_auc:
                best_val_auc = val_auc
                torch.save({
                    'feature_extractor': self.feature_extractor.state_dict(),
                    'classifier': self.classifier.state_dict(),
                    'epoch': epoch,
                    # Plain float: a numpy scalar is rejected when the file is
                    # read back with torch.load's weights_only mode.
                    'val_auc': float(val_auc)
                }, checkpoint_path)
                checkpoint_saved = True
                print(f'Saved best model with Val AUC: {best_val_auc:.4f}')

            scheduler.step()

        # Load best model (only if this run actually produced one)
        if not checkpoint_saved:
            print("No checkpoint was saved during this run; keeping current weights")
            return

        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.feature_extractor.load_state_dict(checkpoint['feature_extractor'])
        self.classifier.load_state_dict(checkpoint['classifier'])
        print(f"Loaded best model with Val AUC: {checkpoint['val_auc']:.4f}")
    
    def validate(self, val_loader, criterion):
        """Evaluate on ``val_loader``; return ``(mean batch loss, mean AUC)``."""
        self.feature_extractor.eval()
        self.classifier.eval()

        loss_total = 0
        pred_batches, label_batches = [], []

        with torch.no_grad():
            for images, labels, _ in tqdm(val_loader, desc='Validation'):
                images = images.to(self.device)
                labels = labels.to(self.device)

                with torch.cuda.amp.autocast():
                    logits = self.classifier(self.feature_extractor(images))
                    batch_loss = criterion(logits, labels)

                loss_total += batch_loss.item()
                pred_batches.append(torch.sigmoid(logits).cpu().numpy())
                label_batches.append(labels.cpu().numpy())

        predictions = np.vstack(pred_batches)
        # Binarize labels at 0.5 for the AUC calculation
        binary_labels = (np.vstack(label_batches) > 0.5).astype(int)
        mean_auc = self.calculate_multi_label_auc(binary_labels, predictions)

        return loss_total / len(val_loader), mean_auc
    
    def calculate_multi_label_auc(self, y_true, y_pred):
        """Return the mean ROC AUC over label columns that contain both classes.

        Args:
            y_true: 2-D array of binary labels, one column per label.
            y_pred: 2-D array of scores with the same layout as ``y_true``.

        Returns:
            Mean AUC over the usable columns, or ``0`` if no column is usable.
        """
        n_rows = len(y_true)
        aucs = []
        for i in range(y_true.shape[1]):
            n_pos = y_true[:, i].sum()
            # AUC is undefined when a column holds a single class
            if not (0 < n_pos < n_rows):
                continue
            try:
                aucs.append(roc_auc_score(y_true[:, i], y_pred[:, i]))
            except ValueError:
                # Column rejected by roc_auc_score: leave it out of the mean.
                # Only ValueError is tolerated so that interrupts and
                # programming errors are no longer silently discarded.
                continue
        return np.mean(aucs) if aucs else 0

# ================== MAIN PIPELINE WITH MEMORY SAFETY ==================

def main_memory_safe():
    """Run the whole pipeline: train, extract features, fit LR heads, evaluate.

    Steps, in order:
      1. Pick the device, seed NumPy / PyTorch, and print GPU memory stats.
      2. Load the splits with ``prepare_data``; when ``USE_ALL_TRAIN`` and
         ``MEMORY_SAFE_MODE`` are both set, merge train+val and re-cut them
         90/10 by row position.
      3. Build datasets and loaders, then train the feature extractor.
      4. Extract features (chunked to disk in memory-safe mode, in RAM
         otherwise), fit the logistic-regression classifiers on the train+val
         features, and predict on the test set.
      5. Evaluate with ``evaluate_model``, write the results CSV, and print a
         summary.

    Returns:
        Tuple ``(system, results)``: the ``MemorySafeEnsembleLRSystem``
        instance and the per-disease metrics dict from ``evaluate_model``.
    """
    import time
    start_time = time.time()

    # Paths
    train_csv = '/kaggle/input/chexpert/train.csv'
    valid_csv = '/kaggle/input/chexpert/valid.csv'
    image_dir = '/kaggle/input/chexpert/'

    # Device setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        torch.backends.cudnn.benchmark = True

    # Set seeds
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)

    # Initial memory check
    clear_memory()
    print_memory_stats()

    # Load data
    train_data, val_data, test_df = prepare_data(train_csv, valid_csv, image_dir)

    # Handle USE_ALL_TRAIN flag
    if USE_ALL_TRAIN and MEMORY_SAFE_MODE:
        print("\nUSE_ALL_TRAIN=True detected. Using memory-safe data handling...")
        # Combine train and validation data
        all_train_data = pd.concat([train_data, val_data], ignore_index=True)
        print(f"Total training samples after combining: {len(all_train_data)}")

        # Split into train/val for model training.
        # NOTE(review): this cut is by row position, not by PatientID, so the
        # patient boundary at the cut may fall on both sides - confirm acceptable.
        train_size = int(0.9 * len(all_train_data))
        train_data = all_train_data[:train_size]
        val_data = all_train_data[train_size:]

        print(f"New split - Train: {len(train_data)}, Val: {len(val_data)}")

    # Create transforms
    train_transform, val_transform = create_transforms()

    # Create datasets
    train_dataset = CheXpertDataset(train_data, image_dir, train_transform, training=True)
    val_dataset = CheXpertDataset(val_data, image_dir, val_transform, training=False)
    test_dataset = CheXpertDataset(test_df, image_dir, val_transform, training=False)

    # Create dataloaders
    train_loader = DataLoader(
        train_dataset, batch_size=BATCH_SIZE,
        shuffle=True, num_workers=2,  # Reduced workers for memory
        pin_memory=False,  # Disable pin_memory to save RAM
        drop_last=True
    )
    val_loader = DataLoader(
        val_dataset, batch_size=BATCH_SIZE,
        shuffle=False, num_workers=2,
        pin_memory=False
    )
    test_loader = DataLoader(
        test_dataset, batch_size=BATCH_SIZE,
        shuffle=False, num_workers=2,
        pin_memory=False
    )

    # Initialize system
    print(f"\nInitializing Memory-Safe {MODEL_TYPE.upper()} + LR System...")
    system = MemorySafeEnsembleLRSystem(device=device, num_features=1024, model_type=MODEL_TYPE)
    print_memory_stats()

    # Train feature extractor
    system.train_feature_extractor(train_loader, val_loader, epochs=EPOCHS)

    # Clear memory after training
    clear_memory()
    print_memory_stats()

    # Extract features with memory safety
    print("\nExtracting features...")

    # NOTE(review): both branches extract "train" features through train_loader,
    # which shuffles, applies train_transform and uses drop_last=True, so the
    # trailing partial batch is never extracted - confirm this is intended.
    if MEMORY_SAFE_MODE and USE_ALL_TRAIN:
        # Every chunk path is recorded as soon as it is known so the finally
        # block can remove the files even when a later stage raises.
        chunk_files = []
        try:
            # Extract features in chunks
            train_chunks = system.extract_features_memory_safe(train_loader, 'train', save_to_disk=True)
            chunk_files.extend(train_chunks)
            val_chunks = system.extract_features_memory_safe(val_loader, 'validation', save_to_disk=True)
            chunk_files.extend(val_chunks)
            test_chunks = system.extract_features_memory_safe(test_loader, 'test', save_to_disk=True)
            chunk_files.extend(test_chunks)

            # Delete feature extractor to free memory before training classifiers
            if DELETE_INTERMEDIATE:
                del system.feature_extractor
                if hasattr(system, 'classifier'):
                    del system.classifier
                clear_memory()
                print("Deleted feature extractor to free memory")
                print_memory_stats()

            # Train LR classifiers from chunks
            system.train_lr_classifiers_from_chunks(train_chunks + val_chunks)

            # Make predictions from chunks
            predictions = system.predict_from_chunks(test_chunks)

            # Load test labels; the context manager closes each archive promptly.
            test_labels = []
            for chunk_file in test_chunks:
                with np.load(chunk_file) as data:
                    test_labels.append(data['labels'])
            test_labels = np.vstack(test_labels)
        finally:
            # Clean up chunk files (also on failure, so no temp files are left behind)
            for chunk_file in chunk_files:
                if os.path.exists(chunk_file):
                    os.remove(chunk_file)
            print(f"Cleaned up {len(chunk_files)} temporary chunk files")

    else:
        # Standard extraction (original method)
        train_features, train_labels = system.extract_features_memory_safe(train_loader, 'train', save_to_disk=False)
        val_features, val_labels = system.extract_features_memory_safe(val_loader, 'validation', save_to_disk=False)
        test_features, test_labels = system.extract_features_memory_safe(test_loader, 'test', save_to_disk=False)

        # Combine train and validation
        all_train_features = np.vstack([train_features, val_features])
        all_train_labels = np.vstack([train_labels, val_labels])

        print(f"\nCombined training set: {len(all_train_features)} samples")

        # Train LR classifiers
        system.train_lr_classifiers(all_train_features, all_train_labels)

        # Make predictions
        print("\nMaking predictions...")
        predictions = system.predict(test_features)

    # Convert test labels to binary for evaluation
    test_labels_binary = (test_labels > 0.5).astype(int)

    # Evaluate
    results = evaluate_model(predictions, test_labels_binary, test_df)

    # Save results (one row per disease after the transpose)
    results_df = pd.DataFrame(results).T
    results_df.to_csv(f'{MODEL_TYPE}_lr_memory_safe_results.csv')
    print(f"\nResults saved to '{MODEL_TYPE}_lr_memory_safe_results.csv'")

    # Print final summary
    print("\n" + "="*80)
    print("CLASSIFICATION COMPLETED")
    print("="*80)
    print(f"Model Type: {MODEL_TYPE.upper()}")
    print(f"Feature Dimension: {system.num_features}")
    print(f"Classifier: Logistic Regression")
    print(f"Memory Safe Mode: {MEMORY_SAFE_MODE}")
    print(f"USE_ALL_TRAIN: {USE_ALL_TRAIN}")
    print(f"Test Samples: {len(test_labels)}")

    # Print top 5 diseases by AUC
    print("\nTop 5 Diseases by AUC:")
    sorted_results = sorted(results.items(), key=lambda x: x[1]['AUC'], reverse=True)[:5]
    for disease, metrics in sorted_results:
        print(f"{disease}: AUC={metrics['AUC']:.3f}, F1={metrics['F1']:.3f}")

    # Final memory check
    print_memory_stats()

    print(f"\nTotal time: {(time.time() - start_time)/60:.1f} minutes")

    return system, results

# ================== REQUIRED FUNCTIONS ==================

def prepare_data(train_csv, valid_csv, image_dir):
    """Load the CSVs, binarize uncertain labels, and make a patient-grouped split.

    Args:
        train_csv: Path of the training CSV; it must contain a ``Path`` column.
        valid_csv: Path of the CSV that is used as the held-out test set.
        image_dir: Not used by this function; kept for caller compatibility.

    Returns:
        Tuple ``(train_data, val_data, test_df)`` of DataFrames. ``train_data``
        and ``val_data`` partition the training CSV so that no ``PatientID``
        appears in both; ``test_df`` is the processed ``valid_csv``.
    """
    print("Loading and preparing data...")

    train_df = pd.read_csv(train_csv)
    test_df = pd.read_csv(valid_csv)

    print(f"Total train samples: {len(train_df)}")
    print(f"Total test samples: {len(test_df)}")

    # Extract patient IDs
    def extract_patient_id(path):
        """Return the first '/'-separated segment starting with 'patient', else None."""
        # A missing path is read as NaN (a float); treat it as "no patient"
        # so the row is dropped below instead of crashing on .split().
        if not isinstance(path, str):
            return None
        return next((part for part in path.split('/') if part.startswith('patient')), None)

    train_df['PatientID'] = train_df['Path'].apply(extract_patient_id)
    test_df['PatientID'] = test_df['Path'].apply(extract_patient_id)

    # Remove invalid samples. The explicit copy makes the label edits below
    # write to an independent frame rather than to a filtered selection.
    train_df = train_df[train_df['PatientID'].notna()].copy()
    test_df = test_df[test_df['PatientID'].notna()].copy()

    # Handle uncertain labels: missing -> 0, uncertain (-1) -> 1 for the
    # diseases listed in u_ones and -> 0 for every other disease.
    u_ones = {'Atelectasis', 'Edema', 'Pleural Effusion'}

    for df in (train_df, test_df):
        for disease in DISEASE_LABELS:
            if disease not in df.columns:
                continue
            uncertain_as = 1 if disease in u_ones else 0
            df[disease] = df[disease].fillna(0).replace(-1, uncertain_as)

    # Patient-based split
    unique_patients = train_df['PatientID'].unique()
    print(f"Number of unique patients: {len(unique_patients)}")

    gss = GroupShuffleSplit(n_splits=1, test_size=1-TRAIN_SPLIT, random_state=42)
    train_idx, val_idx = next(gss.split(train_df, groups=train_df['PatientID']))

    train_data = train_df.iloc[train_idx]
    val_data = train_df.iloc[val_idx]

    print(f"\nData split:")
    print(f"Train: {len(train_data)} samples")
    print(f"Val: {len(val_data)} samples")
    print(f"Test: {len(test_df)} samples")

    return train_data, val_data, test_df

def create_transforms():
    """Build the albumentations pipelines for training and for evaluation.

    Both pipelines resize to ``IMAGE_SIZE`` x ``IMAGE_SIZE``, normalize, and
    convert to a tensor; the training pipeline inserts the random
    augmentations between the resize and the normalization.

    Returns:
        Tuple ``(train_transform, val_transform)`` of ``A.Compose`` objects.
    """
    norm_mean = (0.485, 0.456, 0.406)
    norm_std = (0.229, 0.224, 0.225)

    def make_resize():
        # Fresh instance per pipeline so the two Compose objects share nothing.
        return A.Resize(IMAGE_SIZE, IMAGE_SIZE)

    def make_output_stages():
        return [A.Normalize(mean=norm_mean, std=norm_std), ToTensorV2()]

    # Random augmentations, applied to training images only.
    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.15, rotate_limit=15, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.CLAHE(clip_limit=2.0, p=0.3),
        A.CoarseDropout(max_holes=8, max_height=20, max_width=20, p=0.2),
    ]

    train_transform = A.Compose([make_resize(), *augmentations, *make_output_stages()])
    val_transform = A.Compose([make_resize(), *make_output_stages()])

    return train_transform, val_transform

def evaluate_model(predictions, true_labels, test_df):
    """Print and return per-disease metrics for the given predictions.

    For each disease whose label column contains both classes, computes ROC
    AUC and average precision, picks the score threshold that maximizes F1 on
    the precision-recall curve, and reports F1 / sensitivity / specificity at
    that threshold. Diseases with a single-class label column are skipped.

    Note that the threshold is selected on the same labels it is evaluated on,
    so the thresholded metrics are optimistic.

    Args:
        predictions: Object indexable by disease name, giving one score per sample.
        true_labels: 2-D binary array whose columns follow ``DISEASE_LABELS``.
        test_df: Not used by this function; kept for caller compatibility.

    Returns:
        Dict mapping disease name to a dict with keys ``'AUC'``, ``'AP'``,
        ``'F1'``, ``'Sensitivity'``, ``'Specificity'`` and ``'Threshold'``.
    """
    results = {}

    print("\n" + "="*80)
    print("EVALUATION RESULTS")
    print("="*80)

    print("\nPer-Disease Performance:")
    print("-"*80)
    print(f"{'Disease':<30} {'AUC':>8} {'AP':>8} {'F1':>8} {'Sens':>8} {'Spec':>8}")
    print("-"*80)

    all_metrics = []

    for i, disease in enumerate(DISEASE_LABELS):
        y_true = true_labels[:, i]
        y_pred = np.asarray(predictions[disease])

        # Ranking metrics are undefined when only one class is present.
        n_pos = y_true.sum()
        if not 0 < n_pos < len(y_true):
            continue

        # Threshold-free metrics
        auc = roc_auc_score(y_true, y_pred)
        ap = average_precision_score(y_true, y_pred)

        # Find optimal threshold. The last precision/recall point has no
        # associated threshold, hence the [:-1].
        precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
        f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10)
        best_idx = np.argmax(f1_scores[:-1])
        best_threshold = thresholds[best_idx]

        # precision[i] / recall[i] describe predictions with score >= thresholds[i],
        # so the comparison must be inclusive to reproduce the selected point.
        y_pred_binary = (y_pred >= best_threshold).astype(int)

        # Confusion-matrix counts
        tn = np.sum((y_true == 0) & (y_pred_binary == 0))
        fp = np.sum((y_true == 0) & (y_pred_binary == 1))
        fn = np.sum((y_true == 1) & (y_pred_binary == 0))
        tp = np.sum((y_true == 1) & (y_pred_binary == 1))

        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        f1 = 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0

        results[disease] = {
            'AUC': auc, 'AP': ap, 'F1': f1,
            'Sensitivity': sensitivity, 'Specificity': specificity,
            'Threshold': best_threshold
        }

        all_metrics.append([disease, auc, ap, f1, sensitivity, specificity])

        print(f"{disease:<30} {auc:>8.3f} {ap:>8.3f} {f1:>8.3f} "
              f"{sensitivity:>8.3f} {specificity:>8.3f}")

    # Overall metrics (mean and std across the evaluated diseases)
    if all_metrics:
        print("\n" + "-"*80)
        print("Overall Performance Summary:")
        print("-"*80)

        metrics_array = np.array([m[1:] for m in all_metrics])
        metric_names = ['AUC', 'AP', 'F1', 'Sensitivity', 'Specificity']

        for col, name in enumerate(metric_names):
            mean_val = np.mean(metrics_array[:, col])
            std_val = np.std(metrics_array[:, col])
            print(f"Mean {name}: {mean_val:.3f} ± {std_val:.3f}")

    return results

# ================== RUN SYSTEM ==================

if __name__ == "__main__":
    # Title banner
    print(
        "\n" + "="*80,
        "MEMORY-SAFE ENSEMBLE MODEL SYSTEM",
        "Fixed Label Handling with Binary Classification",
        "="*80,
        sep="\n",
    )

    # Echo the settings that drive this run
    print(
        f"\nConfiguration:",
        f"- USE_ALL_TRAIN: {USE_ALL_TRAIN}",
        f"- MEMORY_SAFE_MODE: {MEMORY_SAFE_MODE}",
        f"- MODEL_TYPE: {MODEL_TYPE}",
        f"- MAX_SAMPLES_PER_CHUNK: {MAX_SAMPLES_PER_CHUNK}",
        f"- BATCH_SIZE: {BATCH_SIZE}",
        f"- EPOCHS: {EPOCHS}",
        sep="\n",
    )

    # Run memory-safe pipeline; the trained system and metrics stay available
    # as module-level names afterwards.
    system, results = main_memory_safe()

    # Closing banner
    print(
        "\n" + "="*80,
        "Training completed successfully!",
        "="*80,
        sep="\n",
    )




MEMORY-SAFE ENSEMBLE MODEL SYSTEM
Fixed Label Handling with Binary Classification

Configuration:
- USE_ALL_TRAIN: True
- MEMORY_SAFE_MODE: True
- MODEL_TYPE: ensemble
- MAX_SAMPLES_PER_CHUNK: 50000
- BATCH_SIZE: 32
- EPOCHS: 16
Using device: cuda
GPU: Tesla T4
GPU Memory - Allocated: 0.00GB, Reserved: 0.00GB
Loading and preparing data...
Total train samples: 223414
Total test samples: 234
Number of unique patients: 64540

Data split:
Train: 190358 samples
Val: 33056 samples
Test: 234 samples

USE_ALL_TRAIN=True detected. Using memory-safe data handling...
Total training samples after combining: 223414
New split - Train: 201072, Val: 22342

Initializing Memory-Safe ENSEMBLE + LR System...


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 222MB/s]
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 260MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 30.7MB/s]


GPU Memory - Allocated: 0.16GB, Reserved: 0.17GB

Training ENSEMBLE Feature Extractor...


Epoch 1/16 [Train]: 100%|██████████| 6283/6283 [43:35<00:00,  2.40it/s, loss=0.0602]
Validation: 100%|██████████| 699/699 [04:53<00:00,  2.38it/s]


Epoch 1: Train Loss=0.0636, Train AUC=0.6356, Val Loss=0.0602, Val AUC=0.7239
Saved best model with Val AUC: 0.7239


Epoch 2/16 [Train]: 100%|██████████| 6283/6283 [25:41<00:00,  4.08it/s, loss=0.0529]
Validation: 100%|██████████| 699/699 [02:15<00:00,  5.17it/s]


Epoch 2: Train Loss=0.0580, Train AUC=0.7102, Val Loss=0.0578, Val AUC=0.7509
Saved best model with Val AUC: 0.7509


Epoch 3/16 [Train]: 100%|██████████| 6283/6283 [25:45<00:00,  4.07it/s, loss=0.0579]
Validation: 100%|██████████| 699/699 [02:27<00:00,  4.73it/s]


Epoch 3: Train Loss=0.0563, Train AUC=0.7365, Val Loss=0.0569, Val AUC=0.7639
Saved best model with Val AUC: 0.7639


Epoch 4/16 [Train]: 100%|██████████| 6283/6283 [26:50<00:00,  3.90it/s, loss=0.0549]
Validation: 100%|██████████| 699/699 [02:21<00:00,  4.95it/s]


Epoch 4: Train Loss=0.0553, Train AUC=0.7518, Val Loss=0.0562, Val AUC=0.7699
Saved best model with Val AUC: 0.7699


Epoch 5/16 [Train]: 100%|██████████| 6283/6283 [26:52<00:00,  3.90it/s, loss=0.0511]
Validation: 100%|██████████| 699/699 [02:25<00:00,  4.79it/s]


Epoch 5: Train Loss=0.0547, Train AUC=0.7611, Val Loss=0.0559, Val AUC=0.7744
Saved best model with Val AUC: 0.7744


Epoch 6/16 [Train]: 100%|██████████| 6283/6283 [26:33<00:00,  3.94it/s, loss=0.0593]
Validation: 100%|██████████| 699/699 [02:21<00:00,  4.94it/s]


Epoch 6: Train Loss=0.0542, Train AUC=0.7688, Val Loss=0.0557, Val AUC=0.7780
Saved best model with Val AUC: 0.7780


Epoch 7/16 [Train]: 100%|██████████| 6283/6283 [32:58<00:00,  3.18it/s, loss=0.0563]  
Validation: 100%|██████████| 699/699 [02:23<00:00,  4.86it/s]


Epoch 7: Train Loss=0.0538, Train AUC=0.7747, Val Loss=0.0555, Val AUC=0.7809
Saved best model with Val AUC: 0.7809


Epoch 8/16 [Train]: 100%|██████████| 6283/6283 [26:24<00:00,  3.96it/s, loss=0.0541]
Validation: 100%|██████████| 699/699 [02:19<00:00,  5.00it/s]


Epoch 8: Train Loss=0.0534, Train AUC=0.7797, Val Loss=0.0554, Val AUC=0.7832
Saved best model with Val AUC: 0.7832


Epoch 9/16 [Train]: 100%|██████████| 6283/6283 [25:40<00:00,  4.08it/s, loss=0.0452]
Validation: 100%|██████████| 699/699 [02:24<00:00,  4.83it/s]


Epoch 9: Train Loss=0.0531, Train AUC=0.7844, Val Loss=0.0553, Val AUC=0.7851
Saved best model with Val AUC: 0.7851


Epoch 10/16 [Train]: 100%|██████████| 6283/6283 [27:03<00:00,  3.87it/s, loss=0.0572]
Validation: 100%|██████████| 699/699 [02:20<00:00,  4.97it/s]


Epoch 10: Train Loss=0.0528, Train AUC=0.7873, Val Loss=0.0552, Val AUC=0.7856
Saved best model with Val AUC: 0.7856


Epoch 11/16 [Train]: 100%|██████████| 6283/6283 [26:40<00:00,  3.93it/s, loss=0.0473]
Validation: 100%|██████████| 699/699 [02:29<00:00,  4.68it/s]


Epoch 11: Train Loss=0.0526, Train AUC=0.7904, Val Loss=0.0551, Val AUC=0.7862
Saved best model with Val AUC: 0.7862


Epoch 12/16 [Train]: 100%|██████████| 6283/6283 [26:06<00:00,  4.01it/s, loss=0.0524]
Validation: 100%|██████████| 699/699 [02:16<00:00,  5.12it/s]


Epoch 12: Train Loss=0.0524, Train AUC=0.7930, Val Loss=0.0550, Val AUC=0.7868
Saved best model with Val AUC: 0.7868


Epoch 13/16 [Train]: 100%|██████████| 6283/6283 [26:18<00:00,  3.98it/s, loss=0.05]  
Validation: 100%|██████████| 699/699 [02:20<00:00,  4.97it/s]


Epoch 13: Train Loss=0.0523, Train AUC=0.7954, Val Loss=0.0551, Val AUC=0.7869
Saved best model with Val AUC: 0.7869


Epoch 14/16 [Train]: 100%|██████████| 6283/6283 [26:18<00:00,  3.98it/s, loss=0.0556]
Validation: 100%|██████████| 699/699 [02:22<00:00,  4.91it/s]


Epoch 14: Train Loss=0.0522, Train AUC=0.7960, Val Loss=0.0551, Val AUC=0.7878
Saved best model with Val AUC: 0.7878


Epoch 15/16 [Train]: 100%|██████████| 6283/6283 [26:22<00:00,  3.97it/s, loss=0.0525]
Validation: 100%|██████████| 699/699 [02:28<00:00,  4.69it/s]


Epoch 15: Train Loss=0.0521, Train AUC=0.7968, Val Loss=0.0550, Val AUC=0.7880
Saved best model with Val AUC: 0.7880


Epoch 16/16 [Train]: 100%|██████████| 6283/6283 [26:05<00:00,  4.01it/s, loss=0.0522]
Validation: 100%|██████████| 699/699 [02:19<00:00,  5.02it/s]


Epoch 16: Train Loss=0.0521, Train AUC=0.7972, Val Loss=0.0551, Val AUC=0.7878
Loaded best model with Val AUC: 0.7880
GPU Memory - Allocated: 0.32GB, Reserved: 0.49GB

Extracting features...

Extracting train features in memory-safe mode...


Extracting train features:   0%|          | 1/6283 [00:03<6:40:10,  3.82s/it]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:   2%|▏         | 101/6283 [00:29<30:10,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:   3%|▎         | 201/6283 [00:54<30:07,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:   5%|▍         | 301/6283 [01:20<29:01,  3.43it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:   6%|▋         | 401/6283 [01:45<28:47,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:   8%|▊         | 501/6283 [02:11<28:17,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  10%|▉         | 601/6283 [02:36<28:10,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  11%|█         | 701/6283 [03:02<27:26,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  13%|█▎        | 801/6283 [03:28<27:29,  3.32it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  14%|█▍        | 901/6283 [03:53<26:29,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  16%|█▌        | 1001/6283 [04:19<25:53,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  18%|█▊        | 1101/6283 [04:44<25:35,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  19%|█▉        | 1201/6283 [05:10<24:49,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  21%|██        | 1301/6283 [05:35<24:26,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  22%|██▏       | 1401/6283 [06:01<23:49,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  24%|██▍       | 1501/6283 [06:26<23:33,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  25%|██▍       | 1563/6283 [06:53<4:38:49,  3.54s/it]

Saved chunk 0 with 50016 samples


Extracting train features:  25%|██▌       | 1601/6283 [07:03<22:58,  3.40it/s]  

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  27%|██▋       | 1701/6283 [07:28<22:59,  3.32it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  29%|██▊       | 1801/6283 [07:54<21:59,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  30%|███       | 1901/6283 [08:19<21:19,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  32%|███▏      | 2001/6283 [08:45<21:14,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  33%|███▎      | 2101/6283 [09:11<20:36,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  35%|███▌      | 2201/6283 [09:36<20:15,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  37%|███▋      | 2301/6283 [10:02<19:29,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  38%|███▊      | 2401/6283 [10:27<19:14,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  40%|███▉      | 2501/6283 [10:53<18:26,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  41%|████▏     | 2601/6283 [11:18<18:25,  3.33it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  43%|████▎     | 2701/6283 [11:44<17:39,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  45%|████▍     | 2801/6283 [12:09<17:12,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  46%|████▌     | 2901/6283 [12:35<16:45,  3.36it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  48%|████▊     | 3001/6283 [13:00<16:10,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  49%|████▉     | 3101/6283 [13:26<15:36,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  50%|████▉     | 3126/6283 [13:43<3:05:15,  3.52s/it]

Saved chunk 1 with 50016 samples


Extracting train features:  51%|█████     | 3201/6283 [14:02<15:09,  3.39it/s]  

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  53%|█████▎    | 3301/6283 [14:28<14:50,  3.35it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  54%|█████▍    | 3401/6283 [14:54<14:01,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  56%|█████▌    | 3501/6283 [15:19<13:45,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  57%|█████▋    | 3601/6283 [15:45<13:11,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  59%|█████▉    | 3701/6283 [16:10<12:54,  3.33it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  60%|██████    | 3801/6283 [16:36<12:08,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  62%|██████▏   | 3901/6283 [17:01<12:01,  3.30it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  64%|██████▎   | 4001/6283 [17:27<11:13,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  65%|██████▌   | 4101/6283 [17:52<10:47,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  67%|██████▋   | 4201/6283 [18:18<10:11,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  68%|██████▊   | 4301/6283 [18:44<09:55,  3.33it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  70%|███████   | 4401/6283 [19:09<09:16,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  72%|███████▏  | 4501/6283 [19:35<08:42,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  73%|███████▎  | 4601/6283 [20:00<08:19,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  75%|███████▍  | 4689/6283 [20:33<1:34:06,  3.54s/it]

Saved chunk 2 with 50016 samples


Extracting train features:  75%|███████▍  | 4701/6283 [20:37<09:02,  2.92it/s]  

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  76%|███████▋  | 4801/6283 [21:02<07:31,  3.28it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  78%|███████▊  | 4901/6283 [21:28<06:48,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  80%|███████▉  | 5001/6283 [21:54<06:14,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  81%|████████  | 5101/6283 [22:19<05:45,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  83%|████████▎ | 5201/6283 [22:44<05:20,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  84%|████████▍ | 5301/6283 [23:10<04:47,  3.41it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  86%|████████▌ | 5401/6283 [23:35<04:19,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  88%|████████▊ | 5501/6283 [24:01<03:50,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  89%|████████▉ | 5601/6283 [24:26<03:19,  3.42it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  91%|█████████ | 5701/6283 [24:52<02:51,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  92%|█████████▏| 5801/6283 [25:17<02:22,  3.39it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  94%|█████████▍| 5901/6283 [25:43<01:55,  3.30it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  96%|█████████▌| 6001/6283 [26:09<01:23,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  97%|█████████▋| 6101/6283 [26:34<00:54,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features:  99%|█████████▊| 6201/6283 [27:00<00:24,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting train features: 100%|█████████▉| 6252/6283 [27:24<01:48,  3.51s/it]

Saved chunk 3 with 50016 samples


Extracting train features: 100%|██████████| 6283/6283 [27:32<00:00,  3.80it/s]


Saved final chunk 4 with 992 samples

Extracting validation features in memory-safe mode...


Extracting validation features:   0%|          | 1/699 [00:00<11:28,  1.01it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  14%|█▍        | 101/699 [00:26<03:01,  3.29it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  29%|██▉       | 201/699 [00:52<02:24,  3.44it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  43%|████▎     | 301/699 [01:17<01:57,  3.40it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  57%|█████▋    | 401/699 [01:43<01:28,  3.38it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  72%|███████▏  | 501/699 [02:08<00:57,  3.43it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features:  86%|████████▌ | 601/699 [02:34<00:29,  3.37it/s]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting validation features: 100%|██████████| 699/699 [02:59<00:00,  3.89it/s]


Saved final chunk 0 with 22342 samples

Extracting test features in memory-safe mode...


Extracting test features:  12%|█▎        | 1/8 [00:01<00:11,  1.59s/it]

GPU Memory - Allocated: 0.34GB, Reserved: 0.49GB


Extracting test features: 100%|██████████| 8/8 [00:05<00:00,  1.45it/s]


Saved final chunk 0 with 234 samples
Deleted feature extractor to free memory
GPU Memory - Allocated: 0.02GB, Reserved: 0.04GB

Training Logistic Regression Classifiers from chunks...
Fitting StandardScaler on all chunks...

Training LR for No Finding...
No Finding - Pos samples: 22379/223398 (10.0%), Weights: pos=4.99, neg=0.56
No Finding - Training completed

Training LR for Enlarged Cardiomediastinum...
Enlarged Cardiomediastinum - Pos samples: 10797/223398 (4.8%), Weights: pos=10.35, neg=0.53
Enlarged Cardiomediastinum - Training completed

Training LR for Cardiomegaly...
Cardiomegaly - Pos samples: 26997/223398 (12.1%), Weights: pos=4.14, neg=0.57
Cardiomegaly - Training completed

Training LR for Lung Opacity...
Lung Opacity - Pos samples: 105573/223398 (47.3%), Weights: pos=1.06, neg=0.95
Lung Opacity - Training completed

Training LR for Lung Lesion...
Lung Lesion - Pos samples: 9186/223398 (4.1%), Weights: pos=12.16, neg=0.52
Lung Lesion - Training completed

Training LR for E