In [1]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, applications
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score, roc_auc_score
from datetime import datetime
import traceback
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers.schedules import ExponentialDecay
# Set random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

2025-06-04 01:52:20.735594: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749001940.968977      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749001941.037420      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
TRAIN_CSV = "/kaggle/input/miap-dataset/fixedTraindataset.csv"
VAL_CSV = "/kaggle/input/miap-dataset/fixedVALdataset.csv"
TEST_CSV = "/kaggle/input/miap-dataset/fixedTestdataset.csv"

IMAGE_DIRS = {
    'train': "/kaggle/input/miap-dataset/processed_train",
    'val': "/kaggle/input/miap-dataset/processed_val",
    'test': "/kaggle/input/miap-dataset/processed_test"
}

CLASSES_TO_EXCLUDE = ["No Finding"]
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS =  30
GRADCAM_SAMPLES = 5

# Create output directories
os.makedirs("/kaggle/working/models", exist_ok=True)
os.makedirs("/kaggle/working/gradcam", exist_ok=True)
os.makedirs("/kaggle/working/logs", exist_ok=True)

In [3]:
def load_and_validate_data():
    """Load data and validate all image paths exist"""
    def validate_paths(df, mode):
        valid_entries = []
        for _, row in df.iterrows():
            filename = row["File Name"]
            # Check multiple extensions
            for ext in ['', '.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG']:
                test_path = os.path.join(IMAGE_DIRS[mode], f"{os.path.splitext(filename)[0]}{ext}")
                if os.path.exists(test_path):
                    valid_entries.append({
                        'path': test_path,
                        'filename': filename,
                        'labels': row["Combined Labels"]
                    })
                    break
        return pd.DataFrame(valid_entries)

    print("Loading and validating training data...")
    train_df = pd.read_csv(TRAIN_CSV)
    train_df = validate_paths(train_df, 'train')
    
    print("Loading and validating validation data...")
    val_df = pd.read_csv(VAL_CSV)
    val_df = validate_paths(val_df, 'val')
    
    print("Loading and validating test data...")
    test_df = pd.read_csv(TEST_CSV)
    test_df = validate_paths(test_df, 'test')
    
    # Process labels
    def split_labels(label_str):
        if pd.isna(label_str):
            return []
        return [label.strip() for label in str(label_str).split("|") if label.strip() not in CLASSES_TO_EXCLUDE]
    
    train_df['labels'] = train_df['labels'].apply(split_labels)
    val_df['labels'] = val_df['labels'].apply(split_labels)
    test_df['labels'] = test_df['labels'].apply(split_labels)
    
    # Get all unique classes
    all_labels = []
    for df in [train_df, val_df, test_df]:
        all_labels.extend([label for sublist in df['labels'] for label in sublist])
    classes = sorted(list(set(all_labels) - set(CLASSES_TO_EXCLUDE)))
    
    # Create MultiLabelBinarizer
    mlb = MultiLabelBinarizer()
    mlb.fit([classes])  # Fit with all possible classes
    
    return train_df, val_df, test_df, classes, mlb

train_df, val_df, test_df, classes, mlb = load_and_validate_data()

Loading and validating training data...
Loading and validating validation data...
Loading and validating test data...


In [4]:
def create_class_dataset(df, class_name, mode, augment=False):
    """Create balanced dataset for a specific class with validation checks"""
    valid_samples = []
    for _, row in df.iterrows():
        if row['path']:
            label = 1 if class_name in row['labels'] else 0
            valid_samples.append((row['path'], row['filename'], label))
    
    pos_samples = [x for x in valid_samples if x[2] == 1]
    neg_samples = [x for x in valid_samples if x[2] == 0]
    
    if not pos_samples or not neg_samples:
        raise ValueError(f"Insufficient samples for {class_name} - Pos: {len(pos_samples)}, Neg: {len(neg_samples)}")
    
    n_samples = min(len(pos_samples), len(neg_samples))
    balanced_samples = pos_samples[:n_samples] + neg_samples[:n_samples]
    np.random.shuffle(balanced_samples)
    
    if mode == 'val':
        val_pos = sum(1 for _, _, label in balanced_samples if label == 1)
        print(f"Validation balance - Pos: {val_pos}, Neg: {len(balanced_samples)-val_pos}")
        if val_pos == 0 or val_pos == len(balanced_samples):
            raise ValueError(f"Invalid validation distribution for {class_name}")
    
    def load_image(path, filename, label):
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, IMAGE_SIZE)
        img = tf.keras.applications.densenet.preprocess_input(img)
        if augment:
            img = tf.image.random_flip_left_right(img)
            img = tf.image.random_brightness(img, 0.1)
            img = tf.image.random_contrast(img, 0.9, 1.1)
        return img, filename, tf.cast(label, tf.float32)
    
    paths, filenames, labels = zip(*balanced_samples)
    dataset = tf.data.Dataset.from_tensor_slices((list(paths), list(filenames), list(labels)))
    dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [5]:
LEARNING_RATE = 2e-4  # Reduced initial learning rate
DROPOUT_RATE = 0.6
class DenseNetWithHead(tf.keras.Model):
    def __init__(self, image_size):
        super().__init__()
        self.base_model = tf.keras.applications.DenseNet121(
            include_top=False,
            weights="imagenet",
            input_shape=(*image_size, 3),
            pooling=None
        )
        for layer in self.base_model.layers[-20:]:
            if not isinstance(layer, tf.keras.layers.BatchNormalization):
                layer.trainable = True
                
        self.gap = tf.keras.layers.GlobalAveragePooling2D()
        self.drop = tf.keras.layers.Dropout(0.5)
        self.head = tf.keras.layers.Dense(1, activation='sigmoid')
        self._gradcam_layer_name = 'conv5_block16_2_conv'
        self._base_model = self.base_model

    def call(self, inputs, training=False):
        x = self.base_model(inputs, training=training)
        x = self.gap(x)
        x = self.drop(x, training=training)
        return self.head(x)

def build_densenet_model():
    model = DenseNetWithHead(IMAGE_SIZE)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss=tf.keras.losses.BinaryFocalCrossentropy(from_logits=False, gamma=2.0),
        metrics=[
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.BinaryAccuracy(name='acc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )
    return model

In [6]:
def train_single_class(class_name):
    """Train model for a single class"""
    print(f"\n=== Training model for: {class_name} ===")
    print(f"Started at {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC")
    
    try:
        # Create datasets
        train_ds = create_class_dataset(train_df, class_name, 'train', augment=True)
        val_ds = create_class_dataset(val_df, class_name, 'val')
        val_labels = np.array([y.numpy() for _, _, y in val_ds.unbatch()])
        pos_ratio = np.mean(val_labels)
        
        if pos_ratio < 0.2 or pos_ratio > 0.8:
            print(f"Skipping {class_name} - imbalanced validation set ({pos_ratio:.2f} positive)")
            return None
            
        # Build model
        print(f"Building model for {class_name}...")
        model = build_densenet_model()
        
        # Initialize with dummy input
        print("Initializing model with dummy input...")
        dummy = tf.zeros((1, *IMAGE_SIZE, 3))
        _ = model(dummy, training=False)
        
        # Calculate class weights
        pos_count = train_df['labels'].apply(lambda x: class_name in x).sum()
        pos_weight = (len(train_df) - pos_count) / max(1, pos_count)
        
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=5,
                monitor='val_auc',
                mode='max',
                restore_best_weights=True,
                baseline=0.7
            ),
            tf.keras.callbacks.ModelCheckpoint(
                f"/kaggle/working/models/{class_name}_model.keras",
                monitor='val_auc',
                save_best_only=True,
                mode='max'
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=2,
                min_lr=1e-6,
                verbose=1
            ),
            tf.keras.callbacks.CSVLogger(
                f"/kaggle/working/logs/{class_name}_history.csv"
            )
        ]

        print(f"Training samples: {len(list(train_ds))} batches")
        print(f"Validation samples: {len(list(val_ds))} batches")
        print(f"Using class weight - positive: {pos_weight:.2f}")

        # Train
        history = model.fit(
            train_ds.map(lambda img, filename, label: (img, label)),
            validation_data=val_ds.map(lambda img, filename, label: (img, label)),
            epochs=EPOCHS,
            callbacks=callbacks,
            verbose=1,
            class_weight={0: 1., 1: pos_weight}
        )

        # Find best threshold
        val_probs = model.predict(val_ds.map(lambda img, filename, label: (img, label)))
        val_labels = np.array([y.numpy() for _, y in val_ds.map(lambda img, filename, label: (img, label)).unbatch()])
        
        best_threshold = 0.5
        best_f1 = 0
        for threshold in np.linspace(0.1, 0.9, 17):
            current_f1 = f1_score(val_labels, val_probs > threshold)
            if current_f1 > best_f1:
                best_f1 = current_f1
                best_threshold = threshold
                
        results = {
            'best_f1': float(best_f1),
            'best_threshold': float(best_threshold),
            'train_samples': len(list(train_ds)) * BATCH_SIZE,
            'val_samples': len(val_labels),
            'val_pos_ratio': float(np.mean(val_labels))
        }
        
        print(f"Best threshold: {best_threshold:.3f} (F1: {best_f1:.3f})")
        
        # Generate GradCAM
        print("Generating GradCAM visualizations...")
        save_gradcam_images(model, val_ds, class_name, best_threshold)
        
        print(f"Completed at {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC")
        return results
        
    except Exception as e:
        print(f"Error training {class_name}: {str(e)}")
        print(f"Exception type: {type(e)}")
        print(f"Traceback:\n{traceback.format_exc()}")
        return None

In [7]:
def generate_gradcam(model, img_array):
    """Generate Grad-CAM heatmap with explicit tensor operations"""
    try:
        # Get the base model and conv layer
        base_model = model.base_model  # Using base_model instead of _base_model
        conv_layer = base_model.get_layer(model._gradcam_layer_name)
        
        # Create a model that goes from input to conv layer
        grad_model = tf.keras.Model(
            inputs=base_model.input,
            outputs=[
                conv_layer.output,
                base_model.output
            ]
        )
        
        # Record operations for gradient computation
        with tf.GradientTape() as tape:
            # Forward pass through base model to get conv outputs
            inputs = tf.cast(img_array, tf.float32)
            conv_output, base_output = grad_model(inputs, training=False)
            
            # Forward pass through remaining layers
            x = model.gap(base_output)
            x = model.drop(x, training=False)
            predictions = model.head(x)
            
            # Get the score for the target class
            score = predictions[:, 0]
        
        # Calculate gradients
        grads = tape.gradient(score, conv_output)
        
        # Global average pooling
        pooled_grads = tf.reduce_mean(grads, axis=(1, 2))
        
        # Weight the channels and create heatmap
        conv_output = conv_output[0]
        pooled_grads = pooled_grads[0]
        
        # Apply weights to create heatmap
        heatmap = tf.zeros_like(conv_output[:, :, 0])
        for i in range(pooled_grads.shape[-1]):
            heatmap += conv_output[:, :, i] * pooled_grads[i]
        
        # Post-process heatmap
        heatmap = tf.maximum(heatmap, 0) / (tf.reduce_max(heatmap) + tf.keras.backend.epsilon())
        
        return heatmap.numpy()
        
    except Exception as e:
        print(f"Error in generate_gradcam: {str(e)}")
        print(f"Conv layer name: {model._gradcam_layer_name}")
        print(f"Base model layers: {[layer.name for layer in model.base_model.layers]}")
        raise

In [8]:
def save_gradcam_images(model, dataset, class_name, threshold, num_images=5):
    """Save Grad-CAM visualizations with improved error handling"""
    try:
        os.makedirs("/kaggle/working/gradcam/", exist_ok=True)
        
        for batch in dataset.take(1):
            images, filenames, _ = batch
            
            for i in range(min(num_images, len(images))):
                try:
                    img = images[i]
                    filename = filenames[i]
                    
                    # Ensure input is in the correct format
                    img_array = tf.expand_dims(img, axis=0)
                    img_array = tf.cast(img_array, tf.float32)
                    
                    print(f"Processing image {i+1}/{min(num_images, len(images))}...")
                    print(f"Input shape: {img_array.shape}")
                    
                    # Generate heatmap
                    heatmap = generate_gradcam(model, img_array)
                    
                    if heatmap is None:
                        print(f"Warning: Heatmap generation failed for image {i+1}")
                        continue
                        
                    print(f"Heatmap shape: {heatmap.shape}")
                    
                    # Resize heatmap to match input image size
                    heatmap_resized = cv2.resize(heatmap, (IMAGE_SIZE[1], IMAGE_SIZE[0]))
                    heatmap_uint8 = np.uint8(255 * heatmap_resized)
                    heatmap_colored = cv2.applyColorMap(heatmap_uint8, cv2.COLORMAP_JET)
                    
                    # Process original image
                    orig_img = img.numpy()
                    orig_img = ((orig_img + 1) * 127.5).astype('uint8')
                    
                    # Create overlay
                    superimposed = cv2.addWeighted(orig_img, 0.6, heatmap_colored, 0.4, 0)
                    
                    # Save output
                    output_path = f"/kaggle/working/gradcam/{class_name}_{filename.numpy().decode('utf-8')}"
                    cv2.imwrite(output_path, superimposed)
                    print(f"Saved visualization to {output_path}")
                    
                except Exception as e:
                    print(f"Error processing image {i+1}: {str(e)}")
                    continue
                    
    except Exception as e:
        print(f"Error in save_gradcam_images: {str(e)}")
        raise

In [9]:
def evaluate_models():
    with open("/kaggle/working/thresholds.json", "r") as f:
        thresholds = json.load(f)
    
    # Prepare test data
    test_labels = mlb.transform(test_df['labels'])
    all_preds = np.zeros((len(test_df), len(classes)))
    all_probs = np.zeros((len(test_df), len(classes)))
    
    for class_idx, class_name in enumerate(classes):
        model_path = f"/kaggle/working/models/{class_name}_model.keras"
        if not os.path.exists(model_path):
            print(f"Skipping {class_name} - model not found")
            continue
            
        print(f"Evaluating {class_name}...")
        model = tf.keras.models.load_model(model_path)
        
        # Create test dataset
        try:
            test_ds = create_class_dataset(test_df, class_name, 'test')
        except ValueError as e:
            print(f"Skipping {class_name}: {str(e)}")
            continue
        
        # Predict
        probs = model.predict(test_ds.map(lambda img, filename, label: (img, label)))
        preds = (probs > thresholds[class_name]).astype(int)
        
        # Store results
        all_probs[:, class_idx] = probs.flatten()
        all_preds[:, class_idx] = preds.flatten()
        
        # Generate Grad-CAM for test samples
        save_gradcam_images(model, test_ds, class_name, thresholds[class_name])
    
    # Calculate metrics
    results = {
        'micro_f1': f1_score(test_labels, all_preds, average='micro'),
        'macro_f1': f1_score(test_labels, all_preds, average='macro'),
        'micro_auc': roc_auc_score(test_labels, all_probs, average='micro'),
        'macro_auc': roc_auc_score(test_labels, all_probs, average='macro')
    }
    
    print("\nTest Set Metrics:")
    for metric, value in results.items():
        print(f"{metric}: {value:.4f}")
    
    # Save predictions
    results_df = pd.DataFrame({
        'File_Name': test_df['filename'],
        **{f'prob_{class_name}': all_probs[:, i] for i, class_name in enumerate(classes)},
        **{f'pred_{class_name}': all_preds[:, i] for i, class_name in enumerate(classes)}
    })
    results_df.to_csv("/kaggle/working/predictions.csv", index=False)
    
    with open("/kaggle/working/test_metrics.json", "w") as f:
        json.dump(results, f)

In [13]:
import pandas as pd

# Get unique classes from training data
unique_classes = set()
for labels in train_df['labels'].values:
    if isinstance(labels, str):  # If labels are stored as strings
        unique_classes.update(labels.split('|'))
    elif isinstance(labels, list):  # If labels are stored as lists
        unique_classes.update(labels)

# Sort classes alphabetically
classes = sorted(list(unique_classes))

print("Classes found in training data:")
print("=" * 40)
for i, class_name in enumerate(classes, 1):
    print(f"{i:2d}. {class_name}")
print("=" * 40)
print(f"Total number of classes: {len(classes)}")

# Store for future use
with open("/kaggle/working/classes.json", "w") as f:
    json.dump(classes, f)

Classes found in training data:
 1. Atelectasis
 2. COVID-19
 3. Cardiomegaly
 4. Consolidation
 5. Edema
 6. Effusion
 7. Emphysema
 8. Fibrosis
 9. Hernia
10. Infiltration
11. Mass
12. Nodule
13. Pleural_Thickening
14. Pneumonia
15. Pneumothorax
Total number of classes: 15


In [14]:
# Train 1
results = train_single_class('Atelectasis')
if results:
    with open("/kaggle/working/Atelectasis_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Atelectasis ===
Started at 2025-06-03 15:25:26 UTC


I0000 00:00:1748964330.866195      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1748964330.866935      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Validation balance - Pos: 2342, Neg: 2342
Building model for Atelectasis...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Initializing model with dummy input...


I0000 00:00:1748964350.766907      31 cuda_dnn.cc:529] Loaded cuDNN version 90300


Training samples: 434 batches
Validation samples: 147 batches
Using class weight - positive: 8.73
Epoch 1/30


I0000 00:00:1748964491.211984     107 service.cc:148] XLA service 0x7c8b54001540 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748964491.212733     107 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1748964491.213054     107 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1748964580.504166     107 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 533ms/step - acc: 0.5488 - auc: 0.6014 - loss: 0.8547 - precision: 0.5315 - recall: 0.8817 - val_acc: 0.5111 - val_auc: 0.7218 - val_loss: 0.5181 - val_precision: 0.5056 - val_recall: 0.9983 - learning_rate: 2.0000e-04
Epoch 2/30
[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 303ms/step - acc: 0.5661 - auc: 0.6719 - loss: 0.5045 - precision: 0.5383 - recall: 0.9543 - val_acc: 0.5102 - val_auc: 0.7413 - val_loss: 0.4754 - val_precision: 0.5052 - val_recall: 0.9983 - learning_rate: 2.0000e-04
Epoch 3/30
[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 304ms/step - acc: 0.5831 - auc: 0.7291 - loss: 0.4403 - precision: 0.5482 - recall: 0.9659 - val_acc: 0.5152 - val_auc: 0.7442 - val_loss: 0.3771 - val_precision: 0.5077 - val_recall: 0.9991 - learning_rate: 2.0000e-04
Epoch 4/30
[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 300ms/step - acc: 0.6001 - auc: 0.7632

In [17]:
# Train 2
results = train_single_class('COVID-19')
if results:
    with open("/kaggle/working/COVID-19_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: COVID-19 ===
Started at 2025-06-03 15:57:28 UTC
Validation balance - Pos: 74, Neg: 74
Building model for COVID-19...
Initializing model with dummy input...
Training samples: 15 batches
Validation samples: 5 batches
Using class weight - positive: 298.96
Epoch 1/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 8s/step - acc: 0.7383 - auc: 0.8633 - loss: 46.1934 - precision: 0.7950 - recall: 0.6617 - val_acc: 0.5000 - val_auc: 0.5676 - val_loss: 3.6436 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 2/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 396ms/step - acc: 0.9348 - auc: 0.9991 - loss: 0.4922 - precision: 0.8890 - recall: 0.9994 - val_acc: 0.5000 - val_auc: 0.8446 - val_loss: 2.0857 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 3/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 388ms/step - acc: 0.9102 - auc: 0.9940 - lo

In [18]:
# Train 3
results = train_single_class('Cardiomegaly')
if results:
    with open("/kaggle/working/Cardiomegaly_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Cardiomegaly ===
Started at 2025-06-03 16:03:22 UTC
Validation balance - Pos: 504, Neg: 504
Building model for Cardiomegaly...
Initializing model with dummy input...
Training samples: 104 batches
Validation samples: 32 batches
Using class weight - positive: 39.90
Epoch 1/30
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 2s/step - acc: 0.5348 - auc: 0.5726 - loss: 3.7709 - precision: 0.5257 - recall: 0.8254 - val_acc: 0.5774 - val_auc: 0.6548 - val_loss: 0.3101 - val_precision: 0.6741 - val_recall: 0.2996 - learning_rate: 2.0000e-04
Epoch 2/30
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 318ms/step - acc: 0.5507 - auc: 0.6792 - loss: 1.0528 - precision: 0.5290 - recall: 0.9777 - val_acc: 0.6657 - val_auc: 0.7593 - val_loss: 0.2871 - val_precision: 0.6069 - val_recall: 0.9405 - learning_rate: 2.0000e-04
Epoch 3/30
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 309ms/step - acc: 0.5814 - auc: 0.7498 - 

In [19]:
# Train 4
results = train_single_class('Consolidation')
if results:
    with open("/kaggle/working/Consolidation_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Consolidation ===
Started at 2025-06-03 16:18:09 UTC
Validation balance - Pos: 945, Neg: 945
Building model for Consolidation...
Initializing model with dummy input...
Training samples: 176 batches
Validation samples: 60 batches
Using class weight - positive: 23.06
Epoch 1/30
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 835ms/step - acc: 0.5461 - auc: 0.5961 - loss: 1.5431 - precision: 0.5245 - recall: 0.8980 - val_acc: 0.5000 - val_auc: 0.6833 - val_loss: 1.0220 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 2/30
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 310ms/step - acc: 0.5164 - auc: 0.6445 - loss: 0.8529 - precision: 0.5058 - recall: 0.9769 - val_acc: 0.5005 - val_auc: 0.7494 - val_loss: 0.4874 - val_precision: 0.5003 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 3/30
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 297ms/step - acc: 0.5204 - auc: 0.68

In [20]:
# Train 5
results = train_single_class('Edema')
if results:
    with open("/kaggle/working/Edema_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Edema ===
Started at 2025-06-03 16:32:08 UTC
Validation balance - Pos: 464, Neg: 464
Building model for Edema...
Initializing model with dummy input...
Training samples: 87 batches
Validation samples: 29 batches
Using class weight - positive: 47.84
Epoch 1/30
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 1s/step - acc: 0.6078 - auc: 0.6968 - loss: 2.6045 - precision: 0.5686 - recall: 0.8793 - val_acc: 0.5334 - val_auc: 0.8080 - val_loss: 0.7257 - val_precision: 0.8298 - val_recall: 0.0841 - learning_rate: 2.0000e-04
Epoch 2/30
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 304ms/step - acc: 0.5642 - auc: 0.7474 - loss: 1.0739 - precision: 0.5334 - recall: 0.9876 - val_acc: 0.6261 - val_auc: 0.6592 - val_loss: 0.2284 - val_precision: 0.6077 - val_recall: 0.7112 - learning_rate: 2.0000e-04
Epoch 3/30
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 302ms/step - acc: 0.5573 - auc: 0.7479 - loss: 1.0163 - precis

In [10]:
# Train 6 
results = train_single_class('Effusion')
if results:
    with open("/kaggle/working/Effusion_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Effusion ===
Started at 2025-06-03 16:55:18 UTC


I0000 00:00:1748969722.346367    1898 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1748969722.347102    1898 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Validation balance - Pos: 2674, Neg: 2674
Building model for Effusion...
Initializing model with dummy input...


I0000 00:00:1748969739.604986    1898 cuda_dnn.cc:529] Loaded cuDNN version 90300


Training samples: 497 batches
Validation samples: 168 batches
Using class weight - positive: 7.49
Epoch 1/30


I0000 00:00:1748969886.315222    1935 service.cc:148] XLA service 0x7b73d40032e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748969886.316183    1935 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1748969886.316202    1935 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1748969975.537983    1935 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m437s[0m 522ms/step - acc: 0.5860 - auc: 0.6847 - loss: 0.6303 - precision: 0.5518 - recall: 0.9083 - val_acc: 0.6238 - val_auc: 0.8489 - val_loss: 0.2640 - val_precision: 0.5719 - val_recall: 0.9847 - learning_rate: 2.0000e-04
Epoch 2/30
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 303ms/step - acc: 0.6420 - auc: 0.7949 - loss: 0.3877 - precision: 0.5872 - recall: 0.9552 - val_acc: 0.5385 - val_auc: 0.8600 - val_loss: 0.2534 - val_precision: 0.5201 - val_recall: 0.9955 - learning_rate: 2.0000e-04
Epoch 3/30
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 299ms/step - acc: 0.6545 - auc: 0.8187 - loss: 0.3527 - precision: 0.5956 - recall: 0.9628 - val_acc: 0.6303 - val_auc: 0.8577 - val_loss: 0.2279 - val_precision: 0.5762 - val_recall: 0.9850 - learning_rate: 2.0000e-04
Epoch 4/30
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 300ms/step - acc: 0.6737 - auc: 0.8424

In [11]:
# Train 7
results = train_single_class('Emphysema')
if results:
    with open("/kaggle/working/Emphysema_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Emphysema ===
Started at 2025-06-03 17:29:54 UTC
Validation balance - Pos: 479, Neg: 479
Building model for Emphysema...
Initializing model with dummy input...
Training samples: 96 batches
Validation samples: 30 batches
Using class weight - positive: 43.17
Epoch 1/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 1s/step - acc: 0.5178 - auc: 0.5678 - loss: 3.2316 - precision: 0.5082 - recall: 0.8548 - val_acc: 0.5960 - val_auc: 0.7382 - val_loss: 0.3519 - val_precision: 0.8067 - val_recall: 0.2526 - learning_rate: 2.0000e-04
Epoch 2/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 319ms/step - acc: 0.5168 - auc: 0.6516 - loss: 1.2063 - precision: 0.5070 - recall: 0.9835 - val_acc: 0.5386 - val_auc: 0.7657 - val_loss: 0.4136 - val_precision: 0.5203 - val_recall: 0.9896 - learning_rate: 2.0000e-04
Epoch 3/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step - acc: 0.5175 - auc: 0.6655 - loss: 1.0584 -

In [12]:
# Train 8
results = train_single_class('Fibrosis')
if results:
    with open("/kaggle/working/Fibrosis_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Fibrosis ===
Started at 2025-06-03 17:50:08 UTC
Validation balance - Pos: 329, Neg: 329
Building model for Fibrosis...
Initializing model with dummy input...
Training samples: 64 batches
Validation samples: 21 batches
Using class weight - positive: 65.76
Epoch 1/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 2s/step - acc: 0.5265 - auc: 0.5611 - loss: 6.3215 - precision: 0.5290 - recall: 0.7780 - val_acc: 0.5000 - val_auc: 0.5977 - val_loss: 1.3622 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 2/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 327ms/step - acc: 0.5192 - auc: 0.6391 - loss: 1.2959 - precision: 0.5167 - recall: 0.9876 - val_acc: 0.6140 - val_auc: 0.6674 - val_loss: 0.2514 - val_precision: 0.5854 - val_recall: 0.7812 - learning_rate: 2.0000e-04
Epoch 3/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 325ms/step - acc: 0.5217 - auc: 0.6666 - loss: 1

In [13]:
#rain 9
results = train_single_class('Hernia')
if results:
    with open("/kaggle/working/Hernia_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Hernia ===
Started at 2025-06-03 18:01:10 UTC
Validation balance - Pos: 41, Neg: 41
Building model for Hernia...
Initializing model with dummy input...
Training samples: 9 batches
Validation samples: 3 batches
Using class weight - positive: 481.09
Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 13s/step - acc: 0.5955 - auc: 0.5763 - loss: 119.1388 - precision: 0.5612 - recall: 0.5336 - val_acc: 0.5000 - val_auc: 0.7136 - val_loss: 1.3770 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 486ms/step - acc: 0.6591 - auc: 0.8061 - loss: 17.8224 - precision: 0.5909 - recall: 0.8808 - val_acc: 0.5000 - val_auc: 0.7406 - val_loss: 1.8141 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - acc: 0.6698 - auc: 0.8488 - loss: 4.95

In [10]:
# Train 10
results = train_single_class('Infiltration')
if results:
    with open("/kaggle/working/Infiltration_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Infiltration ===
Started at 2025-06-04 02:00:08 UTC


I0000 00:00:1749002413.122091      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1749002413.122873      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Validation balance - Pos: 4000, Neg: 4000
Building model for Infiltration...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Initializing model with dummy input...


I0000 00:00:1749002438.983988      31 cuda_dnn.cc:529] Loaded cuDNN version 90300


Training samples: 743 batches
Validation samples: 250 batches
Using class weight - positive: 4.68
Epoch 1/30


I0000 00:00:1749002604.863909      99 service.cc:148] XLA service 0x7bb64c0035b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1749002604.864767      99 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1749002604.864874      99 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1749002695.269365      99 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m743/743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m533s[0m 477ms/step - acc: 0.5170 - auc: 0.5378 - loss: 0.7142 - precision: 0.5127 - recall: 0.8113 - val_acc: 0.5000 - val_auc: 0.5483 - val_loss: 4.1339 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 2/30
[1m743/743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 339ms/step - acc: 0.5176 - auc: 0.5911 - loss: 0.4220 - precision: 0.5108 - recall: 0.9270 - val_acc: 0.5394 - val_auc: 0.6838 - val_loss: 0.1969 - val_precision: 0.5208 - val_recall: 0.9852 - learning_rate: 2.0000e-04
Epoch 3/30
[1m743/743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 339ms/step - acc: 0.5185 - auc: 0.6100 - loss: 0.3890 - precision: 0.5111 - recall: 0.9498 - val_acc: 0.5185 - val_auc: 0.6535 - val_loss: 0.4374 - val_precision: 0.5095 - val_recall: 0.9935 - learning_rate: 2.0000e-04
Epoch 4/30
[1m743/743[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311ms/step - acc: 0.5214 - auc: 0.6295 -

In [11]:
# Train 11
results = train_single_class('Mass')
if results:
    with open("/kaggle/working/Mass_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Mass ===
Started at 2025-06-04 02:28:58 UTC
Validation balance - Pos: 1167, Neg: 1167
Building model for Mass...
Initializing model with dummy input...
Training samples: 218 batches
Validation samples: 73 batches
Using class weight - positive: 18.36
Epoch 1/30
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m350s[0m 849ms/step - acc: 0.5080 - auc: 0.5346 - loss: 1.3595 - precision: 0.5065 - recall: 0.9164 - val_acc: 0.5000 - val_auc: 0.5880 - val_loss: 0.9352 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 2/30
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 347ms/step - acc: 0.5124 - auc: 0.5703 - loss: 0.7949 - precision: 0.5080 - recall: 0.9786 - val_acc: 0.5000 - val_auc: 0.7043 - val_loss: 0.4934 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 3/30
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 343ms/step - acc: 0.5234 - auc: 0.6550 - loss: 0.663

In [12]:
# Train 12
results = train_single_class('Nodule')
if results:
    with open("/kaggle/working/Nodule_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Nodule ===
Started at 2025-06-04 02:59:38 UTC
Validation balance - Pos: 1296, Neg: 1296
Building model for Nodule...
Initializing model with dummy input...
Training samples: 238 batches
Validation samples: 81 batches
Using class weight - positive: 16.72
Epoch 1/30
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 395ms/step - acc: 0.5071 - auc: 0.5303 - loss: 1.5164 - precision: 0.5103 - recall: 0.8560 - val_acc: 0.5000 - val_auc: 0.5756 - val_loss: 0.6341 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 2/30
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 347ms/step - acc: 0.5111 - auc: 0.5694 - loss: 0.7569 - precision: 0.5089 - recall: 0.9748 - val_acc: 0.5000 - val_auc: 0.6377 - val_loss: 0.5011 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 3/30
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 346ms/step - acc: 0.5091 - auc: 0.5879 - loss: 0

In [13]:
# Train 13
results = train_single_class('Pleural_Thickening')
if results:
    with open("/kaggle/working/Pleural_Thickening_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Pleural_Thickening ===
Started at 2025-06-04 03:20:01 UTC
Validation balance - Pos: 681, Neg: 681
Building model for Pleural_Thickening...
Initializing model with dummy input...
Training samples: 126 batches
Validation samples: 43 batches
Using class weight - positive: 32.53
Epoch 1/30
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 1s/step - acc: 0.5112 - auc: 0.5445 - loss: 1.6877 - precision: 0.5063 - recall: 0.9058 - val_acc: 0.5007 - val_auc: 0.5889 - val_loss: 0.4711 - val_precision: 0.5004 - val_recall: 0.9971 - learning_rate: 2.0000e-04
Epoch 2/30
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 367ms/step - acc: 0.5105 - auc: 0.6076 - loss: 0.9869 - precision: 0.5054 - recall: 0.9852 - val_acc: 0.5000 - val_auc: 0.6211 - val_loss: 1.1078 - val_precision: 0.5000 - val_recall: 1.0000 - learning_rate: 2.0000e-04
Epoch 3/30
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step - acc: 0.5108 - auc

In [14]:
# Train 14
results = train_single_class('Pneumonia')
if results:
    with open("/kaggle/working/Pneumonia_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Pneumonia ===
Started at 2025-06-04 03:32:50 UTC
Validation balance - Pos: 285, Neg: 285
Building model for Pneumonia...
Initializing model with dummy input...
Training samples: 54 batches
Validation samples: 18 batches
Using class weight - positive: 78.03
Epoch 1/30
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 2s/step - acc: 0.5051 - auc: 0.5513 - loss: 3.6161 - precision: 0.4945 - recall: 0.9060 - val_acc: 0.5000 - val_auc: 0.6144 - val_loss: 0.9370 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0000e-04
Epoch 2/30
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 379ms/step - acc: 0.4903 - auc: 0.5655 - loss: 1.5228 - precision: 0.4890 - recall: 0.9893 - val_acc: 0.5333 - val_auc: 0.6360 - val_loss: 0.4240 - val_precision: 0.5176 - val_recall: 0.9825 - learning_rate: 2.0000e-04
Epoch 3/30
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 385ms/step - acc: 0.4938 - auc: 0.6104 - loss:

In [None]:
# Train 15
results = train_single_class('Pneumothorax')
if results:
    with open("/kaggle/working/Pneumothorax_metrics.json", "w") as f:
        json.dump(results, f)


=== Training model for: Pneumothorax ===
Started at 2025-06-04 03:39:43 UTC
Validation balance - Pos: 1003, Neg: 1003
Building model for Pneumothorax...
Initializing model with dummy input...
Training samples: 203 batches
Validation samples: 63 batches
Using class weight - positive: 19.86
Epoch 1/30
