In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import roc_auc_score

import pandas as pd
from PIL import Image
from torchvision import models
from torch.utils.data import Dataset
import mlflow
from prometheus_client import start_http_server, Summary


In [2]:
# -----------------------
# Config
# -----------------------
BATCH_SIZE = 4
EPOCHS = 5
LEARNING_RATE = 1e-4
NUM_CLASSES = 14
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

TRAIN_CSV = "sample_train.csv"
TEST_CSV = "sample_test.csv"
IMG_ROOT = "."  # Current dir includes CheXpert-v1.0/

LABELS = [
    "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity", "Lung Lesion",
    "Edema", "Consolidation", "Pneumonia", "Atelectasis", "Pneumothorax",
    "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices", "No Finding"
]

In [3]:
# -----------------------
# Dataset
# -----------------------
class CheXpertDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.df[LABELS] = self.df[LABELS].fillna(0).replace(-1, 0)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.root_dir, row["Path"])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        labels = torch.tensor(row[LABELS].values.astype("float32"))
        return image, labels


In [4]:
# -----------------------
# Model
# -----------------------
class CheXpertModel(nn.Module):
    def __init__(self, num_classes=14):
        super().__init__()
        base = models.densenet121(pretrained=True)
        in_features = base.classifier.in_features
        base.classifier = nn.Linear(in_features, num_classes)
        self.model = base

    def forward(self, x):
        return self.model(x)

In [5]:
# -----------------------
# AUC Evaluation
# -----------------------
# def compute_auc(y_true, y_pred):
#     try:
#         aucs = [roc_auc_score(y_true[:, i], y_pred[:, i]) for i in range(y_true.shape[1])]
#         return sum(aucs) / len(aucs)
#     except Exception:
#         return None

In [6]:
# -----------------------
# Transforms
# -----------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [7]:
# -----------------------
# Load Data
# -----------------------
train_ds = CheXpertDataset(TRAIN_CSV, IMG_ROOT, transform)
test_ds = CheXpertDataset(TEST_CSV, IMG_ROOT, transform)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

In [8]:
# 🔹 MLflow Experiment Setup
mlflow.set_experiment("model_training_experiment")

# 🔹 Prometheus Metric Setup
training_time = Summary('training_duration_seconds', 'Time spent training model')
start_http_server(8001)  # Prometheus will scrape metrics from http://localhost:8001/metrics


2025/04/30 15:35:23 INFO mlflow.tracking.fluent: Experiment with name 'model_training_experiment' does not exist. Creating a new experiment.


(<prometheus_client.exposition.start_wsgi_server.<locals>.TmpServer at 0x19d18141650>,
 <Thread(Thread-3 (serve_forever), started daemon 25964)>)

In [19]:
# -----------------------
# Train
# -----------------------
model = CheXpertModel(NUM_CLASSES).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.BCEWithLogitsLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

best_model_path = "best_model.pth"


@training_time.time()
def train_model():
    best_loss = float('inf')  # now inside the function
    best_model_path = "best_model.pth"

    with mlflow.start_run():
        mlflow.log_param("epochs", EPOCHS)
        mlflow.log_param("batch_size", BATCH_SIZE)

        for epoch in range(EPOCHS):
            model.train()
            total_loss = 0

            for imgs, labels in train_loader:
                imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
                optimizer.zero_grad()
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            avg_loss = total_loss / len(train_loader)
            print(f"[Epoch {epoch+1}] Train Loss: {avg_loss:.4f}")
            scheduler.step(avg_loss)

            if avg_loss < best_loss:
                best_loss = avg_loss
                torch.save(model.state_dict(), best_model_path)
                print(f"✅ Saved new best model at epoch {epoch+1} (Loss: {avg_loss:.4f})")

        # Make sure final_accuracy is defined or remove this if you don't calculate it
        mlflow.pytorch.log_model(model, "model")

train_model()


[Epoch 1] Train Loss: 0.6641
✅ Saved new best model at epoch 1 (Loss: 0.6641)
[Epoch 2] Train Loss: 0.5336
✅ Saved new best model at epoch 2 (Loss: 0.5336)
[Epoch 3] Train Loss: 0.4333
✅ Saved new best model at epoch 3 (Loss: 0.4333)
[Epoch 4] Train Loss: 0.3676
✅ Saved new best model at epoch 4 (Loss: 0.3676)




[Epoch 5] Train Loss: 0.3175
✅ Saved new best model at epoch 5 (Loss: 0.3175)




In [16]:
# -----------------------
# Evaluation
# -----------------------
model.load_state_dict(torch.load(best_model_path))
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(DEVICE)
        outputs = torch.sigmoid(model(imgs)).cpu()
        all_preds.append(outputs)
        all_labels.append(labels)

all_preds = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

# Convert probabilities to binary predictions (threshold = 0.5)
preds_binary = (all_preds >= 0.5).astype(int)

# Compute accuracy (macro across all labels)
correct = (preds_binary == all_labels).sum()
total = preds_binary.size
accuracy = correct / total

print(f"\n✅ Test Accuracy (avg over all labels): {accuracy:.4f}")
mlflow.log_metric("test_accuracy", accuracy)


# Show ground truth vs prediction for a few samples
print("\n📊 Ground Truth vs Predictions (first 3 images):")
for i in range(min(6, len(all_preds))):
    print(f"\nImage {i+1}:")
    print("Labels (GT):     ", all_labels[i].astype(int).tolist())
    print("Predictions (bin):", (all_preds[i] >= 0.5).astype(int).tolist())


  model.load_state_dict(torch.load(best_model_path))



✅ Test Accuracy (avg over all labels): 0.8143

📊 Ground Truth vs Predictions (first 3 images):

Image 1:
Labels (GT):      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
Predictions (bin): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Image 2:
Labels (GT):      [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
Predictions (bin): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Image 3:
Labels (GT):      [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
Predictions (bin): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Image 4:
Labels (GT):      [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
Predictions (bin): [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]

Image 5:
Labels (GT):      [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
Predictions (bin): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]


In [18]:
mlflow.end_run()
