In [None]:
import os
import glob
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, classification_report
from transformers import CLIPProcessor, CLIPModel#150M params

In [None]:
# Load the training and validation label tables from the mounted Drive folder.
df, valid_df = map(
    pd.read_csv,
    (
        "/content/drive/MyDrive/Dataset/Datasetp.csv",
        "/content/drive/MyDrive/Dataset/valid.csv",
    ),
)

In [None]:
# Sanity check: (rows, columns) of each table, one per line.
print(df.shape, valid_df.shape, sep="\n")

In [None]:
# Local folders holding the train / valid images.
train_base_path, valid_base_path = (
    "/content/drive/MyDrive/Dataset/train",
    "/content/drive/MyDrive/Dataset/valid",
)

In [None]:
# Point the CSVs' image paths at the local image folders by swapping the
# dataset-relative prefix for the corresponding base path.
df['Path'] = df['Path'].map(
    lambda rel_path: rel_path.replace("CheXpert-v1.0-small/train", train_base_path)
)
valid_df['Path'] = valid_df['Path'].map(
    lambda rel_path: rel_path.replace("CheXpert-v1.0-small/valid", valid_base_path)
)

In [None]:
# Count how many listed image paths exist on disk (True) vs. are missing (False).
print(
    df['Path'].map(os.path.exists).value_counts(),
    valid_df['Path'].map(os.path.exists).value_counts(),
    sep="\n",
)

In [None]:
# Recursively list every .jpg under the training image folder and peek at a few.
sample_images = list(
    glob.iglob("/content/drive/MyDrive/Dataset/train/**/*.jpg", recursive=True)
)
print("Total sample images found:", len(sample_images))
print(sample_images[:5])

In [None]:
# Keep only the rows whose image file is present on disk; renumber rows from 0.
train_df = df[df['Path'].map(os.path.exists)].reset_index(drop=True)
valid_df = valid_df[valid_df['Path'].map(os.path.exists)].reset_index(drop=True)

# Report how many rows survived the filter in each split.
print("Train set after filtering:", train_df.shape)
print("Valid set after filtering:", valid_df.shape)

In [None]:
# Columns removed from both splits before training (they are not in label_cols).
drop_cols = [
    'Sex',
    'Age',
    'Frontal/Lateral',
    'AP/PA',
    'Fracture',
    'Enlarged Cardiomediastinum',
    'Lung Lesion',
    'Pleural Other',
    'Support Devices',
]

In [None]:
# Remove the unused columns from both splits.
# NOTE: not re-runnable as-is; a second run would no longer find these columns.
train_df = train_df.drop(drop_cols, axis=1)
valid_df = valid_df.drop(drop_cols, axis=1)

In [None]:
# Treat both missing (NaN) and -1 entries as 0 in every column of both splits.
train_df = train_df.fillna(0).replace({-1: 0})
valid_df = valid_df.fillna(0).replace({-1: 0})

In [None]:
# Per-column count of remaining NaNs in the training split.
print("Missing values (train):", train_df.isna().sum(), sep="\n")

In [None]:
# Target findings, in the order used for the label vector and classifier outputs.
label_cols = [
    'No Finding',
    'Cardiomegaly',
    'Lung Opacity',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
]

In [None]:
# Store the findings as integers in BOTH splits so the train and validation
# tables (and the CSVs written from them) share one label dtype.
train_df[label_cols] = train_df[label_cols].astype(int)
valid_df[label_cols] = valid_df[label_cols].astype(int)

In [None]:
# Display the training table to inspect the cleaned label columns.
train_df

In [None]:
# One fixed sentence per finding; generate_report() joins the sentences of the
# findings whose label equals 1, in the key order given here.
report_templates = {
    "No Finding": "No significant abnormality detected.",
    "Cardiomegaly": "Heart appears enlarged.",
    "Lung Opacity": "Opacity observed in the lung region.",
    "Edema": "Fluid accumulation in lungs indicating edema.",
    "Consolidation": "Lung consolidation visible.",
    "Pneumonia": "Opacity in lower lobe suggesting pneumonia.",
    "Atelectasis": "Signs of partial lung collapse (atelectasis).",
    "Pneumothorax": "Air trapped in pleural space suggesting pneumothorax.",
    "Pleural Effusion": "Pleural effusion observed."
}


In [None]:
def generate_report(row, templates=None):
    """Build a one-line text report from a record of binary finding labels.

    Args:
        row: Record indexable by finding name (e.g. a DataFrame row or a dict).
        templates: Optional ``{finding: sentence}`` mapping. When omitted, the
            notebook-level ``report_templates`` is used.

    Returns:
        The sentences of every finding whose value equals 1, joined with single
        spaces in the mapping's order, or ``"No abnormal findings observed."``
        when no finding equals 1.
    """
    if templates is None:
        templates = report_templates
    sentences = [text for col, text in templates.items() if row[col] == 1]
    return " ".join(sentences) if sentences else "No abnormal findings observed."

# Attach a generated text report to every row of both splits.
train_df['Report'] = train_df.apply(generate_report, axis="columns")
valid_df['Report'] = valid_df.apply(generate_report, axis="columns")

In [None]:
# Display the training table again, now including the generated 'Report' column.
train_df

In [None]:
# Write both splits (paths, labels and generated reports) to CSV, without the index.
train_df.to_csv(
    path_or_buf="/content/drive/MyDrive/Dataset/Chexpert_train_reports.csv",
    index=False,
)
valid_df.to_csv(
    path_or_buf="/content/drive/MyDrive/Dataset/Chexpert_valid_reports.csv",
    index=False,
)
print("Files saved successfully!")

In [None]:
# Pretrained CLIP checkpoint used as the image/text encoder.
model_id = "openai/clip-vit-base-patch32"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor turns raw text/images into model-ready tensors.
processor = CLIPProcessor.from_pretrained(model_id)

# The model produces the image and text embeddings; keep it in inference mode.
clip = CLIPModel.from_pretrained(model_id)
clip = clip.to(device)
clip.eval()

In [None]:
class CheXpertCLIPDataset(Dataset):
    """Map-style dataset yielding ``(image path, report text, label vector)``.

    Images are not opened here; each sample only carries the path string taken
    from the ``'Path'`` column.
    """

    def __init__(self, df, label_cols):
        """Keep a 0..n-1 re-indexed copy of ``df`` and the label column names."""
        self.label_cols = label_cols
        self.df = df.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return the sample at position ``idx``.

        Returns:
            ``(path, report, labels)``: the ``'Path'`` value, the ``'Report'``
            value as ``str``, and a float32 tensor of the label columns.
        """
        record = self.df.iloc[idx]
        target = record[self.label_cols].to_numpy().astype(np.float32)
        return (
            record['Path'],
            str(record['Report']),
            torch.tensor(target, dtype=torch.float32),
        )

In [None]:
def collate_batch(batch):
    """Turn a list of dataset samples into CLIP-ready inputs plus labels.

    Args:
        batch: Sequence of ``(image_path, report_text, label_tensor)`` tuples,
            as returned by ``CheXpertCLIPDataset.__getitem__``.

    Returns:
        ``(inputs, labels)``: the processor output for the batch's texts and
        RGB images, and the label tensors stacked along a new first dimension.
    """
    img_paths, texts, labels = zip(*batch)

    images = []
    for path in img_paths:
        # Close each file handle as soon as its pixels have been decoded.
        with Image.open(path) as raw_image:
            images.append(raw_image.convert("RGB"))

    # truncation=True keeps long reports within the tokenizer's maximum length
    # instead of failing inside the text encoder.
    inputs = processor(
        text=list(texts),
        images=images,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    return inputs, torch.stack(labels)

In [None]:
# Wrap each split in a dataset over the shared label columns.
train_dataset = CheXpertCLIPDataset(train_df, label_cols)
valid_dataset = CheXpertCLIPDataset(valid_df, label_cols)

# Training batches are shuffled; collate_batch loads images and tokenizes text.
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=collate_batch,
    num_workers=2,
    pin_memory=True,
)
# Validation batches keep the table order.
valid_loader = DataLoader(
    valid_dataset,
    batch_size=8,
    shuffle=False,
    collate_fn=collate_batch,
    num_workers=2,
    pin_memory=True,
)

In [None]:
# Freeze every CLIP weight so no gradients are computed for the encoder.
for clip_param in clip.parameters():
    clip_param.requires_grad_(False)

In [None]:
# Image and text embeddings share CLIP's projection size; read it from the
# loaded model's config so the classifier input still matches if model_id changes.
img_dim = txt_dim = clip.config.projection_dim
# The classifier consumes the two embeddings concatenated along the feature axis.
fusion_dim = img_dim + txt_dim

In [None]:
# Classification head on the fused embedding: one hidden layer with dropout,
# then one raw logit per label column.
classifier = nn.Sequential(
    nn.Linear(in_features=fusion_dim, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(in_features=512, out_features=len(label_cols)),
)
classifier = classifier.to(device)

In [None]:
# Per-class positive weight = (#negatives / #positives) in the training split,
# so rarer findings contribute more to the loss.
pos_weights = []
for col in label_cols:
    n_pos = train_df[col].sum()
    n_neg = len(train_df) - n_pos
    # A class with no training positives gets the neutral weight 1.0; a weight
    # of 0 would erase the positive term for that class in the validation loss.
    pos_weights.append((n_neg / n_pos) if n_pos > 0 else 1.0)

pos_weight = torch.tensor(pos_weights, dtype=torch.float32).to(device)
# Multi-label loss on raw logits, with the per-class positive weights.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Only the classifier head's parameters are optimized.
optimizer = optim.Adam(classifier.parameters(), lr=1e-4)

In [None]:
num_epochs = 6
train_losses, val_losses = [], []  # one mean loss per epoch, for plotting

# NOTE(review): the 'Report' text fed to CLIP is generated from the label
# columns themselves, so the text embedding carries the targets; the losses
# below may look optimistic for that reason. Confirm this is intended.

for epoch in range(num_epochs):
    # ---- Training ----
    classifier.train()  # enables dropout in the head
    running_loss = 0.0  # sum of per-batch losses within this epoch
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} train"):
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        labels = labels.to(device)

        # CLIP's forward pass runs without building an autograd graph.
        with torch.no_grad():
            outs = clip(**inputs)
        img_emb, txt_emb = outs.image_embeds, outs.text_embeds

        # Concatenate image and text embeddings along the feature axis.
        fused = torch.cat((img_emb, txt_emb), dim=1)

        optimizer.zero_grad()
        logits = classifier(fused)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # ---- Validation ----
    classifier.eval()  # disables dropout
    val_loss = 0.0
    with torch.no_grad():  # no gradients needed while evaluating
        for inputs, labels in valid_loader:
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
            labels = labels.to(device)
            outs = clip(**inputs)
            fused = torch.cat((outs.image_embeds, outs.text_embeds), dim=1)
            logits = classifier(fused)  # raw scores, one column per label
            val_loss += criterion(logits, labels).item()

    avg_val_loss = val_loss / len(valid_loader)
    val_losses.append(avg_val_loss)
    print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.4f}, Val Loss={avg_val_loss:.4f}")


In [None]:
# Plot the per-epoch mean training and validation losses on one axis.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(train_losses, label='Train Loss')
ax.plot(val_losses, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training vs Validation Loss')
ax.legend()
plt.show()

In [None]:
# Save only the classifier head's weights; the CLIP model is not saved here.
torch.save(obj=classifier.state_dict(), f="/content/clip_classifier_head.pt")
print("Saved classifier head to /content/clip_classifier_head.pt")

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import torch

def show_actual_vs_predicted(df, clip_model, classifier, processor, device, label_names,
                             num_samples=2, threshold=0.5, random_state=None):
    """Plot random samples side by side with their actual and predicted labels.

    Args:
        df: Table with 'Path', 'Report' and one column per name in ``label_names``.
        clip_model: Model returning ``image_embeds`` and ``text_embeds``.
        classifier: Head mapping the concatenated embeddings to one logit per label.
        processor: Callable building the model inputs from text and images.
        device: Device the inputs are moved to before inference.
        label_names: Label names, in the classifier's output order.
        num_samples: Rows to draw; capped at ``len(df)``.
        threshold: Sigmoid probability at or above which a label is predicted.
        random_state: Optional seed forwarded to ``df.sample`` for repeatable picks.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n_shown = min(num_samples, len(df))
    samples = df.sample(n_shown, random_state=random_state).reset_index(drop=True)
    # 5 inches of width per image keeps the original 10x5 size for two samples.
    plt.figure(figsize=(5 * max(n_shown, 1), 5))

    # Predict with dropout disabled, then restore the caller's train/eval mode.
    was_training = classifier.training
    classifier.eval()
    try:
        for i, row in samples.iterrows():
            img_path = row['Path']
            report_text = str(row['Report'])
            actual_labels = [label for label in label_names if row[label] == 1]

            # Preprocess; close the file handle once the pixels are decoded.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = processor(
                text=[report_text],
                images=[image],
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(device)

            # Predict
            with torch.no_grad():
                outputs = clip_model(**inputs)
                fused = torch.cat([outputs.image_embeds, outputs.text_embeds], dim=1)
                logits = classifier(fused)
                probs = torch.sigmoid(logits).cpu().numpy()[0]

            preds_binary = (probs >= threshold).astype(int)
            predicted_labels = [label_names[j] for j, v in enumerate(preds_binary) if v == 1]
            if not predicted_labels:
                predicted_labels = ["No Finding"]

            # One column per sample, so any num_samples fits the grid.
            plt.subplot(1, n_shown, i + 1)
            plt.imshow(image)
            plt.axis("off")
            plt.title(
                f"Actual: {', '.join(actual_labels) if actual_labels else 'No Finding'}\n"
                f"Pred: {', '.join(predicted_labels)}",
                fontsize=7
            )
    finally:
        classifier.train(was_training)

    plt.tight_layout()
    plt.show()


In [None]:
# Reuse the training label order instead of a second hand-typed list: the
# classifier's outputs are positional, so the names must match label_cols exactly.
label_names = list(label_cols)

show_actual_vs_predicted(valid_df, clip, classifier, processor, device, label_names, num_samples=2)

