In [1]:
import h5py
from transformers import CLIPProcessor, CLIPModel
import torch
from torch import nn
import tqdm 
import torch.utils.data as utils
import pandas as pd

In [2]:
path = '/kaggle/input/mva-dlmi-2025-histopathology-ood-classification/'
TRAIN_IMAGES_PATH = path + 'train.h5'
VAL_IMAGES_PATH = path + 'val.h5'
TEST_IMAGES_PATH = path + 'test.h5'

In [3]:
device = torch.device('cuda' if torch.cuda.is_available()
                      else 'cpu'
) #'mps' if torch.backends.mps.is_available() and torch.backends.mps.is_built() 
scaler = torch.amp.GradScaler('cuda')

In [4]:
'''
class CustomDataset(utils.Dataset):
    def __init__(self, file):
        self.pixel_values = []
        self.labels = [] if file != TEST_IMAGES_PATH else None
        self.processor = CLIPProcessor.from_pretrained("vinid/plip")

        with h5py.File(file, 'r') as dataset:
            for key in dataset.keys():
                image = dataset[key+'/img'][:]
                inputs = self.processor(
                    images=image,
                    return_tensors="pt",
                    do_rescale=False,
                    padding=True
                )
                self.pixel_values.append(inputs["pixel_values"].squeeze(0)) 
                
                if file != TEST_IMAGES_PATH:
                    label = dataset[key+'/label'][()]
                    self.labels.append(label)
        
    def __len__(self):
        return len(self.pixel_values) 
    
    def __getitem__(self, idx):
        if self.labels is not None: 
            return self.pixel_values[idx], torch.tensor(self.labels[idx], dtype=torch.float32)
        return self.pixel_values[idx] 
'''

'\nclass CustomDataset(utils.Dataset):\n    def __init__(self, file):\n        self.pixel_values = []\n        self.labels = [] if file != TEST_IMAGES_PATH else None\n        self.processor = CLIPProcessor.from_pretrained("vinid/plip")\n\n        with h5py.File(file, \'r\') as dataset:\n            for key in dataset.keys():\n                image = dataset[key+\'/img\'][:]\n                inputs = self.processor(\n                    images=image,\n                    return_tensors="pt",\n                    do_rescale=False,\n                    padding=True\n                )\n                self.pixel_values.append(inputs["pixel_values"].squeeze(0)) \n                \n                if file != TEST_IMAGES_PATH:\n                    label = dataset[key+\'/label\'][()]\n                    self.labels.append(label)\n        \n    def __len__(self):\n        return len(self.pixel_values) \n    \n    def __getitem__(self, idx):\n        if self.labels is not None: \n            re

In [5]:
class CustomDataset(utils.Dataset):
    def __init__(self, file):
        self.images = []
        self.labels = [] if file != TEST_IMAGES_PATH else None
        with h5py.File(file, 'r') as dataset:
            for key in dataset.keys():
                image = dataset[key+'/img'][:]
                self.images.append(image)
                if file != TEST_IMAGES_PATH:
                    label = dataset[key+'/label'][()]            
                    self.labels.append(label)
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        if self.labels is not None:
            return torch.from_numpy(self.images[idx]).to(torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)
        return torch.from_numpy(self.images[idx]).to(torch.float32)

In [6]:
def init_model(module):
    if isinstance(module, nn.Linear):
        nn.init.xavier_normal_(module.weight)
        if module.bias is not None:
            nn.init.zeros_(module.bias)

In [7]:
'''
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.device = device 
        
        self.vision_model = CLIPModel.from_pretrained("vinid/plip").vision_model
        
        for param in self.vision_model.parameters():
            param.requires_grad = False
        self.vision_model.to(self.device)

        self.classifier = nn.Sequential(
            nn.Linear(768, 384), 
            nn.ReLU(),
            nn.Linear(384, 192), 
            nn.ReLU(),
            nn.Linear(192, 48), 
            nn.ReLU(),
            nn.Linear(48, 1),
            nn.Sigmoid()
        )
        
        self.apply(init_model)
        self.to(self.device)
            
    def forward(self, pixel_values):
                
        with torch.no_grad():
            embedding = self.vision_model(pixel_values=pixel_values).last_hidden_state[:, 0, :]
        
        return self.classifier(embedding)
'''

'\nclass Model(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.device = device \n        \n        self.vision_model = CLIPModel.from_pretrained("vinid/plip").vision_model\n        \n        for param in self.vision_model.parameters():\n            param.requires_grad = False\n        self.vision_model.to(self.device)\n\n        self.classifier = nn.Sequential(\n            nn.Linear(768, 384), \n            nn.ReLU(),\n            nn.Linear(384, 192), \n            nn.ReLU(),\n            nn.Linear(192, 48), \n            nn.ReLU(),\n            nn.Linear(48, 1),\n            nn.Sigmoid()\n        )\n        \n        self.apply(init_model)\n        self.to(self.device)\n            \n    def forward(self, pixel_values):\n                \n        with torch.no_grad():\n            embedding = self.vision_model(pixel_values=pixel_values).last_hidden_state[:, 0, :]\n        \n        return self.classifier(embedding)\n'

In [15]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.device = device 
        
        self.processor = CLIPProcessor.from_pretrained("vinid/plip")
        self.vision_model = CLIPModel.from_pretrained("vinid/plip").vision_model
        
        for param in self.vision_model.parameters():
            param.requires_grad = False
        self.vision_model.to(self.device)

        self.classifier = nn.Sequential(
            nn.Linear(768, 384), 
            nn.ReLU(),
            nn.Linear(384, 192), 
            nn.ReLU(),
            nn.Linear(192, 48), 
            nn.ReLU(),
            nn.Linear(48, 1),
        )
        
        self.apply(init_model)
        self.to(self.device)
            
    def forward(self, image):
        inputs = self.processor(
            images=image, 
            return_tensors="pt", 
            padding=True, 
            do_rescale=False
        )
        
        pixel_values = inputs['pixel_values'].to(self.device, dtype=torch.float32)
        
        with torch.no_grad():
            embedding = self.vision_model(pixel_values=pixel_values).last_hidden_state[:, 0, :]
        
        return self.classifier(embedding)

In [9]:
train_batch_size, val_batch_size, test_batch_size = 1000, 680, 850

train_dataset = CustomDataset(TRAIN_IMAGES_PATH) 
val_dataset = CustomDataset(VAL_IMAGES_PATH)
test_dataset = CustomDataset(TEST_IMAGES_PATH)

train_loader = utils.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8, pin_memory=True)
val_loader = utils.DataLoader(val_dataset, batch_size=val_batch_size, shuffle=True, num_workers=6, pin_memory=True)
test_loader = utils.DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=4, pin_memory=True)



In [17]:
model = Model()
model.to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=1e-4, weight_decay=1e-4)

In [11]:
def calculate_accuracy(predictions, labels):
    binary_preds = (predictions >= 0.5).float()
    correct = (binary_preds == labels).float()
    accuracy = correct.mean() * 100
    return accuracy

In [12]:
def use(epochs):
    train_accs, val_accs = [], []
    
    for epoch in tqdm.tqdm(range(epochs), desc='Epochs'):
        model.train()
        total_acc = 0.0
        
        for images, labels in tqdm.tqdm(train_loader, desc=f'Train {epoch+1}'):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()

            
            with torch.amp.autocast('cuda'):

                y_pred = model(images).squeeze()
                loss = loss_fn(y_pred, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            acc = calculate_accuracy(y_pred, labels)
            total_acc += acc

        train_acc = total_acc / (len(train_loader))
        train_accs.append(train_acc)


        model.eval()
        total_val_acc = 0.0
        
        with torch.no_grad():
            for images, labels in tqdm.tqdm(val_loader, desc=f'Val {epoch+1}'):
                images, labels = images.to(device), labels.to(device)
                
                with torch.amp.autocast('cuda'):
                    val_pred = model(images).squeeze()
                
                val_acc = calculate_accuracy(val_pred, labels)
                total_val_acc += val_acc

        val_acc = total_val_acc / len(val_loader)
        val_accs.append(val_acc)

        print(f"Epoch{epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")


    test_predictions = []
    
    with h5py.File(TEST_IMAGES_PATH, 'r') as f:
        idx = list(f.keys())
    
    model.eval()
    
    with torch.no_grad():
        for images in tqdm.tqdm(test_loader, desc='Testing'):
            images = images.to(device)
            
            with torch.amp.autocast('cuda'):
                test_pred = (model(images).squeeze() > 0.5).float()
            test_predictions.append(test_pred)
            
        test = list(torch.cat(test_predictions).cpu().numpy())

    return list(zip(idx, test)) 

In [18]:
result = use(1)

Epochs:   0%|          | 0/1 [00:00<?, ?it/s]
Train 1:   0%|          | 0/100 [00:00<?, ?it/s][A
Train 1:   1%|          | 1/100 [00:09<15:58,  9.68s/it][A
Train 1:   2%|▏         | 2/100 [00:14<11:21,  6.96s/it][A
Train 1:   3%|▎         | 3/100 [00:19<09:48,  6.07s/it][A
Train 1:   4%|▍         | 4/100 [00:24<08:57,  5.60s/it][A
Train 1:   5%|▌         | 5/100 [00:29<08:30,  5.37s/it][A
Train 1:   6%|▌         | 6/100 [00:34<08:10,  5.21s/it][A
Train 1:   7%|▋         | 7/100 [00:39<07:57,  5.13s/it][A
Train 1:   8%|▊         | 8/100 [00:44<07:46,  5.07s/it][A
Train 1:   9%|▉         | 9/100 [00:49<07:39,  5.05s/it][A
Train 1:  10%|█         | 10/100 [00:54<07:29,  5.00s/it][A
Train 1:  11%|█         | 11/100 [00:59<07:25,  5.01s/it][A
Train 1:  12%|█▏        | 12/100 [01:04<07:19,  4.99s/it][A
Train 1:  13%|█▎        | 13/100 [01:09<07:14,  5.00s/it][A
Train 1:  14%|█▍        | 14/100 [01:14<07:26,  5.19s/it][A
Train 1:  15%|█▌        | 15/100 [01:19<07:16,  5.14s/it]

Epoch1/1 - Train Acc: 83.9210, Val Acc: 81.1412


Testing: 100%|██████████| 101/101 [07:01<00:00,  4.18s/it]


In [25]:
df = pd.DataFrame(result, columns=['ID', 'Pred'])
df.to_csv('/kaggle/working/submission.csv', index=False)