In [1]:
import h5py
import torch
import random
import numpy as np
import pandas as pd
import torchmetrics
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
import torchvision

In [2]:
# Constants
TRAIN_IMAGES_PATH = 'train.h5'
VAL_IMAGES_PATH = 'val.h5'
TEST_IMAGES_PATH = 'test.h5'
SEED = 0
BATCH_SIZE = 32
NUM_EPOCHS = 50
PATIENCE = 10
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-4

In [3]:
# Set seeds for reproducibility
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

# Data Preprocessing

We defin here the preprocessing applied to the data. The most important is the normalization to avoid center different distributions

In [None]:
train_transform = transforms.Compose([
    transforms.ConvertImageDtype(torch.float32),
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.ConvertImageDtype(torch.float32),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Hematodataset

A dataset that allows us to speed up the loading of the keys by saving them

In [12]:
import os
import pickle as pkl

class HistoDataset(Dataset):
    def __init__(self, path, transform, mode='train'):
        self.path = path
        self.transform = transform
        self.mode = mode
        # Open file just to get the keys, then close
        # Define a cache file for the keys
        cache_path = f'{self.path}_keys.pkl'
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                self.ids = pkl.load(f)
        else:
            with h5py.File(path, 'r') as f:
                self.ids = list(f.keys())
            with open(cache_path, 'wb') as f:
                pkl.dump(self.ids, f)
        # File handle will be lazily initialized
        self.file = None

    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, idx):
        # Lazily open the file if not already done
        if self.file is None:
            self.file = h5py.File(self.path, 'r')
            
        img_id = self.ids[idx]
        # Load the image from the open file
        img = torch.tensor(self.file[img_id]['img'][...])
        img = self.transform(img)
        
        if self.mode == 'train':
            label = torch.tensor(self.file[img_id]['label'][...], dtype=torch.float32)
            return img, label
        return img, img_id

    def __del__(self):
        # Ensure the file is closed when the dataset is destroyed
        if self.file is not None:
            self.file.close()


In [13]:
# Create datasets and dataloaders
train_ds = HistoDataset(TRAIN_IMAGES_PATH, train_transform, 'train')
val_ds = HistoDataset(VAL_IMAGES_PATH, val_transform, 'train')
test_ds = HistoDataset(TEST_IMAGES_PATH, val_transform, 'test')

In [14]:
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

# 1st Experiment

In this first experiment we try finetunning a resnet50 with the last layer being a simple linear layer

In [None]:
model = torchvision.models.resnet50(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

num_features = model.fc.in_features
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.5),
    torch.nn.Linear(num_features, 1)
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)



In [16]:
import os

pos_weight_save = "pos_weight.pkl"

In [17]:
import pickle as pkl

In [18]:
# Loss function with class balancing
if not os.path.exists(pos_weight_save):
    with h5py.File(TRAIN_IMAGES_PATH, 'r') as f:
        labels = [f[img_id]['label'][()] for img_id in f.keys()]
    pos_weight = (len(labels) - sum(labels)) / sum(labels)
    with open("pos_weight.pkl", "wb") as f:
        pkl.dump(pos_weight, f)
else: 
    with open("pos_weight.pkl", "rb") as f:
        pos_weight = pkl.load(f)
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]).to(device))

In [12]:
# Optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

In [None]:
# Training loop
best_val_loss = float('inf')
best_epoch = 0

for epoch in range(NUM_EPOCHS):
    # Training phase
    model.train()
    train_loss = 0.0
    progress = tqdm(train_loader, desc=f'Epoch {epoch+1} [Train]')
    for inputs, labels in progress:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * inputs.size(0)
        progress.set_postfix({'loss': loss.item()})
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        progress = tqdm(val_loader, desc=f'Epoch {epoch+1} [Val]')
        for inputs, labels in progress:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            
            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    
    # Calculate metrics
    train_loss /= len(train_ds)
    val_loss /= len(val_ds)
    val_acc = correct / total
    
    print(f'Epoch {epoch+1}')
    print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')
        
    # Update scheduler and check early stopping
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_model.pth')
        print('New best model saved!')
    
    if epoch - best_epoch >= PATIENCE:
        print(f'Early stopping at epoch {epoch+1}')
        break

Epoch 1 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 1 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 1
Train Loss: 0.3300 | Val Loss: 0.3830 | Val Acc: 0.8285
New best model saved!


Epoch 2 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

In [19]:
# Test prediction
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
predictions = []
ids = []

with torch.no_grad():
    for inputs, img_ids in tqdm(test_loader, desc='Predicting'):
        inputs = inputs.to(device)
        outputs = model(inputs).squeeze()
        preds = (torch.sigmoid(outputs) > 0.5).cpu().numpy().astype(int)
        predictions.extend(preds)
        ids.extend(img_ids)

Predicting:   0%|          | 0/2658 [00:00<?, ?it/s]

In [20]:
# Create submission
submission = pd.DataFrame({'ID': [int(i) for i in ids], 'Pred': predictions})
submission.set_index('ID', inplace=True)
submission.to_csv('submission.csv')
print('Submission file created!')

Submission file created!


This submission file ended up lower than the baseline so we decided to push the finetunning further

# LoRA Finetunining

To make the Lora Finetuning, we replace the Conv2d layers with LaROConv2d layers and train only those layers (and the classsifier of course)

In [None]:
import torch
from torch import nn

class LoRAConv2d(nn.Module):
    def __init__(self, conv_layer, rank=4):
        super().__init__()
        self.conv = conv_layer
        self.rank = rank
        
        # Freeze original parameters
        for param in self.conv.parameters():
            param.requires_grad = False
            
        # Add LoRA parameters
        in_channels = self.conv.in_channels
        out_channels = self.conv.out_channels
        # kernel_size = self.conv.kernel_size
        
        # LoRA parameters (using 1x1 convolutions)
        self.lora_A = nn.Conv2d(
            in_channels, rank, 
            kernel_size=1, stride=1,
            padding=0, bias=False
        )
        self.lora_B = nn.Conv2d(
            rank, out_channels, 
            kernel_size=1, stride=1,
            padding=0, bias=False
        )
        
        # Initialize parameters
        nn.init.kaiming_uniform_(self.lora_A.weight, a=np.sqrt(5))
        nn.init.zeros_(self.lora_B.weight)

    def forward(self, x):
        orig = self.conv(x)
        lora = self.lora_B(self.lora_A(x))
        # Adjust spatial dimensions if needed
        if orig.shape[-2:] != lora.shape[-2:]:
            lora = nn.functional.interpolate(
                lora, size=orig.shape[-2:],
                mode='bilinear', align_corners=False
            )
        return orig + lora

In [43]:
def apply_lora(model, rank=4):
    # Apply LoRA to last 10 convolutional layers
    layers_modified = 0
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            # Replace with LoRA-conv
            new_conv = LoRAConv2d(module, rank=rank)
            parent = model
            parts = name.split('.')
            for part in parts[:-1]:
                parent = getattr(parent, part)
            setattr(parent, parts[-1], new_conv)
            layers_modified += 1
            # if layers_modified >= 10:  # Limit number of modified layers
                # break
    print(f"Modified {layers_modified} layers")
    return model

In [30]:
LORA_RANK = 8

model = torchvision.models.resnet50(pretrained=True)
model = apply_lora(model, rank=LORA_RANK)

Modified 53 layers


In [31]:
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(num_features, 1)
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [32]:
# Loss function with class balancing
if not os.path.exists(pos_weight_save):
    with h5py.File(TRAIN_IMAGES_PATH, 'r') as f:
        labels = [f[img_id]['label'][()] for img_id in f.keys()]
    pos_weight = (len(labels) - sum(labels)) / sum(labels)
    with open("pos_weight.pkl", "wb") as f:
        pkl.dump(pos_weight, f)
else: 
    with open("pos_weight.pkl", "rb") as f:
        pos_weight = pkl.load(f)
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]).to(device))

In [33]:
# Optimizer (only for trainable parameters)
trainable_params = []
for name, param in model.named_parameters():
    if 'lora_' in name or 'fc' in name:
        trainable_params.append(param)
        print(f"Training parameter: {name}")
        
optimizer = torch.optim.Adam(trainable_params, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)


Training parameter: conv1.lora_A.weight
Training parameter: conv1.lora_B.weight
Training parameter: layer1.0.conv1.lora_A.weight
Training parameter: layer1.0.conv1.lora_B.weight
Training parameter: layer1.0.conv2.lora_A.weight
Training parameter: layer1.0.conv2.lora_B.weight
Training parameter: layer1.0.conv3.lora_A.weight
Training parameter: layer1.0.conv3.lora_B.weight
Training parameter: layer1.0.downsample.0.lora_A.weight
Training parameter: layer1.0.downsample.0.lora_B.weight
Training parameter: layer1.1.conv1.lora_A.weight
Training parameter: layer1.1.conv1.lora_B.weight
Training parameter: layer1.1.conv2.lora_A.weight
Training parameter: layer1.1.conv2.lora_B.weight
Training parameter: layer1.1.conv3.lora_A.weight
Training parameter: layer1.1.conv3.lora_B.weight
Training parameter: layer1.2.conv1.lora_A.weight
Training parameter: layer1.2.conv1.lora_B.weight
Training parameter: layer1.2.conv2.lora_A.weight
Training parameter: layer1.2.conv2.lora_B.weight
Training parameter: laye

In [34]:
# Training loop
best_val_loss = float('inf')
best_epoch = 0

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = 0.0
    progress = tqdm(train_loader, desc=f'Epoch {epoch+1} [Train]')
    for inputs, labels in progress:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * inputs.size(0)
        progress.set_postfix({'loss': loss.item()})
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        progress = tqdm(val_loader, desc=f'Epoch {epoch+1} [Val]')
        for inputs, labels in progress:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            
            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    
    # Metrics
    train_loss /= len(train_ds)
    val_loss /= len(val_ds)
    val_acc = correct / total
    
    print(f'Epoch {epoch+1}')
    print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')
    
    # Scheduler and early stopping
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_lora_model.pth')
        print('New best model saved!')
    
    if epoch - best_epoch >= PATIENCE:
        print(f'Early stopping at epoch {epoch+1}')
        break

Epoch 1 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 1 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 1
Train Loss: 0.1441 | Val Loss: 0.1947 | Val Acc: 0.9261
New best model saved!


Epoch 2 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 2 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 2
Train Loss: 0.0836 | Val Loss: 0.1961 | Val Acc: 0.9278


Epoch 3 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 3 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 3
Train Loss: 0.0702 | Val Loss: 0.2173 | Val Acc: 0.9292


Epoch 4 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 4 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 4
Train Loss: 0.0634 | Val Loss: 0.2683 | Val Acc: 0.9098


Epoch 5 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 5 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 5
Train Loss: 0.0574 | Val Loss: 0.2252 | Val Acc: 0.9391


Epoch 6 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 6 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 6
Train Loss: 0.0547 | Val Loss: 0.2251 | Val Acc: 0.9295


Epoch 7 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 7 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 7
Train Loss: 0.0522 | Val Loss: 0.2178 | Val Acc: 0.9323


Epoch 8 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 8 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 8
Train Loss: 0.0456 | Val Loss: 0.2194 | Val Acc: 0.9311


Epoch 9 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 9 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 9
Train Loss: 0.0445 | Val Loss: 0.2506 | Val Acc: 0.9305


Epoch 10 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 10 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 10
Train Loss: 0.0451 | Val Loss: 0.2391 | Val Acc: 0.9281


Epoch 11 [Train]:   0%|          | 0/3125 [00:00<?, ?it/s]

Epoch 11 [Val]:   0%|          | 0/1091 [00:00<?, ?it/s]

Epoch 11
Train Loss: 0.0427 | Val Loss: 0.2380 | Val Acc: 0.9327
Early stopping at epoch 11


In [46]:
import torchvision
# Generate predictions
model = torchvision.models.resnet50(pretrained=True)
model = apply_lora(model, rank=8)
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(num_features, 1)
)
model.load_state_dict(torch.load('best_lora_model.pth'))
model.eval()
predictions = []
ids = []

Modified 53 layers


In [47]:
model.to(device)

ResNet(
  (conv1): LoRAConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (lora_A): Conv2d(3, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (lora_B): Conv2d(8, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): LoRAConv2d(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (lora_A): Conv2d(64, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (lora_B): Conv2d(8, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): LoRAConv2d(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bia

In [36]:
with torch.no_grad():
    for inputs, img_ids in tqdm(test_loader, desc='Predicting'):
        inputs = inputs.to(device)
        outputs = model(inputs).squeeze()
        preds = (torch.sigmoid(outputs) > 0.5).cpu().numpy().astype(int)
        predictions.extend(preds)
        ids.extend(img_ids)

# Create submission
submission = pd.DataFrame({'ID': [int(i) for i in ids], 'Pred': predictions})
submission.set_index('ID', inplace=True)
submission.to_csv('lora_submission.csv')
print('LoRA submission file created!')

Predicting:   0%|          | 0/2658 [00:00<?, ?it/s]

LoRA submission file created!


This submission file ended up at 0.94