In [1]:
import os
import gc
import torch
from PIL import Image
from transformers import AutoModel
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torch.optim import AdamW
from torch import nn

# Creating Datasets/Dataloaders

In [2]:
# Making datasets
def loader(path):
    img = Image.open(path)
    return img

transform = v2.Compose(
    [
        v2.Resize(224),
        v2.CenterCrop(224),
        v2.ToImage(), 
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ]
)
    
dataset = ImageFolder(
    'images/40X/', 
    loader=loader,
    transform=transform
)

# Split data into train/val/test
num_imgs = len(dataset.samples)
train_size = int(num_imgs * 0.7)
val_size = int(num_imgs * 0.15)
test_size = num_imgs - train_size - val_size

generator = torch.Generator().manual_seed(42)

train_dataset, val_dataset, test_dataset = random_split(
    dataset=dataset,
    lengths=[train_size, val_size, test_size],
    generator=generator
)

# Make dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4)
val_dataloader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=4)

# Load Model and Create Classification Head

In [3]:
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

# Use GPU if avail
# Load model
with torch.no_grad():
    backbone = AutoModel.from_pretrained('kaiko-ai/midnight')

In [4]:
class PathBinaryClassifier(nn.Module):
    def __init__(self, backbone, hidden=512, dropout=0.2):
        super().__init__()

        self.backbone = backbone
        self.dim = backbone.config.hidden_size # 1536 for Midnight Model
        self.dropout = dropout

        # Classifier block
        self.classifier = nn.Sequential(
            nn.Linear(self.dim, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, 1),
            nn.Sigmoid()
        )

    def forward(self, X):
        out = self.backbone(X)['pooler_output']
        logit = self.classifier(out)
        return logit

In [5]:
# Specify components needed for training

# NEED GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Instantiate model
model = PathBinaryClassifier(backbone).to(device)

# Specify opt and loss function
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = nn.BCELoss()

In [6]:
X, y = None, None

for X_batch, y_batch in train_dataloader:
    X, y, = X_batch.to(device), y_batch.to(device)
    break

In [7]:
output = model(X).squeeze()


In [8]:
output.dtype, y.float().dtype

(torch.float32, torch.float32)

In [9]:
optimizer.zero_grad()

loss = criterion(output, y.float())

In [10]:
loss.backward()

In [11]:
optimizer.step()

OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 14.56 GiB of which 33.81 MiB is free. Including non-PyTorch memory, this process has 14.53 GiB memory in use. Of the allocated memory 13.70 GiB is allocated by PyTorch, and 707.19 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)