# Purpose of Notebook 1


**This notebook has one and only one responsibility:**
* Convert image patches into H-optimus embeddings and save them to disk.


**What it's doing**


1. Load patch metadata (with fixed split)
2. Load H-optimus as a frozen encoder
3. Load patch images
4. Apply correct transforms
5. Extract embeddings
6. Save embeddings + metadata


**1. Paths**

In [2]:
import os

DATA_ROOT = "/kaggle/input/01-wsi-level-stratified-split"
CSV_PATH = os.path.join(DATA_ROOT, "patches_metadata_with_split.csv")

PATCH_DIR = "/kaggle/input/camelyon-prepro-v4-patches/patches"

OUTPUT_DIR = "/kaggle/working/hoptimus_embeddings"
os.makedirs(OUTPUT_DIR, exist_ok=True)


**2. Load Metadata(Read Only)**

In [3]:
import pandas as pd

df = pd.read_csv(CSV_PATH)

# Sanity check
assert set(df["split"].unique()) == {"train", "val", "test"}
df.head()

Unnamed: 0,patch_path,wsi_id,x,y,label,split
0,/kaggle/working/patches/normal_074_x54016_y698...,normal_074,54016,69888,0,train
1,/kaggle/working/patches/normal_074_x18688_y744...,normal_074,18688,74496,0,train
2,/kaggle/working/patches/normal_074_x48640_y757...,normal_074,48640,75776,0,train
3,/kaggle/working/patches/normal_074_x50688_y757...,normal_074,50688,75776,0,train
4,/kaggle/working/patches/normal_074_x50944_y757...,normal_074,50944,75776,0,train


**3. Define image transforms (CRITICAL)**


Our patches are 256×256.
H-optimus expects 224×224 with ImageNet normalization.

In [4]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
])


**4. Dataset class (clean and minimal)**

Defined patch only dataset

In [5]:
from torch.utils.data import Dataset
from PIL import Image

class PatchDataset(Dataset):
    def __init__(self, df, patch_dir, transform):
        self.df = df.reset_index(drop=True)
        self.patch_dir = patch_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        patch_path = os.path.join(
            self.patch_dir,
            os.path.basename(row["patch_path"])
        )

        img = Image.open(patch_path).convert("RGB")
        img = self.transform(img)

        return {
            "image": img,
            "wsi_id": row["wsi_id"],
            "label": row["label"],
            "split": row["split"],
        }


**5. Load H-optimus (frozen encoder)**

Conceptually, this is what matters:


* model in eval() mode
* gradients disabled
* used only for inference


 will load the H-optimus backbone according to Bioptimus instructions.

In [6]:
# Loading Libraries
!pip install -q timm huggingface_hub

In [7]:
import os
import pandas as pd
import torch
import timm
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
from huggingface_hub import login




In [9]:
login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
from huggingface_hub import whoami
whoami()


{'type': 'user',
 'id': '695327acd4bc74211b07e094',
 'name': 'Hadia4',
 'fullname': 'Amjad',
 'isPro': False,
 'avatarUrl': '/avatars/32a1592e2e92a30e21bd9fbe3577171e.svg',
 'orgs': [],
 'auth': {'type': 'access_token',
  'accessToken': {'displayName': 'H-optimus',
   'role': 'fineGrained',
   'createdAt': '2025-12-30T01:48:51.872Z',
   'fineGrained': {'canReadGatedRepos': True,
    'global': [],
    'scoped': [{'entity': {'_id': '695327acd4bc74211b07e094',
       'type': 'user',
       'name': 'Hadia4'},
      'permissions': ['repo.content.read',
       'repo.write',
       'inference.serverless.write',
       'inference.endpoints.infer.write',
       'inference.endpoints.write',
       'user.webhooks.read',
       'user.webhooks.write',
       'collection.read',
       'collection.write']}]}}}}

In [11]:
DATA_ROOT = "/kaggle/input/01-wsi-level-stratified-split"
CSV_PATH = os.path.join(DATA_ROOT, "patches_metadata_with_split.csv")
PATCH_DIR = "/kaggle/input/camelyon-prepro-v4-patches/patches"

df = pd.read_csv(CSV_PATH)

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = timm.create_model(
    "hf-hub:bioptimus/H-optimus-0",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=False
)

model.to(device)
model.eval()

for p in model.parameters():
    p.requires_grad = False


config.json:   0%|          | 0.00/447 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

**Correct image preprocessing (DO NOT CHANGE)**

In [13]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.707223, 0.578729, 0.703617),
        std=(0.211883, 0.230117, 0.177517),
    ),
])


In [14]:
print(transform)


Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=(0.707223, 0.578729, 0.703617), std=(0.211883, 0.230117, 0.177517))
)


In [None]:
img = Image.open(img_path).convert("RGB")
x = transform(img).unsqueeze(0).to(device)
feat = model(x)
print(feat.shape)

**Patch → embedding loop (core of Notebook 1)**

In [18]:
embeddings = []
meta_rows = []

model.eval()

with torch.autocast(device_type="cuda", dtype=torch.float16):
    with torch.inference_mode():
        for _, row in tqdm(df.iterrows(), total=len(df)):
            img_path = os.path.join(
                PATCH_DIR, os.path.basename(row["patch_path"])
            )

            img = Image.open(img_path).convert("RGB")
            x = transform(img).unsqueeze(0).to(device)

            feat = model(x).squeeze(0).cpu()
            embeddings.append(feat)

            patch_id = os.path.splitext(os.path.basename(row["patch_path"]))[0]

            meta_rows.append({
                "patch_id": patch_id,
                "wsi_id": row["wsi_id"],
                "label": row["label"],
                "split": row["split"],
            })


100%|██████████| 16800/16800 [40:46<00:00,  6.87it/s]


In [19]:
import torch

embeddings = torch.stack(embeddings)

torch.save(
    {
        "embeddings": embeddings,
        "metadata": meta_rows
    },
    "/kaggle/working/hoptimus_patch_embeddings.pt"
)


**Sanity Checks**

In [21]:
print(embeddings.shape)


torch.Size([16800, 1536])


In [23]:
import torch

# Make sure all items are tensors
embeddings = [torch.as_tensor(e) if not isinstance(e, torch.Tensor) else e for e in embeddings]

# Stack into a single tensor
embeddings_tensor = torch.vstack(embeddings)
print("Embeddings shape:", embeddings_tensor.shape)  # should be [16800, 1536]

# 1. Check dtype and device
print("Dtype:", embeddings_tensor.dtype)
print("Device:", embeddings_tensor.device)

# 2. Quick stats to see if embeddings look reasonable
print("Mean:", embeddings_tensor.mean().item())
print("Std:", embeddings_tensor.std().item())
print("Min:", embeddings_tensor.min().item())
print("Max:", embeddings_tensor.max().item())

# 3. Check for any NaNs or Infs
print("Any NaNs:", torch.isnan(embeddings_tensor).any().item())
print("Any Infs:", torch.isinf(embeddings_tensor).any().item())


Embeddings shape: torch.Size([16800, 1536])
Dtype: torch.float32
Device: cpu
Mean: -0.0005382670206017792
Std: 0.7780240774154663
Min: -4.873270034790039
Max: 5.492297649383545
Any NaNs: False
Any Infs: False


**Inspecting Checkpoints**

In [28]:
import torch

# Load the checkpoint
checkpoint_path = "/kaggle/working/hoptimus_patch_embeddings.pt"
checkpoint = torch.load(checkpoint_path, map_location="cpu")  # use CPU to avoid GPU issues

# See the keys in the checkpoint
print("Keys in checkpoint:", checkpoint.keys())

# Check the embeddings tensor
embeddings = checkpoint["embeddings"]
print("Embeddings shape:", embeddings.shape)
print("Embeddings dtype:", embeddings.dtype)

# Check the metadata
meta_rows = checkpoint["metadata"]
print("Number of metadata entries:", len(meta_rows))
print("First 3 metadata entries:", meta_rows[:3])


Keys in checkpoint: dict_keys(['embeddings', 'metadata'])
Embeddings shape: torch.Size([16800, 1536])
Embeddings dtype: torch.float32
Number of metadata entries: 16800
First 3 metadata entries: [{'patch_id': 'normal_074_x54016_y69888', 'wsi_id': 'normal_074', 'label': 0, 'split': 'train'}, {'patch_id': 'normal_074_x18688_y74496', 'wsi_id': 'normal_074', 'label': 0, 'split': 'train'}, {'patch_id': 'normal_074_x48640_y75776', 'wsi_id': 'normal_074', 'label': 0, 'split': 'train'}]
