# 1. Data Loading and Visualization

In [21]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from tqdm import tqdm
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/el-hackathon-2025/elucidata_ai_challenge_data.h5


In [22]:
h5_path = '/kaggle/input/el-hackathon-2025/elucidata_ai_challenge_data.h5'
h5_file = h5py.File(h5_path, 'r')

# Load image and spot handles
train_images = h5_file['images']['Train']
train_spots = h5_file['spots']['Train']

test_image = h5_file['images']['Test']['S_7'][()]
test_spots_df = pd.DataFrame(h5_file['spots']['Test']['S_7'][:])



# 2. Extract Patches and Labels

In [23]:
def extract_patch(image, x, y, patch_size=224):
    half = patch_size // 2
    x_min = max(x - half, 0)
    x_max = min(x + half, image.shape[1])
    y_min = max(y - half, 0)
    y_max = min(y + half, image.shape[0])

    patch = image[y_min:y_max, x_min:x_max]

    # Pad if patch is smaller than expected
    if patch.shape[0] < patch_size or patch.shape[1] < patch_size:
        pad_y = patch_size - patch.shape[0]
        pad_x = patch_size - patch.shape[1]
        patch = np.pad(patch, ((0, pad_y), (0, pad_x), (0, 0)), mode='constant')
    
    return patch

In [24]:
train_patches = []
train_labels = []

for i in range(1, 7): 
    slide_id = f'S_{i}'
    image = train_images[slide_id][()]
    spots_df = pd.DataFrame(train_spots[slide_id][:])

    for _, row in tqdm(spots_df.iterrows(), total=len(spots_df)):
        x, y = int(row['x']), int(row['y'])
        label = [row[f'C{j}'] for j in range(1, 36)]
        patch = extract_patch(image, x, y)
        train_patches.append(patch)
        train_labels.append(label)

train_patches = np.array(train_patches)
train_labels = np.array(train_labels)


100%|██████████| 2197/2197 [00:00<00:00, 6241.38it/s]
100%|██████████| 2270/2270 [00:00<00:00, 6102.85it/s]
100%|██████████| 690/690 [00:00<00:00, 5892.09it/s]
100%|██████████| 1187/1187 [00:00<00:00, 5784.97it/s]
100%|██████████| 1677/1677 [00:00<00:00, 6053.08it/s]
100%|██████████| 328/328 [00:00<00:00, 5915.24it/s]


# 3. Define Dataset and DataLoader

In [25]:
class SpotDataset(Dataset):
    def __init__(self, patches, labels, transform=None):
        self.patches = patches
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.patches)
    
    def __getitem__(self, idx):
        img = self.patches[idx].astype(np.uint8)
        img = transforms.ToPILImage()(img)
        if self.transform:
            img = self.transform(img)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return img, label

In [26]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# 4. Initialize Model

In [27]:
def get_model():
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 35)
    return model

# Train Loop

In [28]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = SpotDataset(train_patches, train_labels, transform=train_transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

model = get_model().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()

for epoch in range(5): 
    model.train()
    total_loss = 0
    for imgs, targets in tqdm(dataloader):
        imgs, targets = imgs.to(device), targets.to(device)
        optimizer.zero_grad()
        preds = model(imgs)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss / len(dataloader)}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]
100%|██████████| 261/261 [17:15<00:00,  3.97s/it]


Epoch 1 Loss: 0.8410746984440705


100%|██████████| 261/261 [17:00<00:00,  3.91s/it]


Epoch 2 Loss: 0.5583732446148935


100%|██████████| 261/261 [16:51<00:00,  3.88s/it]


Epoch 3 Loss: 0.4491480861815456


100%|██████████| 261/261 [17:16<00:00,  3.97s/it]


Epoch 4 Loss: 0.35784392741105564


100%|██████████| 261/261 [17:03<00:00,  3.92s/it]

Epoch 5 Loss: 0.29651627278533477





In [32]:
spot_df = pd.DataFrame(h5_file['spots']['Test']['S_7'][:])
spot_ids = [f'spot_{i}' for i in range(len(spot_df))]

model.eval()
predictions = []

with torch.no_grad():
    for _, row in tqdm(spot_df.iterrows(), total=len(spot_df)):
        x, y = int(row['x']), int(row['y'])
        patch = extract_patch(test_image, x, y)
        img = transforms.ToPILImage()(patch.astype(np.uint8))
        img = train_transform(img).unsqueeze(0).to(device)
        pred = model(img).cpu().numpy().flatten()
        predictions.append(pred)


100%|██████████| 2088/2088 [01:48<00:00, 19.17it/s]


# Create Submission File

In [35]:
submission_df = pd.DataFrame(predictions, columns=[f'C{i}' for i in range(1, 36)])
submission_df.insert(0, 'ID', spot_ids)
submission_df.to_csv('submission.csv', index=False)