# Try 2

In [1]:
import os
import cv2
import json
import torch
import random
import numpy as np
from glob import glob
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
import tifffile as tiff

In [2]:
# ----------- Config -----------

DATASET_ROOT = r"C:\Users\luukn\AIMI_MONKEY2\monkey-training"

train_dirs = [
    os.path.join(DATASET_ROOT, "images/pas-original"),
    os.path.join(DATASET_ROOT, "images/pas-diagnostic"),
]

annotation_dir = os.path.join(DATASET_ROOT, "annotations/json_pixel")

In [3]:
# ----------- Helper Functions -----------

def parse_annotations(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)
    coords = [(int(p["point"][0]), int(p["point"][1])) for p in data["points"]]
    return coords


def extract_patches(image, stride, patch_size):
    h, w, _ = image.shape
    patches = []
    coords = []
    for y in range(0, h - patch_size + 1, stride):
        for x in range(0, w - patch_size + 1, stride):
            patch = image[y:y + patch_size, x:x + patch_size]
            patches.append(patch)
            coords.append((x, y))
    return patches, coords

In [4]:
# ----------- Transform -----------

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((256, 256)),  # or your patch size
])

In [21]:
import tifffile

class InflammatoryCellDataset(Dataset):
    def __init__(self, image_dir_list, annotation_dir, patch_size_px=256, num_patches=256):
        self.samples = []
        self.patch_size_px = patch_size_px
        self.num_patches = num_patches

        for img_dir in image_dir_list:
            print(f"\nScanning: {img_dir}")
            image_files = glob(os.path.join(img_dir, "*.tif"))
            print(f"Found {len(image_files)} image(s)")

            for img_path in image_files:
                print(f"Processing: {img_path}")
                filename = os.path.basename(img_path)
                slide_id = "_".join(filename.split('_')[:2])  # e.g. A_P000001
                ann_path = os.path.join(annotation_dir, f"{slide_id}_inflammatory-cells.json")

                if not os.path.exists(ann_path):
                    print(f"❌ Annotation not found: {ann_path}")
                    continue

                ann_coords = parse_annotations(ann_path)

                with tifffile.TiffFile(img_path) as tif:
                    page = tif.pages[0]
                    mm = page.asarray(memmap=True)  # memory-mapped numpy array
                    h, w = mm.shape[:2]

                    patches_sampled = 0
                    attempts = 0
                    max_attempts = self.num_patches * 10

                    while patches_sampled < self.num_patches and attempts < max_attempts:
                        x_offset = random.randint(0, w - self.patch_size_px)
                        y_offset = random.randint(0, h - self.patch_size_px)

                        patch = mm[y_offset:y_offset + self.patch_size_px, x_offset:x_offset + self.patch_size_px]

                        # If grayscale, convert to 3 channels
                        if patch.ndim == 2:
                            patch = np.stack([patch]*3, axis=-1)
                        elif patch.shape[2] == 4:
                            patch = patch[:, :, :3]

                        # Find annotations inside patch
                        local_targets = []
                        for (gx, gy) in ann_coords:
                            if x_offset <= gx < x_offset + self.patch_size_px and y_offset <= gy < y_offset + self.patch_size_px:
                                local_x = gx - x_offset
                                local_y = gy - y_offset
                                local_targets.append((local_x, local_y))

                        if local_targets:
                            for t in local_targets:
                                self.samples.append((patch, t))
                            patches_sampled += 1

                        attempts += 1

                print(f"Sampled {patches_sampled} patches from {img_path}")

        print(f"✅ Total training samples: {len(self.samples)}")


In [22]:
# ----------- Model -----------

class CoordRegressionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 32 * 32, 256),
            nn.ReLU(),
            nn.Linear(256, 2)  # x and y
        )

    def forward(self, x):
        x = self.features(x)
        x = self.regressor(x)
        return x

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

dataset = InflammatoryCellDataset(train_dirs, annotation_dir)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
print("Loaded dataset")

model = CoordRegressionCNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

Using device: cpu

Scanning: C:\Users\luukn\AIMI_MONKEY2\monkey-training\images/pas-original
Found 18 image(s)
Processing: C:\Users\luukn\AIMI_MONKEY2\monkey-training\images/pas-original\D_P000001_PAS_Original.tif


TypeError: TiffPage.asarray() got an unexpected keyword argument 'memmap'

In [None]:
# ----------- Training -----------
epochs = 1
for epoch in range(epochs):
    print(f'Model training... Epoch {epochs}')
    model.train()
    total_loss = 0.0
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)

        preds = model(images)
        loss = criterion(preds, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"[Epoch {epoch+1}] Loss: {total_loss / len(loader):.4f}")


Using device: cpu

Scanning: C:\Users\luukn\AIMI_MONKEY2\monkey-training\images/pas-original
Found 18 image(s)


KeyboardInterrupt: 

In [None]:
def test_on_pas_cpg(model, image_dir="images/pas-cpg", save_path="cpg_predictions.json"):
    model.eval()
    results = []

    for image_path in glob(os.path.join(image_dir, "*.tif")):
        print(f"Inferencing on {image_path}")
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        H, W = image.shape[:2]

        patches, coords = extract_patches(image, stride=STRIDE, patch_size=PATCH_SIZE)
        batch = []
        batch_origins = []

        for patch, (x_offset, y_offset) in zip(patches, coords):
            patch_tensor = transform(patch).unsqueeze(0)
            batch.append(patch_tensor)
            batch_origins.append((x_offset, y_offset))

            if len(batch) == 32:
                input_batch = torch.cat(batch).to(device)
                with torch.no_grad():
                    preds = model(input_batch)

                for i, pred in enumerate(preds):
                    gx = int(pred[0].item() + batch_origins[i][0])
                    gy = int(pred[1].item() + batch_origins[i][1])
                    results.append({
                        "image_id": os.path.basename(image_path).split('.')[0],
                        "cell_type": "inflammatory-cell",
                        "coordinates": [gx, gy]
                    })

                batch = []
                batch_origins = []

        # Remaining patches
        if batch:
            input_batch = torch.cat(batch).to(device)
            with torch.no_grad():
                preds = model(input_batch)

            for i, pred in enumerate(preds):
                gx = int(pred[0].item() + batch_origins[i][0])
                gy = int(pred[1].item() + batch_origins[i][1])
                results.append({
                    "image_id": os.path.basename(image_path).split('.')[0],
                    "cell_type": "inflammatory-cell",
                    "coordinates": [gx, gy]
                })

    with open(save_path, "w") as f:
        json.dump(results, f, indent=2)

    print(f"Saved {len(results)} predictions to {save_path}")


## Try 1

In [5]:
import os
import json
import torch
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from glob import glob
import torch.nn as nn

In [3]:
PATCH_SIZE = 256
STRIDE = 128

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

def extract_patches(image, stride=STRIDE, patch_size=PATCH_SIZE):
    H, W = image.shape[:2]
    patches = []
    coords = []

    for y in range(0, H - patch_size + 1, stride):
        for x in range(0, W - patch_size + 1, stride):
            patch = image[y:y+patch_size, x:x+patch_size]
            patches.append(patch)
            coords.append((x, y))
    return patches, coords

def parse_annotations(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    coords = []
    for obj in data.get("cells", []):
        x, y = obj["x"], obj["y"]
        coords.append((x, y))
    return coords


In [22]:
import os
from glob import glob
import json

# Root settings
train_dirs = ["images/pas-original", "images/pas-diagnostic"]
annotation_dir = "annotations/json"  # assuming you're using pixel coords here

def check_image_and_annotation_pairs(img_dir, ann_dir):
    print(f"\nScanning: {img_dir}")
    image_files = glob(os.path.join(img_dir, "*.tif"))
    if not image_files:
        print("❌ No images found in:", img_dir)
        return

    matched = 0
    for img_path in image_files:
        filename = os.path.basename(img_path)  # e.g., A_P000001_PAS_Original.tif
        slide_id = "_".join(filename.split('_')[:2])  # e.g., A_P000001
        ann_path = os.path.join(ann_dir, f"{slide_id}_inflammatory-cells.json")
        
        if os.path.exists(ann_path):
            matched += 1
            try:
                with open(ann_path, "r") as f:
                    ann = json.load(f)
                coords = ann.get("coordinates") or ann.get("points") or ann
                if isinstance(coords, list) and coords:
                    print(f"✅ {slide_id} → {len(coords)} annotations")
                else:
                    print(f"⚠️  {slide_id} → No annotation points in JSON")
            except Exception as e:
                print(f"⚠️  {slide_id} → Failed to load JSON: {e}")
        else:
            print(f"❌ {slide_id} → Annotation not found at {ann_path}")
    
    print(f"\nSummary for {img_dir}: {matched} images have matching annotations.")

# Check both train folders
for train_dir in train_dirs:
    check_image_and_annotation_pairs(train_dir, annotation_dir)



Scanning: images/pas-original
❌ No images found in: images/pas-original

Scanning: images/pas-diagnostic
❌ No images found in: images/pas-diagnostic


In [23]:
import os
print("Working directory:", os.getcwd())


Working directory: c:\Users\luukn\OneDrive\Documenten\RadboudUniversity\Master\Year1\Q3\AIMI\Project\AIMI_MONKEY


In [None]:
class InflammatoryCellDataset(Dataset):
    def __init__(self, image_dir_list, annotation_dir, patch_size=256, stride=128):
        self.samples = []
        self.patch_size = patch_size
        self.stride = stride
        print(image_dir_list)

        for img_dir in image_dir_list:
            image_paths = os.path.join(img_dir, "*.tif")
            print(image_paths)
            image_files = glob(os.path.join(img_dir, "*.tif"))
            print(image_files)
            for img_path in image_files:
                filename = os.path.basename(img_path)  # e.g., A_P000001_PAS_Original.tif
                slide_id = "_".join(filename.split('_')[:2])  # -> A_P000001
                ann_path = os.path.join(annotation_dir, f"{slide_id}_inflammatory-cells.json")
                print(f"Processing {img_path} with annotation {ann_path}")
                if not os.path.exists(ann_path):
                    print(f"Annotation file not found for {img_path}: {ann_path}")
                    continue

                img = cv2.imread(img_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                ann_coords = parse_annotations(ann_path)
                patches, coords = extract_patches(img, stride, patch_size)

                for patch, (x_offset, y_offset) in zip(patches, coords):
                    local_targets = []
                    for (gx, gy) in ann_coords:
                        if x_offset <= gx < x_offset + patch_size and y_offset <= gy < y_offset + patch_size:
                            local_x = gx - x_offset
                            local_y = gy - y_offset
                            local_targets.append((local_x, local_y))
                    if local_targets:
                        for t in local_targets:
                            self.samples.append((patch, t))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        patch, (x, y) = self.samples[idx]
        patch = transform(patch)
        target = torch.tensor([x, y], dtype=torch.float32)
        return patch, target


In [9]:
class CoordRegressionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=1, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, stride=1, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, stride=1, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1)
        )
        self.regressor = nn.Linear(64, 2)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.regressor(x)


In [21]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_dirs = ["images/pas-original", "images/pas-diagnostic"]
dataset = InflammatoryCellDataset(train_dirs, "annotations/json_pixel")
loader = DataLoader(dataset, batch_size=32, shuffle=True)

model = CoordRegressionCNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCHS = 1
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0.0
    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        preds = model(images)
        loss = criterion(preds, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {epoch_loss:.4f}")


['images/pas-original', 'images/pas-diagnostic']
images/pas-original\*.tif
[]
images/pas-diagnostic\*.tif
[]


ValueError: num_samples should be a positive integer value, but got num_samples=0

In [None]:
def test_on_pas_cpg(model, image_dir="images/pas-cpg", save_path="cpg_predictions.json"):
    model.eval()
    results = []

    for image_path in glob(os.path.join(image_dir, "*.tif")):
        print(f"Inferencing on {image_path}")
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        H, W = image.shape[:2]

        patches, coords = extract_patches(image, stride=STRIDE, patch_size=PATCH_SIZE)
        batch = []
        batch_origins = []

        for patch, (x_offset, y_offset) in zip(patches, coords):
            patch_tensor = transform(patch).unsqueeze(0)
            batch.append(patch_tensor)
            batch_origins.append((x_offset, y_offset))

            if len(batch) == 32:
                input_batch = torch.cat(batch).to(device)
                with torch.no_grad():
                    preds = model(input_batch)

                for i, pred in enumerate(preds):
                    gx = int(pred[0].item() + batch_origins[i][0])
                    gy = int(pred[1].item() + batch_origins[i][1])
                    results.append({
                        "image_id": os.path.basename(image_path).split('.')[0],
                        "cell_type": "inflammatory-cell",
                        "coordinates": [gx, gy]
                    })

                batch = []
                batch_origins = []

        # Remaining patches
        if batch:
            input_batch = torch.cat(batch).to(device)
            with torch.no_grad():
                preds = model(input_batch)

            for i, pred in enumerate(preds):
                gx = int(pred[0].item() + batch_origins[i][0])
                gy = int(pred[1].item() + batch_origins[i][1])
                results.append({
                    "image_id": os.path.basename(image_path).split('.')[0],
                    "cell_type": "inflammatory-cell",
                    "coordinates": [gx, gy]
                })

    with open(save_path, "w") as f:
        json.dump(results, f, indent=2)

    print(f"Saved {len(results)} predictions to {save_path}")


In [None]:
test_on_pas_cpg(model)

In [None]:
# Save
torch.save(model.state_dict(), "inflammatory_model.pth")

# Load later
model = CoordRegressionCNN()
model.load_state_dict(torch.load("inflammatory_model.pth"))
model.to(device)
model.eval()
