In [23]:
!pip install "numpy<2"



In [24]:
!pip install scikit-learn



In [None]:
from pathlib import Path
import os
import tifffile as tiff
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET
import re
import random
import json
import random
import torch
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, ConcatDataset
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import seaborn as sns


In [26]:
PATCHES_DIR = Path("/Users/femkeaminetzah/Documents/Artificial_Intelligence/Master/Current Courses/AI in Medical Imaging/Project/patches")

# Train directories (pas_original and pas_diagnostic)
train_dirs = [
    PATCHES_DIR / "pas-original",
    PATCHES_DIR / "pas-diagnostic"
]

# Test directories (cpg with different cell types)
test_dirs = [
    PATCHES_DIR / "cpg" / "inflammatory-cells",
    PATCHES_DIR / "cpg" / "monocytes",
    PATCHES_DIR / "cpg" / "lymphocytes"
]


In [None]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Use ImageFolder, trick it with dummy label folder
def wrap_with_label_folder(path, label="dummy"):
    temp = path.parent / f"{path.name}_wrapped"
    temp.mkdir(exist_ok=True)
    label_path = temp / label
    label_path.mkdir(exist_ok=True)
    for img in path.glob("*.png"):
        symlink = label_path / img.name
        if not symlink.exists():
            symlink.symlink_to(img)
    return temp

wrapped_train_dirs = [wrap_with_label_folder(d) for d in train_dirs]
datasets_list = [datasets.ImageFolder(d, transform=transform) for d in wrapped_train_dirs]
train_dataset = ConcatDataset(datasets_list)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Pretrain encoder (ResNet18)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = models.resnet18(pretrained=True)
encoder.fc = nn.Identity()  # remove classification head
encoder = encoder.to(device)

optimizer = optim.Adam(encoder.parameters(), lr=1e-4)

# Pretraining loop
encoder.train()
for epoch in range(5):
    for imgs, _ in tqdm(train_loader):
        imgs = imgs.to(device)
        features = encoder(imgs)
        loss = features.norm(dim=1).mean() 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Pretraining Epoch {epoch+1} done")

encoder.eval()

all_features = []
all_labels = []

class_names = ["inflammatory-cells", "monocytes", "lymphocytes"]

for i, test_dir in enumerate(test_dirs):
    wrapped_dir = wrap_with_label_folder(test_dir, label="class")
    dataset = datasets.ImageFolder(wrapped_dir, transform=transform)
    loader = DataLoader(dataset, batch_size=32, shuffle=False)
    
    with torch.no_grad():
        for imgs, _ in loader:
            imgs = imgs.to(device)
            feats = encoder(imgs).cpu()
            all_features.append(feats)
            # Assign label i (corresponding to class_names)
            all_labels.append(torch.full((feats.size(0),), i, dtype=torch.long))

X = torch.cat(all_features).numpy()
y = torch.cat(all_labels).numpy()

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X, y)

y_pred = clf.predict(X)

cm = confusion_matrix(y, y_pred)

# Plot
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

print(classification_report(y, y_pred, target_names=class_names))



100%|██████████| 7/7 [00:21<00:00,  3.13s/it]


Pretraining Epoch 1 done


100%|██████████| 7/7 [00:19<00:00,  2.86s/it]


Pretraining Epoch 2 done


100%|██████████| 7/7 [00:23<00:00,  3.38s/it]


Pretraining Epoch 3 done


100%|██████████| 7/7 [00:20<00:00,  2.97s/it]


Pretraining Epoch 4 done


100%|██████████| 7/7 [00:22<00:00,  3.28s/it]


Pretraining Epoch 5 done
[[520   0   0]
 [  0 390   0]
 [390   0   0]]
                    precision    recall  f1-score   support

inflammatory-cells       0.57      1.00      0.73       520
         monocytes       1.00      1.00      1.00       390
       lymphocytes       0.00      0.00      0.00       390

          accuracy                           0.70      1300
         macro avg       0.52      0.67      0.58      1300
      weighted avg       0.53      0.70      0.59      1300



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
from pathlib import Path
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

# --- Configs ---
PATCHES_DIR = Path("/Users/femkeaminetzah/Documents/Artificial_Intelligence/Master/Current Courses/AI in Medical Imaging/Project/patches")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_names = ["inflammatory-cells", "monocytes", "lymphocytes"]

# --- Transforms ---
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# --- Wrap images in dummy label folder for ImageFolder compatibility ---
def wrap_with_label_folder(path, label="dummy"):
    temp = path.parent / f"{path.name}_wrapped"
    temp.mkdir(exist_ok=True)
    label_path = temp / label
    label_path.mkdir(exist_ok=True)
    for img in path.glob("*.png"):
        symlink = label_path / img.name
        if not symlink.exists():
            symlink.symlink_to(img)
    return temp

# --- Training data: only inflammatory-cells ---
train_dir = wrap_with_label_folder(PATCHES_DIR / "cpg" / "inflammatory-cells", label="inflammatory")
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# --- Define model: fine-tune entire ResNet18 ---
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 1)  # Binary classification: inflammatory vs not
model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# --- Fine-tuning loop ---
model.train()
for epoch in range(5):
    running_loss = 0.0
    for imgs, _ in tqdm(train_loader):
        imgs = imgs.to(device)
        labels = torch.ones(imgs.size(0), 1).to(device)  # all inflammatory
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")

# --- Evaluation on all 3 classes ---
model.eval()
all_outputs = []
all_labels = []

for i, test_class in enumerate(class_names):
    test_dir = wrap_with_label_folder(PATCHES_DIR / "cpg" / test_class, label="class")
    dataset = datasets.ImageFolder(test_dir, transform=transform)
    loader = DataLoader(dataset, batch_size=32, shuffle=False)

    with torch.no_grad():
        for imgs, _ in loader:
            imgs = imgs.to(device)
            outputs = model(imgs)
            preds = torch.sigmoid(outputs).squeeze().cpu()
            all_outputs.append(preds)
            all_labels.append(torch.full((preds.size(0),), i, dtype=torch.long))

# Combine predictions and labels
all_preds = torch.cat(all_outputs)
all_targets = torch.cat(all_labels)

# Convert to predicted class: if sigmoid > 0.5 → class 0 ("inflammatory"), else 1 ("non-inflammatory")
predicted_classes = (all_preds > 0.5).long()

# Print classification results (interpreting as class 0 vs not)
print(confusion_matrix(all_targets, predicted_classes))
print(classification_report(all_targets, predicted_classes, target_names=class_names))


100%|██████████| 17/17 [00:58<00:00,  3.46s/it]


Epoch 1, Loss: 0.7656


100%|██████████| 17/17 [00:51<00:00,  3.02s/it]


Epoch 2, Loss: 0.2734


100%|██████████| 17/17 [00:51<00:00,  3.00s/it]


Epoch 3, Loss: 0.1251


100%|██████████| 17/17 [00:49<00:00,  2.89s/it]


Epoch 4, Loss: 0.0759


100%|██████████| 17/17 [00:46<00:00,  2.73s/it]


Epoch 5, Loss: 0.0484
[[  0 520   0]
 [  0 390   0]
 [  0 390   0]]
                    precision    recall  f1-score   support

inflammatory-cells       0.00      0.00      0.00       520
         monocytes       0.30      1.00      0.46       390
       lymphocytes       0.00      0.00      0.00       390

          accuracy                           0.30      1300
         macro avg       0.10      0.33      0.15      1300
      weighted avg       0.09      0.30      0.14      1300



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
