In [None]:
# Now that the package is installed, we can use direct imports
from dataset.loader import load_foundation_model
from dataset.cached_embedding import EmbeddingCache
from unsupervised.kmeans import kmeans_clustering

print("Package imports successful!")

In [6]:
import torch
import torchvision.transforms as T

from torchvision.datasets import ImageFolder
from torchmetrics import F1Score, Accuracy

In [7]:
train_embeddings = EmbeddingCache.load_from_file("data/UNI_glomerulus_train_dataset_embeddings.pt")
val_embeddings = EmbeddingCache.load_from_file("data/UNI_glomerulus_val_dataset_embeddings.pt")

num_classes = torch.unique(train_embeddings.labels).numel()
print(f"Number of classes: {num_classes}")
print(f"Number of training embeddings: {len(train_embeddings)}")
print(f"Number of validation embeddings: {len(val_embeddings)}")

Number of classes: 6
Number of training embeddings: 7582
Number of validation embeddings: 120


In [8]:
predicted_fn, _, _ = kmeans_clustering(train_embeddings, n_clusters=num_classes)

In [9]:
metric = Accuracy(task="multiclass", num_classes=num_classes)
f1 = F1Score(task="multiclass", num_classes=num_classes)

train_predicted = predicted_fn(train_embeddings.embeddings)
train_acc = metric(train_predicted, train_embeddings.labels)
train_f1 = f1(train_predicted, train_embeddings.labels)

val_predicted = predicted_fn(val_embeddings.embeddings)
val_acc = metric(val_predicted, val_embeddings.labels)
val_f1 = f1(val_predicted, val_embeddings.labels)
print(f"Train Accuracy: {train_acc:.4f}, Train F1: {train_f1:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}, Validation F1: {val_f1:.4f}")


Train Accuracy: 0.1804, Train F1: 0.1804
Validation Accuracy: 0.1833, Validation F1: 0.1833
