# Initialization

In [None]:
import math
import os
from collections import OrderedDict

import numpy as np
import torch
from matplotlib import pyplot as plt
from scipy import linalg
from sklearn.metrics.pairwise import euclidean_distances
from torch import Tensor
from torch.utils.data import random_split, DataLoader
from torchmetrics.image.fid import FrechetInceptionDistance
from tqdm import tqdm

import config
from dataset import SketchDataset
from diffusion_model import GD3PM, CosineNoiseScheduler

# Create a new OrderedDict without the 'module.' prefix
def DDP_to_normal(state_dict):
    """Return a copy of ``state_dict`` whose keys have the ``'module.'`` prefix removed.

    Keys that do not start with ``'module.'`` are copied unchanged, so a
    state dict saved from an unwrapped model passes through intact instead of
    losing the first seven characters of every name.

    Args:
        state_dict: Mapping from parameter name to value.

    Returns:
        OrderedDict with the same values in the same order, keyed by the
        un-prefixed names.
    """
    prefix = "module."
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        # Only strip the prefix when it is actually present
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v

    return new_state_dict

# Freeze model parameters
def freeze_model(model):
    """Switch ``model`` to evaluation mode and disable gradient tracking on all of its parameters.

    The model is modified in place; nothing is returned.
    """
    model.eval()
    for param in model.parameters():
        param.requires_grad_(False)

# Evaluation Vars
BATCH_SIZE = 256                      # Batch size of the evaluation DataLoaders
OUTPUT_DIR = "evaluation_outputs/"    # Prefix of the file the inception features are saved to
GPU = 0                               # CUDA device index the model and batches are placed on

Load and freeze DDP (Distributed Data Parallel) Model

In [None]:
# Build the model on the evaluation GPU
diff_model = GD3PM(GPU)

# Read the checkpoint, remapping tensors stored on cuda:0 to the evaluation GPU, and keep only its "model" entry
diff_state_dict = torch.load(
    "checkpoint_nodediff_ddp_adam_nopos_32layers_512nodedim_512condim_8heads_2000denoisingsteps.pth",
    map_location = {"cuda:0": "cuda:%d" % GPU},
)["model"]

# Checkpoint keys go through DDP_to_normal before being loaded into the plain model
diff_model.load_state_dict(DDP_to_normal(diff_state_dict))
freeze_model(diff_model)

Sample and Visualize CAD Sketches from the model

In [None]:
num_samples = 4
with torch.no_grad():
    # Draw the starting latents, then run them through the model's denoiser
    seed = diff_model.noise_scheduler.sample_latent(num_samples)
    nodes = diff_model.denoise(seed)

    # Render each generated sketch from a CPU copy, walking the first dimension
    for sketch in nodes:
        SketchDataset.render_graph(sketch.cpu())

Load and extract the Test Set

In [None]:
print("---Loading Dataset---")
dataset = SketchDataset("data")

# 90% / 5% / 5% split; the generator is seeded from config so the split is deterministic for a given seed
train_set, validate_set, test_set = random_split(
    dataset = dataset,
    lengths = [0.9, 0.05, 0.05],
    generator = torch.Generator().manual_seed(config.DATASET_SPLIT_SEED),
)
print("---Finished Loading Dataset---")

# Evaluate Log-Likelihood/ELBO (Evidence Lower Bound)

$$ELBO=\frac{T}{2}\,\mathbb{E}_{t \sim U\{1,\dots,T\},\ \epsilon \sim \mathcal{N}(0,I)} \left[\left(SNR\left(\tfrac{t-1}{T}\right) - SNR\left(\tfrac{t}{T}\right)\right)\left\lVert x_0-\hat{x}_0(x_t,t)\right\rVert_2^2\right]$$

In [None]:
# Calculates delta SNR
def nll_scale(a_bar, prev_a_bar):
    """Return ``prev_a_bar / (1 - prev_a_bar) - a_bar / (1 - a_bar)``.

    Works on anything supporting ``-`` and ``/`` (Python floats, tensors);
    no guard is applied for inputs equal to 1.
    """
    ratio_prev = prev_a_bar / (1 - prev_a_bar)
    ratio_curr = a_bar / (1 - a_bar)
    return ratio_prev - ratio_curr

# Calculate the ELBO of a batch of sketches
def diffusion_elbo(true_nodes : Tensor, pred_nodes : Tensor, params_mask : Tensor, t, scheduler : CosineNoiseScheduler):
    """Sum the weighted squared prediction error of a batch over its three feature groups.

    The groups are the ``config.NODE_BOOL_SLICE``, ``config.NODE_TYPE_SLICE``
    and ``config.NODE_PARM_SLICE`` features. Each one is weighted by
    ``nll_scale`` evaluated on ``scheduler.a_bar`` at ``t`` and ``t - 1``.
    For the bool and type groups both ``a_bar`` values first go through
    ``scheduler.continous_variance_to_discrete_variance`` with ``D = 2`` and
    ``D = 5`` respectively; the parameter group uses ``a_bar`` directly and is
    additionally multiplied by ``params_mask``.

    Args:
        true_nodes: Ground-truth node features.
        pred_nodes: Predicted node features, sliced the same way as ``true_nodes``.
        params_mask: Multiplier applied to the parameter-group squared errors.
        t: Timestep indices into ``scheduler.a_bar`` (``t - 1`` is indexed as well).
        scheduler: Provides ``a_bar`` and ``continous_variance_to_discrete_variance``.

    Returns:
        Tensor with the total over the whole batch (a sum, not a mean).
    """
    def group_weight(D = None):
        # Weight between timestep t and t - 1, optionally on the discrete-variance scale
        a_bar_curr, a_bar_prev = scheduler.a_bar[t], scheduler.a_bar[t - 1]
        if D is not None:
            a_bar_curr = scheduler.continous_variance_to_discrete_variance(a_bar_curr, D = D)
            a_bar_prev = scheduler.continous_variance_to_discrete_variance(a_bar_prev, D = D)
        # Two singleton dims are added so the weight can broadcast against the node features
        return nll_scale(a_bar_curr, a_bar_prev).unsqueeze(-1).unsqueeze(1)

    def squared_error(feature_slice):
        # Element-wise squared difference restricted to one feature group
        return (pred_nodes[..., feature_slice] - true_nodes[..., feature_slice]) ** 2

    bool_term = torch.sum(group_weight(D = 2) * squared_error(config.NODE_BOOL_SLICE))
    type_term = torch.sum(group_weight(D = 5) * squared_error(config.NODE_TYPE_SLICE))
    parm_term = torch.sum(group_weight() * params_mask * squared_error(config.NODE_PARM_SLICE))

    return bool_term + type_term + parm_term

In [None]:
test_loader = DataLoader(dataset = test_set, batch_size = BATCH_SIZE)
elbo_total = 0 # Running total over the test set; not named `sum` so the builtin stays usable in later cells

# ELBO/VLB calculation using monte carlo estimation see Kingma et al. "Variational Diffusion models"
with torch.no_grad():
    pbar = tqdm(test_loader)
    for nodes, params_mask in pbar:
        nodes = nodes.to(GPU)
        params_mask = params_mask.to(GPU)

        # One random timestep per sketch
        # NOTE(review): `high` is exclusive in torch.randint, so t == max_timestep is never drawn - confirm this is intended
        t = torch.randint(low = 1, high = diff_model.max_timestep, size = (nodes.size(0),)).to(GPU)
        noised_nodes = diff_model.noise_scheduler(nodes, t)

        pred_nodes = diff_model(noised_nodes, t)

        elbo_total += diffusion_elbo(nodes, pred_nodes, params_mask, t, diff_model.noise_scheduler)

    nll = (elbo_total / len(test_set)) * diff_model.max_timestep / 2 # Calculate sample mean
    nll = nll / math.log(2) # Convert to bits
    print("Negative Log-Likelihood: ", nll)

# Evaluate FID (Fréchet Inception Distance), Precision, and Recall

CAD sketches are rendered to matplotlib figures as monochromatic images then converted to tensors, where FID, Precision, and Recall are calculated as ordinary image generation metrics.

In [None]:
def figure_to_tensor(nodes : Tensor):
    """Render a sketch with ``SketchDataset.render_graph`` and return the image as a uint8 tensor.

    The pixels are read through ``canvas.buffer_rgba()`` instead of
    ``canvas.tostring_rgb()``: the latter is deprecated since Matplotlib 3.8
    and removed in 3.10, and the RGBA buffer already carries its own
    (H, W, 4) shape, so no reshape based on ``get_width_height()`` is needed.
    This requires a canvas that provides ``buffer_rgba`` (Agg-based backends).

    Args:
        nodes: Sketch passed straight to ``SketchDataset.render_graph``.

    Returns:
        uint8 tensor of shape (1, 3, H, W) with the RGB channels of the rendered figure.
    """
    figure = SketchDataset.render_graph(nodes)
    try:
        figure.canvas.draw()
        rgba = np.asarray(figure.canvas.buffer_rgba())
        # Drop the alpha channel and copy, so the tensor owns its data once the figure is closed
        img_tensor = torch.from_numpy(rgba[..., :3].copy())
    finally:
        # Always release the figure, even if rendering failed
        plt.close(figure)

    # Add batch dimension and rearrange to (B, C, H, W)
    return img_tensor.permute(2, 0, 1).unsqueeze(0)

In [None]:
with torch.no_grad():
    # Create the output directory up front so a missing folder fails before the slow sampling loop, not after it
    os.makedirs(OUTPUT_DIR, exist_ok = True)

    # Use at most the first 10,000 test sketches; only the nodes are kept, not the params mask
    real_nodes = test_set[0:10_000][0]
    num_eval = len(real_nodes) # May be below 10,000 when the test split is smaller
    test_loader = DataLoader(dataset = real_nodes, batch_size = BATCH_SIZE)

    # Size the buffers from the actual sample count so no row is left as an all-zero placeholder
    real_features = torch.zeros(num_eval, 2048)
    fake_features = torch.zeros(num_eval, 2048)
    fid = FrechetInceptionDistance(feature=2048).set_dtype(torch.float64).to(GPU)
    fid.reset()

    start = 0
    for batch in tqdm(test_loader):
        stop = start + batch.size(0) # The last batch may be shorter than BATCH_SIZE

        real_imgs = torch.vstack([figure_to_tensor(prims) for prims in batch]).to(GPU)
        # Generate as many sketches as there are real ones in this batch
        gens = diff_model.sample(batch.size(0))
        gen_imgs = torch.vstack([figure_to_tensor(sample) for sample in gens]).to(GPU)

        real_features[start:stop] = fid.inception(real_imgs)
        fake_features[start:stop] = fid.inception(gen_imgs)
        start = stop

    torch.save({"real": real_features, "fake": fake_features}, OUTPUT_DIR + "diff_inception_features.pth")

In [None]:
def calculate_fid(real_features, fake_features):
    """Compute the Frechet distance between Gaussians fitted to two feature sets.

    Args:
        real_features: Array of shape (n_real, dim), one feature vector per row.
        fake_features: Array of shape (n_fake, dim), one feature vector per row.

    Returns:
        The distance as a Python float: squared distance between the means
        plus ``trace(cov_real + cov_fake - 2 * sqrt(cov_real @ cov_fake))``.
    """
    def gaussian_stats(features):
        # Per-dimension mean and covariance, with rows treated as observations
        return features.mean(axis=0), np.cov(features, rowvar=False)

    mu_real, cov_real = gaussian_stats(real_features)
    mu_fake, cov_fake = gaussian_stats(fake_features)

    mean_gap = mu_real - mu_fake

    # The matrix square root can come back complex; only its real part is kept
    cov_sqrt = linalg.sqrtm(cov_real.dot(cov_fake))
    if np.iscomplexobj(cov_sqrt):
        cov_sqrt = cov_sqrt.real

    squared_mean_distance = mean_gap.dot(mean_gap)
    trace_term = np.trace(cov_real + cov_fake - 2*cov_sqrt)
    return float(squared_mean_distance + trace_term)

def calculate_precision_recall(real_features, fake_features, k=3, threshold=0.95):
    """Estimate precision and recall of generated features with k-nearest-neighbour radii.

    Every feature vector is scaled to unit length. Each point gets a radius
    equal to the mean distance to its ``k`` nearest neighbours within its own
    set. A fake point counts towards precision when it lies closer than
    ``threshold * radius`` to at least one real point; a real point counts
    towards recall when it lies closer than ``threshold * radius`` to at
    least one fake point.

    Args:
        real_features: Array of shape (n_real, dim).
        fake_features: Array of shape (n_fake, dim).
        k: Number of nearest neighbours averaged into each radius; each set
            needs more than ``k`` points.
        threshold: Multiplier applied to the radii.

    Returns:
        Tuple ``(precision, recall)``, each the fraction of covered points.
    """
    # Normalize features
    real_features = real_features / np.linalg.norm(real_features, axis=1, keepdims=True)
    fake_features = fake_features / np.linalg.norm(fake_features, axis=1, keepdims=True)

    # Calculate pairwise distances
    real_distances = euclidean_distances(real_features, real_features)
    fake_distances = euclidean_distances(fake_features, fake_features)

    # Get radii for each point (mean distance to k nearest neighbors)
    def get_kth_nearest(distances, k):
        # np.partition only puts index k at its sorted position; the entries before it are
        # no larger but unordered, so the k+1 smallest are sorted before the self-distance is dropped
        smallest = np.sort(np.partition(distances, k, axis=1)[:, :k+1], axis=1)
        return np.mean(smallest[:, 1:], axis=1)

    real_radii = get_kth_nearest(real_distances, k)
    fake_radii = get_kth_nearest(fake_distances, k)

    # Calculate cross distances
    cross_distances = euclidean_distances(real_features, fake_features)

    # Compare against threshold * radius instead of dividing by the radius, so a zero radius
    # (duplicate points) cannot produce NaN/inf values that hide a genuine match
    precision = np.mean(np.any(cross_distances < threshold * real_radii[:, None], axis=0))
    recall = np.mean(np.any(cross_distances < threshold * fake_radii[None, :], axis=1))

    return precision, recall

# Convert the features to NumPy once and share the arrays between both metrics
real_np = real_features.cpu().numpy()
fake_np = fake_features.cpu().numpy()

fid_score = calculate_fid(real_np, fake_np)
precision, recall = calculate_precision_recall(real_np, fake_np)

print(f"FID Score: {fid_score:.2f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")