In [1]:
import os
os.chdir('../../src')

In [2]:
from data import PolygonSentenceReader, Polygon
from modules import NLLLoss, ELBOLoss
from neural_process.models.np import CNP, LNP
from neural_process.models.attnnp import AttnCNP, AttnLNP
from utils import plot_polygon

import datetime
import random
import torch
import torch.nn as nn
from torch.distributions.kl import kl_divergence

In [6]:
TRAINING_ITERATIONS = int(2e5)
PLOT_AFTER = int(2e4)
BATCH_SIZE = 64
MAX_CONTEXT_POINTS = 10
MIN_SIDES = 4
MAX_SIDES = 4
x_size = 4 + 3 * MAX_SIDES
y_size = MAX_SIDES
torch.manual_seed(0)

criterion = ELBOLoss()

# Instantiate a polygon generator.
# (For example, polygons with between 3 and 8 sides.)
polygon_generator_train = PolygonSentenceReader(
    batch_size=BATCH_SIZE,
    max_num_context=MAX_CONTEXT_POINTS,
    max_seq_len=x_size,
    min_num_sides=MIN_SIDES,
    max_num_sides=MAX_SIDES,
    center=(5, 5),
    radius=3,
    testing=False,
)

polygon_generator_test = PolygonSentenceReader(
    batch_size=100,
    max_num_context=MAX_CONTEXT_POINTS,
    max_seq_len=x_size,
    min_num_sides=MIN_SIDES,
    max_num_sides=MAX_SIDES,
    center=(5, 5),
    radius=3,
    testing=True,
)

model = LNP(x_dim=x_size, y_dim=y_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=TRAINING_ITERATIONS, eta_min=1e-6
)
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
device = next(model.parameters()).device

# Training

In [4]:
# ----------------------
# Training Loop
# ----------------------
for it in range(TRAINING_ITERATIONS + 1):
    # Generate a batch of polygon completion tasks.
    (
        context_x,
        context_y,
        target_x,
        target_y,
        tokens,
        true_poly,
        max_seq_len,
        num_context,
    ) = polygon_generator_train.generate_polygon_batch_few_shot_completion_task()

    context_x = context_x.to(device)
    context_y = context_y.to(device)
    target_x = target_x.to(device)
    target_y = target_y[:, :, :y_size].to(device)

    optimizer.zero_grad()
    # Forward pass: the NP model expects context_x, context_y, target_x, target_y.
    dist, z, q_zc, q_zct = model(context_x, context_y, target_x, target_y)
    loss = criterion(dist, q_zct, q_zc, target_y)
    loss.backward()
    optimizer.step()
    scheduler.step()

    if it % 1000 == 0:
        print("Iteration: {}, train loss: {}".format(it, loss.item()))

    # ----------------------
    # Evaluation and Plotting
    # ----------------------
    if it % PLOT_AFTER == 0:
        # For plotting, we generate a single polygon sample.
        (
            context_x_eval,
            context_y_eval,
            target_x_eval,
            target_y_eval,
            tokens_eval,
            true_poly_eval,
            max_seq_len_eval,
            num_context_eval,
        ) = polygon_generator_test.generate_polygon_batch_few_shot_completion_task()

        context_x_eval = context_x_eval.to(device)
        context_y_eval = context_y_eval.to(device)
        target_x_eval = target_x_eval.to(device)
        target_y_eval = target_y_eval[:, :, :y_size].to(device)

        # Forward pass through the model.
        test_dist, test_z, test_q_zc, test_q_zct = model(
            context_x_eval, context_y_eval, target_x_eval, target_y_eval
        )
        loss = criterion(dist, q_zct, q_zc, target_y)

        print(
            "{}, Iteration: {}, Test Loss: {}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                it,
                loss.item(),
            )
        )

        # Get NP predictions on target tokens (taking the mean of the predictive distribution).
        pred_target = test_dist.mean  # shape: [n_z, batch, n_target, y_dim]

        # For simplicity, we average over latent samples and batch dimension.
        pred_target_avg = pred_target.mean(0).squeeze(1)  # shape: [n_target, y_dim]

        # print("pred_target_avg shape: ", pred_target_avg.shape)
        pred_target_avg = pred_target_avg[0]
        true_polygon = true_poly_eval[0]
        true_polygon_tokenised = true_polygon.to_tokenised()
        # print("pred_target_avg shape after squeeze: ", pred_target_avg.shape)

        print("True Polygon: ", true_polygon)
        print("True Polygon Angles: ", true_polygon.angles)
        print("Predicted Polygon Angles: ", pred_target_avg.tolist())

Iteration: 0, train loss: 46353216.0
2025-04-26 17:14:58, Iteration: 0, Test Loss: 46353216.0
True Polygon:  Polygon(n=4, vertices=[(7.98, 5.34), (2.98, 7.22), (2.09, 5.74), (6.27, 2.28)], lengths=[5.341760009584856, 1.726991603917054, 5.426232578870906, 3.5053815769470806], angles=[81.40877456777234, 100.41439339656749, 98.59564431278395, 79.58118772287624])
True Polygon Angles:  [81.40877456777234, 100.41439339656749, 98.59564431278395, 79.58118772287624]
Predicted Polygon Angles:  [21.79067611694336, 3.0412979125976562, 25.325286865234375, -12.281815528869629]
Iteration: 1000, train loss: 118017.3125
Iteration: 2000, train loss: 310688.0
Iteration: 3000, train loss: 46515.828125
Iteration: 4000, train loss: 111141.3671875
Iteration: 5000, train loss: 87896.0234375
Iteration: 6000, train loss: 65561.3203125
Iteration: 7000, train loss: 144039.609375
Iteration: 8000, train loss: 15943.1513671875
Iteration: 9000, train loss: 24171.220703125
Iteration: 10000, train loss: 12068.758789062

In [5]:
torch.save(model.state_dict(), "../models/polygon/np/" + model.__class__.__name__ + "_angles_completion_task_squares_only.pt")