In [1]:
import os
os.chdir('../../src')

In [2]:
from data import PolygonSentenceReader, Polygon
from modules import NLLLoss, ELBOLoss
from neural_process.models.np import CNP, LNP
from neural_process.models.attnnp import AttnCNP, AttnLNP
from utils import plot_polygon

import datetime
import random
import torch
import torch.nn as nn
from torch.distributions.kl import kl_divergence

In [5]:
TRAINING_ITERATIONS = int(2e5)
PLOT_AFTER = int(2e4)
BATCH_SIZE = 64
MAX_CONTEXT_POINTS = 10
MIN_SIDES = 3
MAX_SIDES = 8
x_size = 4 + 3 * MAX_SIDES
y_size = MAX_SIDES
torch.manual_seed(0)

criterion = ELBOLoss()

# Instantiate a polygon generator.
# (For example, polygons with between 3 and 8 sides.)
polygon_generator_train = PolygonSentenceReader(
    batch_size=BATCH_SIZE,
    max_num_context=MAX_CONTEXT_POINTS,
    max_seq_len=x_size,
    min_num_sides=MIN_SIDES,
    max_num_sides=MAX_SIDES,
    center=(5, 5),
    radius=3,
    testing=False,
)

polygon_generator_test = PolygonSentenceReader(
    batch_size=100,
    max_num_context=MAX_CONTEXT_POINTS,
    max_seq_len=x_size,
    min_num_sides=MIN_SIDES,
    max_num_sides=MIN_SIDES,
    center=(5, 5),
    radius=3,
    testing=True,
)

model = LNP(x_dim=x_size, y_dim=y_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=TRAINING_ITERATIONS, eta_min=1e-6
)
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
device = next(model.parameters()).device

# Training

In [9]:
# ----------------------
# Training Loop
# ----------------------
for it in range(TRAINING_ITERATIONS + 1):
    # Generate a batch of polygon completion tasks.
    (
        context_x,
        context_y,
        target_x,
        target_y,
        tokens,
        true_poly,
        max_seq_len,
        num_context,
    ) = polygon_generator_train.generate_polygon_batch_few_shot_completion_task()

    context_x = context_x.to(device)
    context_y = context_y.to(device)
    target_x = target_x.to(device)
    target_y = target_y.to(device)

    optimizer.zero_grad()
    # Forward pass: the NP model expects context_x, context_y, target_x, target_y.
    dist, z, q_zc, q_zct = model(context_x, context_y, target_x, target_y)
    loss = criterion(dist, q_zct, q_zc, target_y)
    loss.backward()
    optimizer.step()
    scheduler.step()

    if it % 1000 == 0:
        print("Iteration: {}, train loss: {}".format(it, loss.item()))

    # ----------------------
    # Evaluation and Plotting
    # ----------------------
    if it % PLOT_AFTER == 0:
        # For plotting, we generate a single polygon sample.
        (
            context_x_eval,
            context_y_eval,
            target_x_eval,
            target_y_eval,
            tokens_eval,
            true_poly_eval,
            max_seq_len_eval,
            num_context_eval,
        ) = polygon_generator_test.generate_polygon_batch_few_shot_completion_task()

        context_x_eval = context_x_eval.to(device)
        context_y_eval = context_y_eval.to(device)
        target_x_eval = target_x_eval.to(device)
        target_y_eval = target_y_eval.to(device)

        # Forward pass through the model.
        test_dist, test_z, test_q_zc, test_q_zct = model(
            context_x_eval, context_y_eval, target_x_eval, target_y_eval
        )
        loss = criterion(dist, q_zct, q_zc, target_y)

        print(
            "{}, Iteration: {}, Test Loss: {}".format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                it,
                loss.item(),
            )
        )

        # Get NP predictions on target tokens (taking the mean of the predictive distribution).
        pred_target = test_dist.mean  # shape: [n_z, batch, n_target, y_dim]

        # For simplicity, we average over latent samples and batch dimension.
        pred_target_avg = pred_target.mean(0).squeeze(1)  # shape: [n_target, y_dim]

        # print("pred_target_avg shape: ", pred_target_avg.shape)
        pred_target_avg = pred_target_avg[0]
        true_polygon = true_poly_eval[0]
        true_polygon_tokenised = true_polygon.to_tokenised()
        # print("pred_target_avg shape after squeeze: ", pred_target_avg.shape)

        print("True Polygon: ", true_polygon)
        print("True Polygon Angles: ", true_polygon.angles)
        print("Predicted Polygon Angles: ", pred_target_avg.tolist()[:len(true_polygon.angles)])

Iteration: 0, train loss: 1761.4365234375
2025-04-26 18:55:36, Iteration: 0, Test Loss: 1761.4365234375
True Polygon:  Polygon(n=3, vertices=[(7.9, 5.77), (2.82, 7.06), (6.72, 2.54)], lengths=[5.241230771488697, 5.969958123806229, 3.438793393037738], angles=[84.17983457513594, 34.96297794566348, 60.85718747920056])
True Polygon Angles:  [84.17983457513594, 34.96297794566348, 60.85718747920056]
Predicted Polygon Angles:  [52.9559440612793, 17.04191780090332, 67.7646713256836]
Iteration: 1000, train loss: 98.29766082763672
Iteration: 2000, train loss: 161.7765655517578
Iteration: 3000, train loss: 108.50579071044922
Iteration: 4000, train loss: 22.380998611450195
Iteration: 5000, train loss: 12.399890899658203
Iteration: 6000, train loss: 22.51494598388672
Iteration: 7000, train loss: 17.426193237304688
Iteration: 8000, train loss: 11.972577095031738
Iteration: 9000, train loss: 20.211105346679688
Iteration: 10000, train loss: 7.135207176208496
Iteration: 11000, train loss: 26.0581340789

In [5]:
torch.save(model.state_dict(), "../models/polygon/np/" + model.__class__.__name__ + "_angles_completion_task_squares_only.pt")