In [None]:
from aux_functions import *
from sphere_vector_kernels import *
from sphere_vector_gp import *

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import itertools as it
import pickle
import re

from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel

# Utility functions

In [None]:
# Directory (relative to the working directory) where the pickled synthetic
# datasets are written by save_experiment and later listed by the run loop.
TEMP = os.path.join("temp", "synthetic_experiments")

In [None]:
def save_experiment(X_train, y_train, X_test, y_test, fname):
    """Pickle one train/test split to ``TEMP/<fname>.pickle``.

    Parameters
    ----------
    X_train, y_train, X_test, y_test
        Objects stored together as a 4-tuple, in this order.
    fname : str
        File stem (without the ``.pickle`` extension) inside ``TEMP``.
    """
    # Create the cache directory on first use so a fresh checkout does not fail here.
    os.makedirs(TEMP, exist_ok=True)
    # Use the `fname` argument: the path used to be built from a global `name`,
    # which only worked when the calling cell happened to define one.
    with open(os.path.join(TEMP, f"{fname}.pickle"), 'wb') as f:
        pickle.dump((X_train, y_train, X_test, y_test), f)

In [None]:
def mse(y_true, y_pred):
    """Mean, over the leading axis, of the squared Euclidean norm of the error.

    ``y_true`` and ``y_pred`` are mapped over their first axis in lockstep, so
    they must have the same leading length.
    """
    def _squared_error(target, prediction):
        return jnp.linalg.norm(target - prediction)**2

    per_sample = jax.vmap(jax.jit(_squared_error))(y_true, y_pred)
    return per_sample.mean()

def pred_nll(y_true, y_pred, std_pred):
    """Average negative log-density of ``y_true`` under per-sample Gaussians.

    The three arguments are mapped over their first axis in lockstep and each
    triple is forwarded positionally to ``multivariate_normal.logpdf``.
    NOTE(review): ``std_pred`` lands in that function's third positional slot,
    which the JAX API treats as a covariance matrix despite the name used
    here -- confirm what ``SphereVectorGP.predict`` actually returns.
    """
    log_density = jax.vmap(jax.scipy.stats.multivariate_normal.logpdf)
    per_sample = log_density(y_true, y_pred, std_pred)
    return -per_sample.mean()

def run_single_experiment(X_train, y_train, X_test, y_test, model, number, ds_name, verbose=True, n_restarts_optimizer=0):
    """Fit one GP model on a dataset and return its test metrics.

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets passed to ``SphereVectorGP.fit``.
    X_test, y_test
        Test inputs passed to ``predict`` and the targets the prediction is
        scored against.
    model : tuple
        ``(name, kernel)`` pair; ``name`` is only used as a label.
    number
        Repeat identifier, stored unchanged under ``"n"``.
    ds_name
        Dataset identifier, stored unchanged under ``"dataset"``.
    verbose : bool
        If true, display the fitted GP, the negated per-training-point log
        marginal likelihood and the metrics dictionary.
    n_restarts_optimizer : int
        Forwarded to ``SphereVectorGP``.

    Returns
    -------
    dict
        Keys ``name``, ``n``, ``dataset``, ``fitted_gp``, ``MSE``, ``PNLL``.
    """
    name, k = model

    # Honour the caller's restart count; it used to be silently replaced by 0.
    gp = SphereVectorGP(kernel=k, n_restarts_optimizer=n_restarts_optimizer)
    gp.fit(X_train, y_train)
    if verbose:
        display(gp)
        display("MLL:", -gp.log_marginal_likelihood_value_ / X_train.shape[0])

    # The second output is handed to pred_nll as the predictive uncertainty.
    mu_star, std = gp.predict(X_test, return_std=True)
    metrics = {
        "name": name,
        "n": number,
        "dataset": ds_name,
        "fitted_gp": str(gp),
        "MSE": float(mse(y_test, mu_star)),
        "PNLL": float(pred_nll(y_test, mu_star, std)),
    }
    if verbose:
        display(metrics)
    return metrics

In [None]:
def mse_and_pnll_table(df, n_drop=0, tex_fname=None):
    """Display, and optionally export, Mean/Std tables of MSE and PNLL.

    One table is built per metric: rows are kernels, columns are
    ``(dataset, Mean|Std)`` pairs, and the smallest mean in each dataset
    column is set in bold.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per experiment with at least the columns ``name``,
        ``dataset``, ``MSE`` and ``PNLL``. The caller's frame is not modified.
    n_drop : int
        If positive, the ``n_drop`` last rows of every (kernel, dataset)
        group after an ascending sort on the metric (i.e. its largest
        values) are discarded before computing the statistics.
    tex_fname : str or None
        If given, each table is also written as LaTeX to
        ``tables/<tex_fname>__<metric>.tex``.
    """
    dataset_labels = {
        "hodge_matern_sample": r"H.--M.",
        "hodge_sample": r"Hodge",
        "proj_matern_sample": r"Proj.~M.",
        "rotating_vf": "Rotation",
        "curl_free_hodge_matern_sample": "curl-free H.--M."
    }
    group_keys = ["Kernel", "Dataset"]

    # rename() returns a new frame, so the column assignment below stays local.
    df = df.rename(columns={"name": "Kernel", "dataset": "Dataset"})
    # Unknown dataset names are kept verbatim.
    df["Dataset"] = df["Dataset"].apply(lambda ds: dataset_labels.get(ds, ds))

    for col in ["MSE", "PNLL"]:
        if n_drop > 0:
            # Count, inside each group, how many rows follow a row in ascending
            # metric order and keep only rows with at least n_drop successors.
            # This avoids groupby.apply, whose handling of the grouping columns
            # differs between pandas versions.
            ordered = df.sort_values(col)
            rows_after = ordered.groupby(group_keys).cumcount(ascending=False)
            df_stats = ordered[(rows_after >= n_drop).to_numpy()].reset_index(drop=True)
        else:
            df_stats = df.copy()

        df_stats = (
            df_stats[group_keys + [col]]
            .groupby(group_keys)[col]
            .describe()
            .rename(columns={"mean": "Mean", "std": "Std"})
            .reset_index(drop=False)
        )
        df_stats = df_stats[["Dataset", "Kernel", "Mean", "Std"]]
        # Long format first, then pivot into (Dataset, Mean/Std) column pairs.
        df_stats = df_stats.melt(id_vars=["Kernel", "Dataset"])
        df_stats = df_stats.sort_values(["Dataset", "variable", "Kernel"]).rename(columns={"variable": ""})
        df_stats = df_stats.pivot(columns=["Dataset", ""], index="Kernel")
        df_stats = df_stats.droplevel(0, axis=1)

        mean_columns = [c for c in df_stats.columns if c[1] == "Mean"]
        s = df_stats.style.highlight_min(
            axis=0, subset=mean_columns, props='font-weight:bold;'
        )
        s = s.format(precision=2)

        latex_table = s.to_latex(hrules=True)
        # Replace the command the styler emits for the CSS property with \bf.
        latex_table = latex_table.replace("\\font-weightbold", "\\bf")

        if tex_fname is not None:
            # Make sure the output directory exists before writing into it.
            os.makedirs("tables", exist_ok=True)
            with open(os.path.join("tables", f"{tex_fname}__{col}.tex"), "w") as f:
                f.write(latex_table)

        display(s)

# Experimental vector fields

In [None]:
# Sizes shared by every synthetic dataset generated below.
n_train = 30    # training inputs per dataset
n_test = 100    # test inputs per dataset
n_repeat = 10   # datasets generated per vector-field family

## Rotating vector field

In [None]:
# Vector field rotating around the z-axis.
def rotating_vector_field(car):
    """Evaluate the field ``(x, y, z) -> (-y, x, 0)`` at Cartesian points.

    ``car`` holds one point per row (a single 3-vector also works); the result
    is ``car`` right-multiplied by a constant 3x3 matrix.
    """
    generator = np.array([
        [0, 1, 0],
        [-1, 0, 0],
        [0, 0, 0],
    ])
    return car @ generator

In [None]:
def uniform_samples_hemisphere(n_samples, north=True):
    """Draw ``n_samples`` unit vectors whose z-coordinate has a fixed sign.

    Standard-normal 3-vectors are drawn from NumPy's global RNG, their
    z-component is made non-negative (``north=True``) or non-positive
    (``north=False``), and every row is then scaled to unit length.
    """
    z_sign = 1 if north else -1
    samples = np.random.multivariate_normal(np.zeros(3), np.eye(3), size=n_samples)
    # Reflect each draw into the requested half-space.
    samples[:, 2] = z_sign * abs(samples[:, 2])
    # Row-wise Euclidean norms, used to project the draws onto the unit sphere.
    row_norms = jax.vmap(jnp.linalg.norm)(samples)
    return samples / row_norms[:, None]

In [None]:
name_base = "rotating_vf"
np.random.seed(42)
for i in range(n_repeat):
    name = f"{name_base}__{i}"
    # Draw the inputs before looking at the cache. Skipping the draw for
    # datasets that already exist would leave the RNG un-advanced, so after a
    # partial run the remaining datasets would repeat the random stream of
    # earlier indices instead of getting their own.
    X_train = uniform_samples_hemisphere(n_train, north=True)
    X_test = uniform_samples_hemisphere(n_test, north=False)
    if os.path.exists(os.path.join(TEMP, f"{name}.pickle")):
        continue
    y_train = rotating_vector_field(X_train)
    y_test = rotating_vector_field(X_test)
    # to spherical coord
    X_train, y_train = v_car_to_sph(X_train, y_train)
    X_test, y_test = v_car_to_sph(X_test, y_test)
    # save
    save_experiment(X_train, y_train, X_test, y_test, name)

## Samples from projected Matern

In [None]:
# Seed NumPy's global RNG, which uniform_samples_hemisphere draws from.
np.random.seed(42)

# Training inputs come from the z >= 0 hemisphere, test inputs from z <= 0.
# car_to_sph presumably converts Cartesian points to spherical coordinates -- TODO confirm.
X_train = car_to_sph(uniform_samples_hemisphere(n_train, north=True))
X_test = car_to_sph(uniform_samples_hemisphere(n_test, north=False))
X = np.vstack([X_train, X_test])

# This GP is never fitted here; it is only used to draw samples for the kernel below.
gp = SphereVectorGP(kernel=ProjectedMaternSphereKernel(kappa=0.5, nu=0.5))

# One joint draw at the stacked train+test inputs per repeat. The slicing below
# assumes axis 0 indexes the inputs and the last axis the draws -- TODO confirm.
y = gp.sample_y(X, n_samples=n_repeat)

name_base = "proj_matern_sample"
for i in range(n_repeat):
    name = f"{name_base}__{i}"
    # Keep datasets already written by an earlier run.
    if os.path.exists(os.path.join(TEMP, f"{name}.pickle")):
        continue
    # The first n_train rows of X are the training inputs, the rest the test inputs.
    y_train = y[:n_train, :, i]
    y_test = y[n_train:, :, i]
    save_experiment(X_train, y_train, X_test, y_test, name)

## Samples from Hodge-Matern

In [None]:
# Seed NumPy's global RNG, which uniform_samples_hemisphere draws from.
np.random.seed(42)

# Training inputs come from the z >= 0 hemisphere, test inputs from z <= 0.
# car_to_sph presumably converts Cartesian points to spherical coordinates -- TODO confirm.
X_train = car_to_sph(uniform_samples_hemisphere(n_train, north=True))
X_test = car_to_sph(uniform_samples_hemisphere(n_test, north=False))
X = np.vstack([X_train, X_test])

# This GP is never fitted here; it is only used to draw samples for the kernel below.
gp = SphereVectorGP(kernel=HodgeMaternSphereKernel(kappa=0.5, nu=0.5))

# One joint draw at the stacked train+test inputs per repeat. The slicing below
# assumes axis 0 indexes the inputs and the last axis the draws -- TODO confirm.
y = gp.sample_y(X, n_samples=n_repeat)

name_base = "hodge_matern_sample"
for i in range(n_repeat):
    name = f"{name_base}__{i}"
    # Keep datasets already written by an earlier run.
    if os.path.exists(os.path.join(TEMP, f"{name}.pickle")):
        continue
    # The first n_train rows of X are the training inputs, the rest the test inputs.
    y_train = y[:n_train, :, i]
    y_test = y[n_train:, :, i]
    save_experiment(X_train, y_train, X_test, y_test, name)

## Samples from Hodge-RBF

In [None]:
# Seed NumPy's global RNG, which uniform_samples_hemisphere draws from.
np.random.seed(42)

# Training inputs come from the z >= 0 hemisphere, test inputs from z <= 0.
# car_to_sph presumably converts Cartesian points to spherical coordinates -- TODO confirm.
X_train = car_to_sph(uniform_samples_hemisphere(n_train, north=True))
X_test = car_to_sph(uniform_samples_hemisphere(n_test, north=False))
X = np.vstack([X_train, X_test])

# This GP is never fitted here; it is only used to draw samples for the kernel below.
gp = SphereVectorGP(kernel=HodgeSphereKernel(kappa=0.5))

# One joint draw at the stacked train+test inputs per repeat. The slicing below
# assumes axis 0 indexes the inputs and the last axis the draws -- TODO confirm.
y = gp.sample_y(X, n_samples=n_repeat)

name_base = "hodge_sample"
for i in range(n_repeat):
    name = f"{name_base}__{i}"
    # Keep datasets already written by an earlier run.
    if os.path.exists(os.path.join(TEMP, f"{name}.pickle")):
        continue
    # The first n_train rows of X are the training inputs, the rest the test inputs.
    y_train = y[:n_train, :, i]
    y_test = y[n_train:, :, i]
    save_experiment(X_train, y_train, X_test, y_test, name)

## Samples from curl-free Hodge-Matern

In [None]:
# Seed NumPy's global RNG, which uniform_samples_hemisphere draws from.
np.random.seed(42)

# Training inputs come from the z >= 0 hemisphere, test inputs from z <= 0.
# car_to_sph presumably converts Cartesian points to spherical coordinates -- TODO confirm.
X_train = car_to_sph(uniform_samples_hemisphere(n_train, north=True))
X_test = car_to_sph(uniform_samples_hemisphere(n_test, north=False))
X = np.vstack([X_train, X_test])

# This GP is never fitted here; it is only used to draw samples for the kernel below.
gp = SphereVectorGP(kernel=HodgeMaternCurlFreeSphereKernel(kappa=0.5, nu=0.5))

# One joint draw at the stacked train+test inputs per repeat. The slicing below
# assumes axis 0 indexes the inputs and the last axis the draws -- TODO confirm.
y = gp.sample_y(X, n_samples=n_repeat)

name_base = "curl_free_hodge_matern_sample"
for i in range(n_repeat):
    name = f"{name_base}__{i}"
    # Keep datasets already written by an earlier run.
    if os.path.exists(os.path.join(TEMP, f"{name}.pickle")):
        continue
    # The first n_train rows of X are the training inputs, the rest the test inputs.
    y_train = y[:n_train, :, i]
    y_test = y[n_train:, :, i]
    save_experiment(X_train, y_train, X_test, y_test, name)

# Run experiments

In [None]:
def _with_scale_and_noise(base_kernel):
    """Wrap ``base_kernel`` as ``ConstantKernel * base_kernel + WhiteKernel``.

    New ``ConstantKernel`` and ``WhiteKernel`` objects are created on every
    call, so no kernel instance is shared between models.
    """
    return ConstantKernel(constant_value_bounds=(1e-5, 1e8)) * base_kernel + WhiteKernel()


# (label, kernel) pairs; the labels are LaTeX snippets used as table row names.
models = [
    ("Pure noise", WhiteKernel()),
    (r"Proj.", _with_scale_and_noise(ProjectedSphereKernel(kappa=.2))),
    (r"Hodge", _with_scale_and_noise(HodgeSphereKernel(kappa=.2))),
    (r"H.--M.--$\tfrac{1}{2}$", _with_scale_and_noise(HodgeMaternSphereKernel(kappa=.2, nu=0.5))),
    (r"div-free Hodge", _with_scale_and_noise(HodgeDivFreeSphereKernel(kappa=.2))),
    (r"div-free H.--M.--$\tfrac{1}{2}$", _with_scale_and_noise(HodgeMaternDivFreeSphereKernel(kappa=.2, nu=0.5))),
    (r"curl-free Hodge", _with_scale_and_noise(HodgeCurlFreeSphereKernel(kappa=.2))),
    (r"curl-free H.--M.--$\tfrac{1}{2}$", _with_scale_and_noise(HodgeMaternCurlFreeSphereKernel(kappa=.2, nu=0.5))),
    (r"Proj.~M.--$\tfrac{1}{2}$", _with_scale_and_noise(ProjectedMaternSphereKernel(kappa=.2, nu=0.5))),
]

# Turn "<dot><whitespace>" in a label into "<dot>~". The pattern is a raw
# string so that "\." and "\s" reach the regex engine instead of being
# treated as (invalid) string escape sequences.
models = [
    (re.sub(r"\.\s", ".~", name), kernel) for name, kernel in models
]

In [None]:
fpath = os.path.join("temp", "results_synthetic_experiments.pickle")
# Resume from the checkpoint of an earlier run, if there is one.
if os.path.exists(fpath):
    with open(fpath, 'rb') as f:
        results = pickle.load(f)
else:
    results = {}

try:
    # sorted() makes the processing order independent of the directory listing order.
    for dataset in sorted(os.listdir(TEMP)):
        stem, ext = os.path.splitext(dataset)
        if ext != ".pickle":
            continue
        # Files are named "<dataset>__<repeat>.pickle"; split on the last "__"
        # and ignore anything that does not follow that pattern.
        ds_name, sep, i = stem.rpartition("__")
        if not sep:
            continue

        for model in tqdm(models, desc=f"{ds_name}, {i}"):
            name = model[0]
            key = (i, name, ds_name)
            # Already computed in this or an earlier run.
            if key in results:
                continue
            with open(os.path.join(TEMP, dataset), 'rb') as f:
                X_train, y_train, X_test, y_test = pickle.load(f)

            results[key] = run_single_experiment(X_train, y_train, X_test, y_test, model, i, ds_name, verbose=False, n_restarts_optimizer=0)

            # Checkpoint after every fit so an interrupted run loses at most one model.
            with open(fpath, 'wb') as f:
                pickle.dump(results, f)
except KeyboardInterrupt:
    # Stopping the long loop by hand is expected; keep what was computed so far.
    print(f"Interrupted; {len(results)} results are available.")
except Exception as exc:
    # Keep the notebook usable with the partial results, but never hide the failure.
    print(f"Synthetic experiments stopped early: {exc!r}")

In [None]:
# One row per (repeat, kernel, dataset) metrics dictionary.
df = pd.DataFrame(list(results.values()))

In [None]:
# Show the MSE and PNLL tables and export them as LaTeX; no runs are discarded.
mse_and_pnll_table(df=df, tex_fname="synthetic_experiments", n_drop=0)