In [1]:
import sys, pathlib
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import itertools
# Repository root = parent of the notebook's working directory.
# It is put at the front of sys.path, presumably so that project packages
# can be imported from this notebook.
PROJECT_ROOT = pathlib.Path.cwd().parent  # LearningG2
# Guard the insert: re-running this cell must not stack duplicate entries
# on sys.path (keeps the cell idempotent).
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)

PROJECT_ROOT: /Users/tomassilva/Desktop/GitHub/LearningG2


In [2]:
# Load the sampled dataset archive (.npz). The path is relative to the
# notebook's working directory. Ending the cell on `data.files` displays the
# names of the arrays stored in the archive.
data = np.load("../sampling/g2_dataset.npz")
data.files

['base_points',
 'link_points',
 'rotations',
 'phis',
 'psis',
 'riemannian_metrics',
 'g2_metrics',
 'drop_maxs',
 'drop_ones',
 'etas']

In [16]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


# -------------------------
# Data utils
# -------------------------
def split_data(X, Y, seed=42, train_frac=0.9, val_frac=0.05):
    """
    Shuffle paired arrays and split them into train / validation / test sets.

    Parameters
    ----------
    X, Y : ndarray
        Arrays indexed by sample along axis 0; they must have the same length
        so that rows stay paired after shuffling.
    seed : int
        Seed of the NumPy generator used to draw the shuffling permutation.
    train_frac, val_frac : float
        Fractions of the samples assigned to the training and validation
        sets (each count is truncated to an integer). All remaining samples
        form the test set. The defaults give the 90% / 5% / 5% split.

    Returns
    -------
    (Xtr, Ytr), (Xva, Yva), (Xte, Yte)
        The three disjoint subsets, in shuffled order.

    Raises
    ------
    ValueError
        If X and Y have different numbers of samples, or if the fractions
        are negative or sum to more than 1.
    """
    N = X.shape[0]
    if Y.shape[0] != N:
        raise ValueError(
            f"X and Y must have the same number of samples, got {N} and {Y.shape[0]}"
        )
    # Small tolerance so that e.g. 0.9 + 0.1 style inputs are not rejected
    # because of floating-point rounding.
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-12:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1, "
            f"got {train_frac} and {val_frac}"
        )

    rng = np.random.default_rng(seed)
    idx = rng.permutation(N)

    n_tr = int(train_frac * N)
    n_va = int(val_frac * N)

    tr = idx[:n_tr]
    va = idx[n_tr:n_tr + n_va]
    te = idx[n_tr + n_va:]  # everything left over after train and validation
    return (X[tr], Y[tr]), (X[va], Y[va]), (X[te], Y[te])


def normalize_data(Xtr, Ytr):
    """
    Fit one StandardScaler on the training inputs and another on the
    training targets, and return the scaled arrays with the fitted scalers.

    Returns
    -------
    Xtr_n, Ytr_n, x_scaler, y_scaler
        Scaled inputs, scaled targets, and the two fitted scalers (so the
        same transforms can be applied to other splits by the caller).
    """
    fitted = []
    scaled = []
    # Inputs first, then targets: each array gets its own scaler.
    for train_array in (Xtr, Ytr):
        scaler = StandardScaler()
        scaled.append(scaler.fit_transform(train_array))
        fitted.append(scaler)

    Xtr_n, Ytr_n = scaled
    x_scaler, y_scaler = fitted
    return Xtr_n, Ytr_n, x_scaler, y_scaler


# -------------------------
# Model
# -------------------------
def build_regressor_19_to_28():
    """
    Build the uncompiled fully connected network mapping 19 inputs to 28 outputs.

    The stack is: Input(19) -> Dense(512) -> Dense(512) -> Dense(256)
    -> Dense(256), all hidden layers with GELU activation, followed by a
    Dense(28) layer with no activation.
    """
    hidden_widths = (512, 512, 256, 256)

    stack = [layers.Input(shape=(19,))]
    for width in hidden_widths:
        stack.append(layers.Dense(width, activation="gelu"))
    stack.append(layers.Dense(28))  # linear output

    return keras.Sequential(stack)


# -------------------------
# Plots
# -------------------------
def plot_training_history(history):
    """
    Plot per-epoch training curves from a Keras History object.

    Draws the loss curve (train, plus validation when "val_loss" was
    recorded) and, if "mae" was recorded, a second figure with the MAE
    curves. Both figures use a logarithmic y-axis and are shown immediately.
    """
    curves = history.history
    epoch_axis = np.arange(1, len(curves["loss"]) + 1)

    def _draw(train_key, val_key, train_label, val_label, ylabel):
        # One figure per metric; the validation curve is optional.
        plt.figure(figsize=(7, 4))
        plt.plot(epoch_axis, curves[train_key], label=train_label)
        if val_key in curves:
            plt.plot(epoch_axis, curves[val_key], label=val_label)
        plt.yscale("log")
        plt.xlabel("Epoch")
        plt.ylabel(ylabel)
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    # Loss is always present in the history.
    _draw("loss", "val_loss", "train loss", "val loss", "MSE loss")

    # MAE only when it was tracked as a metric.
    if "mae" in curves:
        _draw("mae", "val_mae", "train MAE", "val MAE", "MAE")


def plot_true_vs_pred(Y_true, Y_pred, n_points=20000, title="True vs Pred"):
    """
    Scatter predicted against true values, with all outputs flattened together.

    Both arrays are flattened, at most `n_points` entries are drawn at random
    without replacement (using NumPy's global random state), and the same
    entries of both arrays are plotted with a dashed red y = x reference line
    spanning the sampled value range.
    """
    flat_true = Y_true.reshape(-1)
    flat_pred = Y_pred.reshape(-1)

    # Subsample so the scatter stays readable and cheap to draw.
    total = len(flat_true)
    picked = np.random.choice(total, size=min(total, n_points), replace=False)
    flat_true, flat_pred = flat_true[picked], flat_pred[picked]

    # Common range of both axes, used for the identity line.
    lo = min(flat_true.min(), flat_pred.min())
    hi = max(flat_true.max(), flat_pred.max())
    diagonal = [lo, hi]

    plt.figure(figsize=(6, 6))
    plt.scatter(flat_true, flat_pred, s=2, alpha=0.25)
    plt.plot(diagonal, diagonal, "r--")
    plt.xlabel("True")
    plt.ylabel("Predicted")
    plt.title(title)
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# -------------------------
# Train + evaluate
# -------------------------
def train_regressor_19_to_28(
    X, Y,
    batch=2048,
    epochs=150,
    lr=1e-3,
    seed=42,
    do_plots=True
):
    """
    Train the 19 -> 28 regressor on (X, Y) and evaluate it on a held-out test split.

    The data are shuffled and split with `split_data`, inputs and targets are
    standardized with scalers fitted on the training split only, and the
    model is trained with Adam on an MSE loss (MAE tracked as a metric).
    The learning rate is halved when `val_loss` plateaus, and training stops
    early with the best weights restored.

    Parameters
    ----------
    X : array_like, shape (N, 19)
    Y : array_like, shape (N, 28)
    batch : int
        Training batch size.
    epochs : int
        Maximum number of epochs (early stopping may end training sooner).
    lr : float
        Initial Adam learning rate.
    seed : int
        Seed for the data split. When it is an integer it is also passed to
        `keras.utils.set_random_seed`, which seeds the Python, NumPy and
        backend global generators so that weight initialization and batch
        shuffling are reproducible. NOTE: this changes global random state.
    do_plots : bool
        If True, show the training curves and a true-vs-predicted scatter.

    Returns
    -------
    model, (Xte_n, Yte_true, Yte_pred), (x_scaler, y_scaler), history
        Xte_n is the normalized test input; Yte_true and Yte_pred are in the
        original (un-normalized) target scale.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    assert X.ndim == 2 and X.shape[1] == 19, f"X must be (N,19), got {X.shape}"
    assert Y.ndim == 2 and Y.shape[1] == 28, f"Y must be (N,28), got {Y.shape}"
    assert X.shape[0] == Y.shape[0], (
        f"X and Y must have the same number of rows, got {X.shape[0]} and {Y.shape[0]}"
    )

    # Without this, `seed` would only control the split and two runs with the
    # same seed would still train different models. set_random_seed requires
    # a plain int, so other seed kinds accepted by the split are skipped here.
    if isinstance(seed, (int, np.integer)):
        keras.utils.set_random_seed(int(seed))

    (Xtr, Ytr), (Xva, Yva), (Xte, Yte) = split_data(X, Y, seed=seed)

    # normalize (scalers are fitted on the training split only)
    Xtr_n, Ytr_n, x_scaler, y_scaler = normalize_data(Xtr, Ytr)
    Xva_n = x_scaler.transform(Xva)
    Yva_n = y_scaler.transform(Yva)
    Xte_n = x_scaler.transform(Xte)

    # model
    model = build_regressor_19_to_28()
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss="mse",
        metrics=["mae"]
    )

    callbacks = [
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=8, min_lr=1e-6
        ),
        keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=20, restore_best_weights=True
        ),
    ]

    history = model.fit(
        Xtr_n, Ytr_n,
        validation_data=(Xva_n, Yva_n),
        batch_size=batch,
        epochs=epochs,
        callbacks=callbacks,
        verbose=1
    )

    # predictions in original scale
    Ypred_n = model.predict(Xte_n, batch_size=8192, verbose=0)
    Ypred = y_scaler.inverse_transform(Ypred_n)
    # The test targets are already in the original scale; scaling them and
    # inverting the scaling again would only add floating-point error.
    Ytrue = Yte

    if do_plots:
        plot_training_history(history)
        plot_true_vs_pred(Ytrue, Ypred, n_points=20000, title="Test set: 28 outputs (flattened)")

    return model, (Xte_n, Ytrue, Ypred), (x_scaler, y_scaler), history




In [17]:
def upper_triangular_part(A, include_diagonal=True):
    """
    Extract the upper triangular part of a 7x7 matrix or a batch of them.

    Parameters
    ----------
    A : array_like, shape (7,7) or (..., 7, 7)
        Input matrix, or a stack of matrices with any leading batch
        dimensions.
    include_diagonal : bool
        Whether to include the diagonal entries.

    Returns
    -------
    v : ndarray, shape (..., 28) if include_diagonal else (..., 21)
        Upper triangular entries in row-major order; leading batch
        dimensions of A are preserved. A single (7,7) input gives a 1-D
        vector.
    """
    A = np.asarray(A)
    assert A.ndim >= 2 and A.shape[-2:] == (7, 7), (
        f"Input must be a 7x7 matrix or a batch of shape (..., 7, 7), got {A.shape}"
    )

    # k=0 keeps the diagonal, k=1 starts one above it.
    rows, cols = np.triu_indices(7, k=0 if include_diagonal else 1)

    # The ellipsis keeps any batch dimensions; only the last two axes are
    # gathered into the flattened triangular vector.
    return A[..., rows, cols]

# Feature matrix: link_points and etas side by side, followed by drop_maxs and
# drop_ones, each promoted from a 1-D array to a single column.
X = np.concatenate(
    [
        data["link_points"],
        data["etas"],
        data["drop_maxs"][:, None],
        data["drop_ones"][:, None],
    ],
    axis=1,
)

# Targets: upper triangle (diagonal included, np.triu_indices default k=0) of
# every 7x7 metric, flattened row-major into one vector per sample.
G = data["g2_metrics"]  # shape (N, 7, 7)
idx = np.triu_indices(7)
Y = G[:, idx[0], idx[1]]

X.shape, Y.shape

((200000, 19), (200000, 28))

In [None]:
# -------------------------
# Train the regressor on the X (N,19) / Y (N,28) arrays built in the previous cell
# -------------------------
# Unpacks: the trained model, the test split (normalized inputs, true targets,
# predicted targets), the (x_scaler, y_scaler) pair, and the Keras History.
# Default hyperparameters are used (batch=2048, epochs=150, lr=1e-3, seed=42).
model, (Xte_n, Ytrue, Ypred), scalers, history = train_regressor_19_to_28(X, Y)

Epoch 1/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - loss: 0.7370 - mae: 0.5044 - val_loss: 0.5444 - val_mae: 0.3605 - learning_rate: 0.0010
Epoch 2/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - loss: 0.5372 - mae: 0.3602 - val_loss: 0.5050 - val_mae: 0.3487 - learning_rate: 0.0010
Epoch 3/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.5017 - mae: 0.3492 - val_loss: 0.4747 - val_mae: 0.3422 - learning_rate: 0.0010
Epoch 4/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - loss: 0.4610 - mae: 0.3384 - val_loss: 0.4338 - val_mae: 0.3252 - learning_rate: 0.0010
Epoch 5/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 0.4219 - mae: 0.3213 - val_loss: 0.3952 - val_mae: 0.3106 - learning_rate: 0.0010
Epoch 6/150
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - loss: 0.3820 - mae: 0.3073 - val