# Latent Model Classification

Solution author: Asandei Stefan-Alexandru

In [25]:
import argparse
import random
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import RobustScaler

In [26]:
# --- Run configuration -----------------------------------------------------
SEED = 42  # single seed shared by every random number generator below
EM_ITERATIONS = 40  # number of EM rounds passed to fit_em_and_predict
L2_REG = 1.2e-4  # ridge penalty used when fitting the linear heads

# Directory expected to hold dataset.csv and the two *_penultimate.pth files.
root_dir = Path("/home/stefan/ioai-prep/kits/aicc/round-0/latent-models")

# --- Reproducibility -------------------------------------------------------
# Seed Python, NumPy and PyTorch with the same value.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

In [27]:
class PenultimateNet(nn.Module):
    """Two-layer MLP that maps inputs to penultimate-layer activations.

    The network is ``Linear(in_dim, hidden) -> ReLU -> Linear(hidden, out_dim)``.
    The final classification head is not part of this module.

    Args:
        in_dim: Number of input features.
        hidden: Width of the hidden layer.
        out_dim: Size of the produced embedding.
    """

    def __init__(self, in_dim: int = 100, hidden: int = 64, out_dim: int = 10):
        super().__init__()
        # The attribute name `net` and the layer order determine the
        # state-dict keys (net.0.*, net.2.*), so saved checkpoints rely on
        # both staying as they are.
        layers = [
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the embedding of ``x`` produced by the sequential stack."""
        embedding = self.net(x)
        return embedding

# Data

In [28]:
def load_data(
    file_path: Path,
) -> tuple[pd.DataFrame, np.ndarray, np.ndarray]:
    """Read the dataset CSV and split it into feature and target arrays.

    Args:
        file_path: Location of the CSV file.

    Returns:
        A tuple ``(df, x, y)``: the full frame as read from disk, the columns
        whose names start with ``x`` and the columns whose names start with
        ``y``, both converted to float32 arrays.
    """
    frame = pd.read_csv(file_path)
    # Select column groups by name prefix; the regexes are anchored at the
    # start of the column name.
    features, targets = (
        frame.filter(regex=pattern).values.astype(np.float32)
        for pattern in (r"^x", r"^y")
    )
    return frame, features, targets

In [29]:
# Read dataset.csv from root_dir: `df` is the full frame, `X` holds the
# x-prefixed columns and `Y` the y-prefixed columns, both as float32 arrays.
df, X, Y = load_data(root_dir / "dataset.csv")

In [30]:
def generate_embeddings(
    model_path: Path, x_data: np.ndarray, device: torch.device
) -> np.ndarray:
    """Run the inputs through a saved PenultimateNet and return its outputs.

    Args:
        model_path: Checkpoint file holding a PenultimateNet state dict.
        x_data: Input array; it is converted with ``torch.from_numpy`` and
            fed to the network in a single batch.
        device: Device on which the weights are loaded and the forward pass
            is executed.

    Returns:
        The network outputs as a NumPy array on the CPU.
    """
    net = PenultimateNet()
    # NOTE(review): torch.load unpickles the checkpoint, so it should only be
    # pointed at files from a trusted source.
    checkpoint = torch.load(model_path, map_location=device)
    net.load_state_dict(checkpoint)
    net.to(device)
    net.eval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        inputs = torch.from_numpy(x_data).to(device)
        outputs = net(inputs)
    return outputs.cpu().numpy()

In [31]:
# Embed the same inputs X with each of the two candidate networks; ZA and ZB
# are the per-sample outputs of model A and model B respectively.
ZA = generate_embeddings(root_dir / "modelA_penultimate.pth", X, device)
ZB = generate_embeddings(root_dir / "modelB_penultimate.pth", X, device)

# Solution

In [32]:
def ridge_regression(
    Z: np.ndarray, Y: np.ndarray, weights: np.ndarray, l2: float
) -> tuple[np.ndarray, np.ndarray]:
    """
    Solves a sample-weighted ridge regression with an unpenalized intercept.

    Inputs and targets are centered on their weighted means, the penalized
    normal equations are solved for the slope, and the intercept is recovered
    from the two means afterwards.

    Args:
        Z: Input embeddings (N, d).
        Y: Target logits (N, m).
        weights: Sample weights (N,).
        l2: L2 regularization strength added to the diagonal of the Gram
            matrix.

    Returns:
        A tuple containing the weight matrix W (d, m) and bias vector b (m,).
    """
    sample_w = weights.reshape(-1, 1)
    total_w = sample_w.sum()

    def weighted_mean(values: np.ndarray) -> np.ndarray:
        # Column-wise mean where row i counts with weight sample_w[i].
        return (sample_w * values).sum(0) / total_w

    z_mean = weighted_mean(Z)
    y_mean = weighted_mean(Y)
    z_centered = Z - z_mean
    y_centered = Y - y_mean

    # Normal equations: (Zc^T diag(w) Zc + l2 * I) W = Zc^T diag(w) Yc
    n_features = Z.shape[1]
    gram = z_centered.T @ (sample_w * z_centered) + l2 * np.eye(n_features)
    cross = z_centered.T @ (sample_w * y_centered)
    W = np.linalg.solve(gram, cross)

    # Centering removed the intercept from the fit; restore it here.
    b = y_mean - z_mean @ W
    return W, b


def _em_responsibilities(
    sq_res_a: np.ndarray, sq_res_b: np.ndarray, sigma2: float
) -> np.ndarray:
    """Return (N, 2) responsibilities from per-sample squared residuals.

    Column 0 belongs to head A and column 1 to head B. Both components share
    the variance ``sigma2`` and no mixing proportions enter the score, so each
    row is a softmax over ``-squared_residual / (2 * sigma2)``.
    """
    logp = np.stack(
        [-sq_res_a / (2 * sigma2), -sq_res_b / (2 * sigma2)], axis=1
    )
    # Subtract the row maximum before exponentiating so exp() cannot overflow.
    logp -= logp.max(axis=1, keepdims=True)
    gamma = np.exp(logp)
    gamma /= gamma.sum(axis=1, keepdims=True)
    return gamma


def fit_em_and_predict(
    ZA: np.ndarray, ZB: np.ndarray, Y: np.ndarray, n_iter: int, l2: float
) -> np.ndarray:
    """
    Assigns each sample to model A or B with EM over two linear heads.

    The model is a two-component mixture of linear regressions: component A
    predicts Y from ZA, component B predicts Y from ZB. Both components share
    one isotropic noise variance and are given equal prior weight. Each
    iteration computes soft assignments from the current residuals (E-step),
    then refits both heads by weighted ridge regression and updates the
    shared variance (M-step).

    Args:
        ZA: Embeddings from model A (N, d).
        ZB: Embeddings from model B (N, d).
        Y: Observed logits (N, m).
        n_iter: Number of EM iterations. With ``n_iter <= 0`` the labels come
            from a single E-step on the initial unweighted fits.
        l2: L2 regularization for the M-step.

    Returns:
        An array of predicted labels (0 for A, 1 for B), taken from the
        responsibilities of the last E-step that was run.
    """
    N, m = Y.shape

    # Lower bound on the shared variance. If either fit becomes exact the
    # variance reaches 0 and the E-step would divide by zero, turning every
    # responsibility into NaN. The bound is far below any variance that
    # matters numerically, so ordinary runs are unaffected.
    min_sigma2 = 1e-30

    # --- Initialization ---
    # Start by fitting both linear heads to the whole dataset, unweighted.
    W_A, b_A = ridge_regression(ZA, Y, np.ones(N), l2)
    W_B, b_B = ridge_regression(ZB, Y, np.ones(N), l2)

    # Initial shared variance: mean squared residual of head A.
    residuals_A = Y - (ZA @ W_A + b_A)
    sigma2 = max(float(np.mean(residuals_A**2)), min_sigma2)

    # Per-sample squared residuals of the current heads.
    rA = (residuals_A**2).sum(1)
    rB = ((Y - (ZB @ W_B + b_B)) ** 2).sum(1)

    # --- EM Iterations ---
    gamma = None
    for _ in range(n_iter):
        # E-step: responsibilities under the current heads and variance.
        gamma = _em_responsibilities(rA, rB, sigma2)

        # M-step: refit each head with its responsibilities as weights.
        W_A, b_A = ridge_regression(ZA, Y, gamma[:, 0], l2)
        W_B, b_B = ridge_regression(ZB, Y, gamma[:, 1], l2)

        # These residuals feed both the variance update and the next E-step.
        rA = ((Y - (ZA @ W_A + b_A)) ** 2).sum(1)
        rB = ((Y - (ZB @ W_B + b_B)) ** 2).sum(1)
        sigma2 = max(
            float((gamma[:, 0] @ rA + gamma[:, 1] @ rB) / (m * N)),
            min_sigma2,
        )

    if gamma is None:
        # No iteration ran: classify from the initial fits so the function
        # still returns labels.
        gamma = _em_responsibilities(rA, rB, sigma2)

    return gamma.argmax(axis=1)

In [33]:
# Run EM over the two sets of embeddings; each entry of `labels` is 0 when the
# sample is attributed to model A and 1 when it is attributed to model B.
labels = fit_em_and_predict(ZA, ZB, Y, EM_ITERATIONS, L2_REG)

# Submission

In [34]:
# Build the submission table: keep the dataset's ID column and translate the
# numeric labels into source names (0 -> "A", anything else -> "B").
submission = pd.DataFrame(
    {"ID": df["ID"], "Source": np.where(labels == 0, "A", "B")}
)

# Preview the first rows of the table.
submission.head()

Unnamed: 0,ID,Source
0,1,B
1,2,B
2,3,B
3,4,B
4,5,A


In [35]:
# Write the predictions to submission.csv in the current working directory,
# omitting the DataFrame index so only the ID and Source columns are saved.
submission.to_csv("submission.csv", index=False)