In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from copy import deepcopy

from scipy.stats import ortho_group
from synthetic import shape_means, shape_sigmas

# Seed NumPy's legacy global RNG so that the np.random.* sampling calls in the
# cells below produce the same draws on every Restart & Run All.
np.random.seed(0)

In [2]:
def get_gaussian_at_alpha(source_means, source_sigmas, target_means, target_sigmas, alpha):
    """Draw one labelled sample from the class-conditional Gaussian at ``alpha``.

    A class index is drawn uniformly over the rows of ``source_means``. The mean
    and covariance of that class are then linearly interpolated between the
    source parameters (weight ``1 - alpha``) and the target parameters
    (weight ``alpha``), and a point is sampled from the resulting Gaussian.

    Args:
        source_means: Per-class means of the source domain, indexed by class on axis 0.
        source_sigmas: Per-class covariance matrices of the source domain.
        target_means: Per-class means of the target domain.
        target_sigmas: Per-class covariance matrices of the target domain.
        alpha: Interpolation weight given to the target parameters.

    Returns:
        Tuple ``(x, y)``: the sampled point and the class index it was drawn for.
    """
    n_cls = source_means.shape[0]
    # A single multinomial trial with equal probabilities yields a one-hot
    # vector; its argmax is the uniformly chosen class.
    one_hot = np.random.multinomial(1, [1.0 / n_cls] * n_cls)
    y = np.argmax(one_hot)

    blended_mean = source_means[y] * (1 - alpha) + target_means[y] * alpha
    blended_sigma = source_sigmas[y] * (1 - alpha) + target_sigmas[y] * alpha
    return np.random.multivariate_normal(blended_mean, blended_sigma), y

In [3]:
d = 2
N_INTER = 8
N_train = 128
N_val = 64

# Draw four unit-norm class means and four random covariance matrices.
# Indices 0-1 parameterise the source domain, indices 2-3 the target domain.
means, var_list = [], []
for _ in range(4):
    center = np.random.randint(2, size=d)
    mean = np.random.multivariate_normal(center, np.eye(d))
    means.append(mean / np.linalg.norm(mean))
    # Generate diagonal.
    diag = np.diag(np.random.uniform(0.1, 0.3, size=d))
    # Conjugating the diagonal by a random orthogonal matrix gives a randomly
    # oriented covariance whose eigenvalues are the diagonal entries.
    rot = ortho_group.rvs(d)
    var = np.matmul(rot, np.matmul(diag, np.linalg.inv(rot)))
    var_list.append(var)

# Tie the first coordinate of the second target mean to the first source mean.
means[3][0] = means[0][0]

# The Gaussian parameters are identical for every sample, so build them once.
# assumes shape_means / shape_sigmas are pure functions of their arguments
# (they are imported from `synthetic` and not visible here) -- TODO confirm.
# NOTE(review): the source covariances are built from var_list[2:], the same
# slice the target uses, so var_list[:2] is never read. Confirm that sharing the
# covariances across domains is intentional.
src_means = shape_means(means[:2])
src_sigmas = shape_sigmas(var_list[2:], means[:2])
tgt_means = shape_means(means[2:])
tgt_sigmas = shape_sigmas(var_list[2:], means[2:])


def _sample_domain(alpha, n_samples):
    """Draw ``n_samples`` labelled points from the domain at interpolation ``alpha``.

    Returns:
        Tuple ``(x, y)`` of stacked arrays: the sampled points and their class indices.
    """
    pairs = [
        get_gaussian_at_alpha(src_means, src_sigmas, tgt_means, tgt_sigmas, alpha)
        for _ in range(n_samples)
    ]
    xs, ys = zip(*pairs)
    return np.stack(xs), np.stack(ys)


# The sampling order (source train/val, target train/val, then each
# intermediate alpha) is kept fixed so the seeded RNG stream yields the same data.
source_x_train, source_y_train = _sample_domain(0, N_train)
source_x_val, source_y_val = _sample_domain(0, N_val)

target_x_train, target_y_train = _sample_domain(1, N_train)
target_x_val, target_y_val = _sample_domain(1, N_val)

# NOTE(review): np.linspace(0, 1, N_INTER) includes both endpoints, so the
# first and last intermediate domains share their distribution with the source
# and target domains; each list ends up with N_INTER + 2 entries.
alphas = np.linspace(0, 1, N_INTER)
train_x = [source_x_train]
train_y = [source_y_train]
val_x = [source_x_val]
val_y = [source_y_val]
for alpha in alphas:
    inter_x, inter_y = _sample_domain(alpha, N_train)
    train_x.append(inter_x)
    train_y.append(inter_y)

    inter_x, inter_y = _sample_domain(alpha, N_val)
    val_x.append(inter_x)
    val_y.append(inter_y)
train_x.append(target_x_train)
train_y.append(target_y_train)
val_x.append(target_x_val)
val_y.append(target_y_val)


In [4]:
class Model:
    """Wrapper around a fitted scikit-learn classifier that exposes its weights as one flat vector.

    The wrapped estimator is stored in ``self.model`` and is either a
    ``LogisticRegression`` (``type="linear"``) or an ``MLPClassifier``
    (``type="mlp"``). ``flatten`` and ``set_weights`` are inverse operations on
    the estimator's learned parameters.
    """

    def __init__(self, type, train_x, train_y, h_size=None):
        """Build the estimator selected by ``type`` and fit it on the training data.

        Args:
            type: ``"linear"`` for logistic regression or ``"mlp"`` for a
                multi-layer perceptron.
            train_x: Training inputs passed to the estimator's ``fit``.
            train_y: Training labels passed to the estimator's ``fit``.
            h_size: Hidden layer sizes for the MLP; ignored for ``"linear"``.
                ``None`` falls back to scikit-learn's default of ``(100,)``.

        Raises:
            ValueError: If ``type`` is neither ``"linear"`` nor ``"mlp"``.
        """
        if type == "linear":
            estimator = LogisticRegression(random_state=0)
        elif type == "mlp":
            # MLPClassifier rejects hidden_layer_sizes=None, so substitute its
            # documented default instead of failing deep inside fit().
            layers = (100,) if h_size is None else h_size
            estimator = MLPClassifier(hidden_layer_sizes=layers, random_state=0, batch_size=8, max_iter=256)
        else:
            # Previously an unknown type left self.model unset and the failure
            # only surfaced later as an AttributeError.
            raise ValueError(f"Unknown model type {type!r}; expected 'linear' or 'mlp'.")
        self.model = estimator.fit(train_x, train_y)

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.model.predict(x)

    def score(self, x, y):
        """Return the wrapped estimator's ``score`` on ``(x, y)``."""
        return self.model.score(x, y)

    def fit(self, x, y):
        """Refit the wrapped estimator on ``(x, y)`` and return it."""
        return self.model.fit(x, y)

    def _parameter_arrays(self):
        """Return the estimator's parameter arrays in flattening order (coefficients, then intercepts)."""
        if isinstance(self.model, LogisticRegression):
            return [self.model.coef_, self.model.intercept_]
        if isinstance(self.model, MLPClassifier):
            return list(self.model.coefs_) + list(self.model.intercepts_)
        raise TypeError(f"Unsupported estimator type: {self.model.__class__.__name__}")

    def flatten(self):
        """Return a fresh 1-D array holding all coefficients followed by all intercepts.

        Raises:
            TypeError: If ``self.model`` is not one of the two supported estimators.
        """
        # np.concatenate always allocates, so the result never aliases the estimator.
        return np.concatenate([np.ravel(p) for p in self._parameter_arrays()])

    def set_weights(self, w):
        """Load a flat weight vector, as produced by ``flatten``, into the estimator.

        Args:
            w: 1-D array-like whose length equals the total number of
                coefficients and intercepts of the wrapped estimator.

        Raises:
            ValueError: If ``w`` does not contain exactly the expected number of values.
            TypeError: If ``self.model`` is not one of the two supported estimators.
        """
        flat = np.ravel(np.asarray(w))
        templates = self._parameter_arrays()
        expected = sum(p.size for p in templates)
        if flat.size != expected:
            raise ValueError(f"Expected {expected} weights, got {flat.size}.")

        # Copy every slice: storing views would let a later in-place update of
        # the estimator's parameters silently modify the caller's array.
        pieces = []
        start = 0
        for p in templates:
            end = start + p.size
            pieces.append(flat[start:end].reshape(p.shape).copy())
            start = end

        if isinstance(self.model, LogisticRegression):
            self.model.coef_, self.model.intercept_ = pieces
        else:
            n_layers = len(self.model.coefs_)
            for i in range(n_layers):
                self.model.coefs_[i] = pieces[i]
            for i in range(len(self.model.intercepts_)):
                self.model.intercepts_[i] = pieces[n_layers + i]

In [None]:
# Sequentially train one linear classifier per domain ("FT") and keep an
# exponentially weighted average of the resulting weight vectors ("CSAW").
weights = []
ewa_weights = []
for i, (x_tr, y_tr, x_v, y_v) in enumerate(zip(train_x[:-1], train_y[:-1], val_x[:-1], val_y[:-1])):
    clf = Model("linear", x_tr, y_tr)
    if i > 0:
        # LogisticRegression.fit discards any existing coefficients unless
        # warm_start=True, so without this flag loading the previous weights
        # before fit() has no effect and no fine-tuning takes place.
        clf.model.set_params(warm_start=True)
        # Pass a copy so the stored history can never alias the live estimator.
        clf.set_weights(weights[-1].copy())
        clf.fit(x_tr, y_tr)

    # Evaluate on the current domain (ID) and on every later domain (OOD).
    ood_scores = np.array([clf.score(x, y) for x, y in zip(val_x[i + 1:], val_y[i + 1:])])
    print(f"FT   | TIME {i}: ID = {clf.score(x_v, y_v)}; OOD mean = {ood_scores.mean()}; OOD worst = {ood_scores.min()}; OOD next = {clf.score(val_x[i + 1], val_y[i + 1])}")

    # Equal-weight running average of the flattened weights.
    if i == 0:
        ewa_weights.append(clf.flatten())
    else:
        ewa_weights.append(0.5 * ewa_weights[-1] + 0.5 * clf.flatten())

    # Evaluate the averaged weights with a copy so `clf` itself stays untouched.
    ewa_clf = deepcopy(clf)
    ewa_clf.set_weights(ewa_weights[-1])
    ood_scores = np.array([ewa_clf.score(x, y) for x, y in zip(val_x[i + 1:], val_y[i + 1:])])
    print(f"CSAW | TIME {i}: ID = {ewa_clf.score(x_v, y_v)}; OOD mean = {ood_scores.mean()}; OOD worst = {ood_scores.min()}; OOD next = {ewa_clf.score(val_x[i + 1], val_y[i + 1])}")
    print()
    weights.append(clf.flatten())

In [None]:
# Plot the classification boundary using a line
def make_plot(x, y, ws, bs, styles, cmap='bwr'):
    """Scatter labelled 2-D points and overlay linear decision boundaries.

    Each boundary is the line ``w[0] * x0 + w[1] * x1 + b = 0``. The figure is
    shown immediately via ``plt.show()``.

    Args:
        x: Array of points; column 0 is plotted on the horizontal axis and
            column 1 on the vertical axis.
        y: Per-point values used to colour the scatter plot.
        ws: Iterable of weight vectors; only ``w[0]`` and ``w[1]`` are read.
        bs: Iterable of intercepts, one per weight vector.
        styles: Iterable of matplotlib format strings, one per boundary.
        cmap: Colormap name passed to ``plt.scatter``.
    """
    xx = np.linspace(x[:, 0].min() - 1, x[:, 0].max() + 1)
    for w, b, style in zip(ws, bs, styles):
        if w[1] != 0:
            slope = -w[0] / w[1]
            yy = slope * xx - b / w[1]
            plt.plot(xx, yy, style)
        elif w[0] != 0:
            # Vertical boundary: the slope form would divide by zero, so draw
            # the line x0 = -b / w[0] across the vertical extent of the data.
            x0 = np.ravel(-b / w[0])[0]
            plt.plot([x0, x0], [x[:, 1].min() - 1, x[:, 1].max() + 1], style)
        # Both weights zero: there is no boundary to draw for this model.

    # Plot the data points with different colors indicating the labels
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap, alpha=0.8)
    plt.show()

In [None]:
from sklearn.manifold import TSNE

In [None]:

# Embed every training sample from every domain in one t-SNE run, then cut the
# embedding back into one array per domain. The sizes are derived from train_x
# itself; the previous hard-coded 12 * 200 did not match the data built above.
train_x_flat = np.concatenate(train_x)
train_x_flat = train_x_flat.reshape(len(train_x_flat), -1)
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3).fit_transform(train_x_flat)
domain_boundaries = np.cumsum([len(x) for x in train_x])[:-1]
train_x_embedded = np.split(X_embedded, domain_boundaries)

In [None]:
# Each stored weight vector comes from Model.flatten(): all coefficients
# followed by all intercepts. Split them back apart so the saved file keeps the
# separate "weights" / "intercepts" entries.
# assumes the vectors come from the linear model, i.e. one intercept per row of
# coefficients over train_x[0].shape[1] features -- TODO confirm before reusing
# this cell with the MLP.
weights_arr = np.stack(weights)
ewa_weights_arr = np.stack(ewa_weights)
n_features = train_x[0].shape[1]
n_coef = (weights_arr.shape[1] // (n_features + 1)) * n_features

# np.save pickles a dict as a 0-d object array; read it back with
# np.load("synthetic_1.npy", allow_pickle=True).item().
np.save("synthetic_1.npy", {
    "weights": weights_arr[:, :n_coef],
    "intercepts": weights_arr[:, n_coef:],
    "ewa_weights": ewa_weights_arr[:, :n_coef],
    "ewa_intercepts": ewa_weights_arr[:, n_coef:],
    "train_x": train_x,
    "train_y": train_y,
    "val_x": val_x,
    "val_y": val_y,
})

In [None]:
from sklearn.metrics import log_loss

In [None]:
def calculate_loss_contours(
    w1, w2, w3, val_x, val_y, granularity=20, margin=0.2, model_ids=None, method='erm'
):
    """Evaluate loss and accuracy of a linear classifier over a plane in weight space.

    The plane passes through the three weight vectors. ``w1`` is the origin, the
    x axis points from ``w1`` towards ``w3``, and the y axis is the component of
    ``w2 - w1`` orthogonal to the x axis. At every grid point the weights are
    interpreted as a binary logistic model and scored on ``(val_x, val_y)``.

    Args:
        w1: Flat weight vector used as the origin of the plane. The last entry
            is the intercept and the preceding entries are the coefficients,
            i.e. the layout ``Model.flatten`` produces for a binary linear model.
        w2: Flat weight vector used to define the y axis.
        w3: Flat weight vector used to define the x axis.
        val_x: Evaluation inputs, one row per sample.
        val_y: Evaluation labels; entries equal to 1 are the positive class and
            everything else is treated as the negative class.
        granularity: Number of grid points along each axis; the model is
            evaluated at ``granularity * granularity`` points.
        margin: Fraction by which the grid extends beyond the ``[0, 1]`` span
            between the models on each axis.
        model_ids: Opaque value copied unchanged into the result.
        method: Currently unused; kept for interface compatibility.

    Returns:
        Dict with keys ``"grid"`` (plane coordinates of each grid point, shape
        ``(granularity, granularity, 2)``), ``"coords"`` (plane coordinates of
        ``w1``, ``w2``, ``w3``), ``"losses"`` (mean log loss per grid point),
        ``"accuracies"`` (accuracy per grid point) and ``"model_ids"``.

    Raises:
        ValueError: If ``w1`` and ``w3`` coincide, or if the three weight
            vectors are collinear, because no plane is defined in either case.
    """
    w1, w2, w3 = (np.asarray(w, dtype=float) for w in (w1, w2, w3))
    val_x = np.asarray(val_x, dtype=float)
    positives = (np.asarray(val_y) == 1)
    targets = positives.astype(float)

    # Define x axis
    u = w3 - w1
    dx = np.linalg.norm(u).item()
    if dx == 0:
        raise ValueError("w1 and w3 are identical; the x axis is undefined.")
    u = u / dx

    # Define y axis: remove the x component so the two axes are orthonormal.
    v = w2 - w1
    v = v - np.dot(u, v) * u
    dy = np.linalg.norm(v).item()
    if dy == 0:
        raise ValueError("w1, w2 and w3 are collinear; the y axis is undefined.")
    v = v / dy

    # Define grid representing parameters that will be evaluated.
    coords = np.stack([get_xy(p, w1, u, v) for p in (w1, w2, w3)])
    alphas = np.linspace(0.0 - margin, 1.0 + margin, granularity)
    betas = np.linspace(0.0 - margin, 1.0 + margin, granularity)
    losses = np.zeros((granularity, granularity))
    accuracies = np.zeros((granularity, granularity))
    grid = np.zeros((granularity, granularity, 2))

    # Evaluate parameters at every point on grid
    for i, alpha in enumerate(alphas):
        for j, beta in enumerate(betas):
            p = w1 + alpha * dx * u + beta * dy * v
            logits = val_x @ p[:-1] + p[-1]
            grid[i, j] = [alpha * dx, beta * dy]
            # Binary cross-entropy written on the logits, log(1 + e^z) - y * z,
            # which stays finite for large |z|.
            losses[i, j] = np.mean(np.logaddexp(0.0, logits) - targets * logits)
            # A positive logit means a predicted probability above 0.5.
            accuracies[i, j] = np.mean((logits > 0) == positives)
    return {
        "grid": grid,
        "coords": coords,
        "losses": losses,
        "accuracies": accuracies,
        "model_ids": model_ids
    }

def get_xy(point, origin, vector_x, vector_y):
    """Return the coordinates of ``point`` in the plane spanned by two axis vectors.

    Args:
        point: Point for which the coordinates are calculated.
        origin: Origin of the new coordinate system.
        vector_x: x axis of the new coordinate system.
        vector_y: y axis of the new coordinate system.

    Returns:
        Length-2 array holding the projections of ``point - origin`` onto
        ``vector_x`` and ``vector_y``.
    """
    offset = point - origin
    return np.array([np.dot(offset, vector_x).item(), np.dot(offset, vector_y).item()])