# Model training stability analysis

### Mount drive

In [None]:
# Make Google Drive available under /content/drive (Colab only); every dataset,
# checkpoint and activation path used below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

### Import required libraries

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from datetime import date
from itertools import product
import os
import torchvision.models as tmodels
from functools import partial
import collections

### Define functions

The code below is reused from https://colab.research.google.com/github/google-research/google-research/blob/master/representation_similarity/Demo.ipynb

"Similarity of Neural Network Representations Revisited"
Simon Kornblith, Mohammad Norouzi, Honglak Lee, Geoffrey Hinton
https://arxiv.org/abs/1905.00414

In [None]:
def gram_linear(x):
  """Return the linear-kernel Gram matrix of a feature matrix.

  Args:
    x: Feature matrix of shape num_examples x num_features.

  Returns:
    The num_examples x num_examples matrix of pairwise inner products between
    the rows of `x`.
  """
  pairwise_inner_products = np.dot(x, x.T)
  return pairwise_inner_products


def gram_rbf(x, threshold=1.0):
  """Return the RBF-kernel Gram matrix of a feature matrix.

  The kernel bandwidth is derived from the data: it is `threshold` times the
  median pairwise Euclidean distance (the heuristic used in the CKA paper).

  Args:
    x: Feature matrix of shape num_examples x num_features.
    threshold: Fraction of the median Euclidean distance used as bandwidth.

  Returns:
    The num_examples x num_examples Gram matrix.
  """
  inner = x.dot(x.T)
  row_sq_norms = np.diag(inner)
  # ||a - b||^2 = -2 a.b + ||a||^2 + ||b||^2, evaluated for every pair of rows.
  pairwise_sq_dist = -2 * inner + row_sq_norms[:, None] + row_sq_norms[None, :]
  median_sq_dist = np.median(pairwise_sq_dist)
  scale = 2 * threshold ** 2 * median_sq_dist
  return np.exp(-pairwise_sq_dist / scale)


def center_gram(gram, unbiased=False):
  """Center a symmetric Gram matrix.

  This is equivalent to centering the (possibly infinite-dimensional) features
  induced by the kernel before computing the Gram matrix.

  The input is never modified. Integer or boolean matrices are promoted to
  float64 so that the fractional means can be subtracted; floating-point (and
  complex) inputs keep their dtype.

  Args:
    gram: A num_examples x num_examples symmetric matrix.
    unbiased: Whether to adjust the Gram matrix in order to compute an unbiased
      estimate of HSIC. Note that this estimator may be negative.

  Returns:
    A symmetric matrix with centered columns and rows.

  Raises:
    ValueError: If `gram` is not symmetric.
  """
  if not np.allclose(gram, gram.T):
    raise ValueError('Input must be a symmetric matrix.')

  # The in-place subtractions below use float64 means; an integer working copy
  # would make numpy raise a casting error, so promote such inputs first.
  if np.issubdtype(gram.dtype, np.inexact):
    gram = gram.copy()
  else:
    gram = gram.astype(np.float64)

  if unbiased:
    # This formulation of the U-statistic, from Szekely, G. J., & Rizzo, M.
    # L. (2014). Partial distance correlation with methods for dissimilarities.
    # The Annals of Statistics, 42(6), 2382-2412, seems to be more numerically
    # stable than the alternative from Song et al. (2007).
    n = gram.shape[0]
    np.fill_diagonal(gram, 0)
    means = np.sum(gram, 0, dtype=np.float64) / (n - 2)
    means -= np.sum(means) / (2 * (n - 1))
    gram -= means[:, None]
    gram -= means[None, :]
    np.fill_diagonal(gram, 0)
  else:
    means = np.mean(gram, 0, dtype=np.float64)
    # Halve the grand mean because `means` is subtracted twice (rows and
    # columns).
    means -= np.mean(means) / 2
    gram -= means[:, None]
    gram -= means[None, :]

  return gram


def cka(gram_x, gram_y, debiased=False):
  """Compute centered kernel alignment (CKA) between two Gram matrices.

  Args:
    gram_x: A num_examples x num_examples Gram matrix.
    gram_y: A num_examples x num_examples Gram matrix.
    debiased: If True, center with the unbiased HSIC estimator. The resulting
      CKA value may still be biased.

  Returns:
    The value of CKA between X and Y.
  """
  centered_x = center_gram(gram_x, unbiased=debiased)
  centered_y = center_gram(gram_y, unbiased=debiased)

  # The inner product of the flattened centered matrices is HSIC up to a
  # factor, (n-1)**2 for the biased variant or n*(n-3) for the unbiased one.
  # The factor cancels in the CKA ratio, so it is omitted.
  scaled_hsic = centered_x.ravel().dot(centered_y.ravel())

  norm_x = np.linalg.norm(centered_x)
  norm_y = np.linalg.norm(centered_y)
  return scaled_hsic / (norm_x * norm_y)


def _debiased_dot_product_similarity_helper(
    xty, sum_squared_rows_x, sum_squared_rows_y, squared_norm_x, squared_norm_y,
    n):
  """Helper for computing debiased dot product similarity (i.e. linear HSIC)."""
  # This formula can be derived by manipulating the unbiased estimator from
  # Song et al. (2007).
  return (
      xty - n / (n - 2.) * sum_squared_rows_x.dot(sum_squared_rows_y)
      + squared_norm_x * squared_norm_y / ((n - 1) * (n - 2)))


def feature_space_linear_cka(features_x, features_y, debiased=False):
  """Compute linear-kernel CKA directly from feature matrices.

  Working in feature space is typically faster than building the Gram matrices
  when there are fewer features than examples.

  Args:
    features_x: A num_examples x num_features matrix of features.
    features_y: A num_examples x num_features matrix of features.
    debiased: Use unbiased estimator of dot product similarity. CKA may still be
      biased. Note that this estimator may be negative.

  Returns:
    The value of CKA between X and Y.
  """
  # Center every feature column; the inputs themselves are left untouched.
  centered_x = features_x - np.mean(features_x, 0, keepdims=True)
  centered_y = features_y - np.mean(features_y, 0, keepdims=True)

  similarity = np.linalg.norm(centered_x.T.dot(centered_y)) ** 2
  norm_x = np.linalg.norm(centered_x.T.dot(centered_x))
  norm_y = np.linalg.norm(centered_y.T.dot(centered_y))

  if not debiased:
    return similarity / (norm_x * norm_y)

  num_examples = centered_x.shape[0]
  # Per-example squared norms; einsum avoids materialising centered_x ** 2.
  row_sq_x = np.einsum('ij,ij->i', centered_x, centered_x)
  row_sq_y = np.einsum('ij,ij->i', centered_y, centered_y)
  total_sq_x = np.sum(row_sq_x)
  total_sq_y = np.sum(row_sq_y)

  similarity = _debiased_dot_product_similarity_helper(
      similarity, row_sq_x, row_sq_y, total_sq_x, total_sq_y, num_examples)
  norm_x = np.sqrt(_debiased_dot_product_similarity_helper(
      norm_x ** 2, row_sq_x, row_sq_x, total_sq_x, total_sq_x, num_examples))
  norm_y = np.sqrt(_debiased_dot_product_similarity_helper(
      norm_y ** 2, row_sq_y, row_sq_y, total_sq_y, total_sq_y, num_examples))

  return similarity / (norm_x * norm_y)

### Prepare data

In [None]:
# Value matched against the "Dataset" column of the normalisation-parameter CSV
norm_param_dataset_ref = "AVG"
# Sub-folder of the DATASET directory that is loaded with ImageFolder
dataset_name = "TEST_0_FINAL"

In [None]:
# Read the per-dataset normalisation statistics and pick the reference row.
norm_param_df = pd.read_csv('/content/drive/MyDrive/KASHIKO/DATASET/TRG_DATASET_NORM_PARAM.csv')
norm_param_row = norm_param_df.loc[norm_param_df["Dataset"] == str(norm_param_dataset_ref)]

# `.item()` insists on exactly one matching row per statistic.
meanR, meanG, meanB = (norm_param_row[column].item() for column in ("meanR", "meanG", "meanB"))

stdR, stdG, stdB = (norm_param_row[column].item() for column in ("stdR", "stdG", "stdB"))

In [None]:
# Images become tensors and are normalised channel-wise with the statistics
# retrieved from the normalisation-parameter CSV.
preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((meanR, meanG, meanB), (stdR, stdG, stdB)),
])
dataset = datasets.ImageFolder(
    '/content/drive/MyDrive/KASHIKO/DATASET/' + dataset_name,
    preprocessing,
)

# Three independent random draws of 10, 100 and 2000 images; the complementary
# part of each split is discarded.
num_images = len(dataset)
short_dataset = torch.utils.data.random_split(dataset, [num_images - 10, 10])[1]
long_dataset = torch.utils.data.random_split(dataset, [num_images - 100, 100])[1]
extra_long_dataset = torch.utils.data.random_split(dataset, [num_images - 2000, 2000])[1]

### Load models

In [None]:
class Net(nn.Module):
    """Small CNN: two conv / batch-norm / max-pool stages and three linear layers.

    The first linear layer expects 24 * 53 * 53 flattened features and the last
    one emits 2 values per example.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 12 channels with a 5x5 kernel, then 2x2 max pooling.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 12 -> 24 channels with a 5x5 kernel, then 2x2 max pooling.
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(num_features=24)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=24 * 53 * 53, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        """Run the network on a batch of images and return the raw fc3 outputs."""
        stage1 = self.pool1(F.relu(self.bn1(self.conv1(x))))
        stage2 = self.pool2(F.relu(self.bn2(self.conv2(stage1))))
        flattened = stage2.view(-1, 24 * 53 * 53)
        hidden = F.relu(self.fc1(flattened))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# Four freshly initialised networks, created in order net0 .. net3.
net0, net1, net2, net3 = (Net() for _ in range(4))

In [None]:
# Load the three trained checkpoints. The networks in this notebook are never
# moved off the CPU, so map every stored tensor to the CPU; without this,
# checkpoints saved from a GPU cannot be deserialised on a CPU-only runtime.
state_dict1 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/testmodel_2021-05-28_05:59:14_ trg_dataset1 batch_size=100 learning_rate=0.001 scheduler_step_size=5 scheduler_gamma=1 weight_decay=0 epoch_number=14 accuracy=97.77070063694268.pth', map_location='cpu')
state_dict2 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/testmodel_2021-05-28_07:34:20_ trg_dataset1 batch_size=100 learning_rate=0.001 scheduler_step_size=5 scheduler_gamma=1 weight_decay=0 epoch_number=11 accuracy=98.51380042462846.pth', map_location='cpu')
state_dict3 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/testmodel_2021-05-28_08:37:05_ trg_dataset1 batch_size=100 learning_rate=0.001 scheduler_step_size=5 scheduler_gamma=1 weight_decay=0 epoch_number=14 accuracy=98.19532908704883.pth', map_location='cpu')


In [None]:
# Copy the trained weights into net1-net3. net0 is not given a checkpoint here,
# so it keeps the weights it was initialised with.
net1.load_state_dict(state_dict1)
net2.load_state_dict(state_dict2)
net3.load_state_dict(state_dict3)

### Generate activation file for each model

In [None]:
# Serve short_dataset in its fixed order, 10 examples per batch; an incomplete
# trailing batch would be dropped.
loader = torch.utils.data.DataLoader(
    short_dataset, batch_size=10, shuffle=False, num_workers=2, drop_last=True
)

In [None]:
# For net0 (with short_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net0 while
# the batches of `loader` are pushed through it, then persist the result.
activations0 = collections.defaultdict(list)


def save_activation0(name, mod, inp, out0):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations0[name].append(out0.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net0 would record every activation more than once.
hook_handles0 = [
    m.register_forward_hook(partial(save_activation0, name))
    for name, m in net0.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net0.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net0(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles0:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations0 = {name: torch.cat(outputs, 0) for name, outputs in activations0.items()}
torch.save(activations0,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations0_short.pt')

In [None]:
# For net1 (with short_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net1 while
# the batches of `loader` are pushed through it, then persist the result.
activations1 = collections.defaultdict(list)


def save_activation1(name, mod, inp, out1):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations1[name].append(out1.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net1 would record every activation more than once.
hook_handles1 = [
    m.register_forward_hook(partial(save_activation1, name))
    for name, m in net1.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net1.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net1(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles1:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations1 = {name: torch.cat(outputs, 0) for name, outputs in activations1.items()}
torch.save(activations1,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations1_short.pt')

In [None]:
# For net2 (with short_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net2 while
# the batches of `loader` are pushed through it, then persist the result.
activations2 = collections.defaultdict(list)


def save_activation2(name, mod, inp, out2):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations2[name].append(out2.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net2 would record every activation more than once.
hook_handles2 = [
    m.register_forward_hook(partial(save_activation2, name))
    for name, m in net2.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net2.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net2(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles2:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations2 = {name: torch.cat(outputs, 0) for name, outputs in activations2.items()}
torch.save(activations2,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations2_short.pt')

In [None]:
# For net3 (with short_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net3 while
# the batches of `loader` are pushed through it, then persist the result.
activations3 = collections.defaultdict(list)


def save_activation3(name, mod, inp, out3):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations3[name].append(out3.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net3 would record every activation more than once.
hook_handles3 = [
    m.register_forward_hook(partial(save_activation3, name))
    for name, m in net3.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net3.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net3(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles3:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations3 = {name: torch.cat(outputs, 0) for name, outputs in activations3.items()}
torch.save(activations3,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations3_short.pt')

In [None]:
# Serve long_dataset in its fixed order, 10 examples per batch; an incomplete
# trailing batch would be dropped.
loader = torch.utils.data.DataLoader(
    long_dataset, batch_size=10, shuffle=False, num_workers=2, drop_last=True
)

In [None]:
# Reload models
# Fresh Net instances carry no forward hooks from earlier cells.
# NOTE(review): presumably this is why the models are rebuilt here - confirm.
# net0 is re-created without loading a checkpoint, so its weights are newly
# initialised and differ from the net0 instance created earlier.
net0 = Net()
net1 = Net()
net2 = Net()
net3 = Net()
# Restore the trained weights from the state dicts loaded earlier.
net1.load_state_dict(state_dict1)
net2.load_state_dict(state_dict2)
net3.load_state_dict(state_dict3)

In [None]:
# For net0 (with long_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net0 while
# the batches of `loader` are pushed through it, then persist the result.
activations0 = collections.defaultdict(list)


def save_activation0(name, mod, inp, out0):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations0[name].append(out0.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net0 would record every activation more than once.
hook_handles0 = [
    m.register_forward_hook(partial(save_activation0, name))
    for name, m in net0.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net0.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net0(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles0:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations0 = {name: torch.cat(outputs, 0) for name, outputs in activations0.items()}
torch.save(activations0,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations0_long.pt')

In [None]:
# For net1 (with long_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net1 while
# the batches of `loader` are pushed through it, then persist the result.
activations1 = collections.defaultdict(list)


def save_activation1(name, mod, inp, out1):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations1[name].append(out1.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net1 would record every activation more than once.
hook_handles1 = [
    m.register_forward_hook(partial(save_activation1, name))
    for name, m in net1.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net1.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net1(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles1:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations1 = {name: torch.cat(outputs, 0) for name, outputs in activations1.items()}
torch.save(activations1,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations1_long.pt')

In [None]:
# For net2 (with long_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net2 while
# the batches of `loader` are pushed through it, then persist the result.
activations2 = collections.defaultdict(list)


def save_activation2(name, mod, inp, out2):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations2[name].append(out2.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net2 would record every activation more than once.
hook_handles2 = [
    m.register_forward_hook(partial(save_activation2, name))
    for name, m in net2.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net2.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net2(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles2:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations2 = {name: torch.cat(outputs, 0) for name, outputs in activations2.items()}
torch.save(activations2,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations2_long.pt')

In [None]:
# For net3 (with long_dataset)
# Record the output of every Conv2d, Linear and BatchNorm2d layer of net3 while
# the batches of `loader` are pushed through it, then persist the result.
activations3 = collections.defaultdict(list)


def save_activation3(name, mod, inp, out3):
    """Forward hook: store the layer output (moved to the CPU) under `name`."""
    activations3[name].append(out3.cpu())


# Keep the handles so the hooks can be detached afterwards; otherwise re-running
# this cell or reusing net3 would record every activation more than once.
hook_handles3 = [
    m.register_forward_hook(partial(save_activation3, name))
    for name, m in net3.named_modules()
    if isinstance(m, (nn.Conv2d, nn.Linear, nn.BatchNorm2d))
]

# Forward pass over every batch served by `loader`
net3.eval()  # evaluation mode only needs to be set once, not per batch
try:
    with torch.no_grad():
        for images, _ in loader:
            net3(images)  # the hooks do the recording; the output is not needed
finally:
    for handle in hook_handles3:
        handle.remove()

# One tensor per layer: concatenate the per-batch outputs along the example axis.
activations3 = {name: torch.cat(outputs, 0) for name, outputs in activations3.items()}
torch.save(activations3,'/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations3_long.pt')

### Perform CKA analysis on Conv and BatchNorm layers (using short_dataset)

In [None]:
# Retrieve stored activations
# Each file holds a plain dict mapping a layer name to its concatenated
# activation tensor, so no placeholder container is needed before loading.
ACT0 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations0_short.pt')

ACT1 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations1_short.pt')

ACT2 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations2_short.pt')

ACT3 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations3_short.pt')

In [None]:
# Conv1
net0_conv1_activations = ACT0['conv1']
net1_conv1_activations = ACT1['conv1']
net2_conv1_activations = ACT2['conv1']
net3_conv1_activations = ACT3['conv1']

# PyTorch convolution outputs are laid out as (N, C, H, W). Average over the
# spatial axes (H, W) so that each example is described by one value per
# channel. axis=(1, 2) is the spatial pair only for TensorFlow's NHWC layout;
# on NCHW data it would average away the channel axis instead.
avg_acts0 = np.mean(net0_conv1_activations.numpy(), axis=(2, 3))
avg_acts1 = np.mean(net1_conv1_activations.numpy(), axis=(2, 3))
avg_acts2 = np.mean(net2_conv1_activations.numpy(), axis=(2, 3))
avg_acts3 = np.mean(net3_conv1_activations.numpy(), axis=(2, 3))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts0 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts0
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts2 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts2
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts2 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts2, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Conv2
net0_conv2_activations = ACT0['conv2']
net1_conv2_activations = ACT1['conv2']
net2_conv2_activations = ACT2['conv2']
net3_conv2_activations = ACT3['conv2']

# PyTorch convolution outputs are laid out as (N, C, H, W). Average over the
# spatial axes (H, W) so that each example is described by one value per
# channel. axis=(1, 2) is the spatial pair only for TensorFlow's NHWC layout;
# on NCHW data it would average away the channel axis instead.
avg_acts0 = np.mean(net0_conv2_activations.numpy(), axis=(2, 3))
avg_acts1 = np.mean(net1_conv2_activations.numpy(), axis=(2, 3))
avg_acts2 = np.mean(net2_conv2_activations.numpy(), axis=(2, 3))
avg_acts3 = np.mean(net3_conv2_activations.numpy(), axis=(2, 3))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts0 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts0
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts2 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts2
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts2 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts2, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# BN1
net0_bn1_activations = ACT0['bn1']
net1_bn1_activations = ACT1['bn1']
net2_bn1_activations = ACT2['bn1']
net3_bn1_activations = ACT3['bn1']

# BatchNorm2d outputs keep the (N, C, H, W) layout of their input. Average over
# the spatial axes (H, W) so that each example is described by one value per
# channel. axis=(1, 2) is the spatial pair only for TensorFlow's NHWC layout;
# on NCHW data it would average away the channel axis instead.
avg_acts0 = np.mean(net0_bn1_activations.numpy(), axis=(2, 3))
avg_acts1 = np.mean(net1_bn1_activations.numpy(), axis=(2, 3))
avg_acts2 = np.mean(net2_bn1_activations.numpy(), axis=(2, 3))
avg_acts3 = np.mean(net3_bn1_activations.numpy(), axis=(2, 3))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts0 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts0
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts2 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts2
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts2 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts2, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# BN2
net0_bn2_activations = ACT0['bn2']
net1_bn2_activations = ACT1['bn2']
net2_bn2_activations = ACT2['bn2']
net3_bn2_activations = ACT3['bn2']

# BatchNorm2d outputs keep the (N, C, H, W) layout of their input. Average over
# the spatial axes (H, W) so that each example is described by one value per
# channel. axis=(1, 2) is the spatial pair only for TensorFlow's NHWC layout;
# on NCHW data it would average away the channel axis instead.
avg_acts0 = np.mean(net0_bn2_activations.numpy(), axis=(2, 3))
avg_acts1 = np.mean(net1_bn2_activations.numpy(), axis=(2, 3))
avg_acts2 = np.mean(net2_bn2_activations.numpy(), axis=(2, 3))
avg_acts3 = np.mean(net3_bn2_activations.numpy(), axis=(2, 3))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts0 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts0
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts2 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts2
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts1 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts1, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Every CKA variant for avg_acts2 vs avg_acts3 (whichever layer they currently hold).
lhs_acts, rhs_acts = avg_acts2, avg_acts3
lhs_gram, rhs_gram = gram_linear(lhs_acts), gram_linear(rhs_acts)

cka_from_examples = cka(lhs_gram, rhs_gram)
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

cka_from_features = feature_space_linear_cka(lhs_acts, rhs_acts)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

rbf_cka = cka(gram_rbf(lhs_acts, threshold=0.3), gram_rbf(rhs_acts, threshold=0.3))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# cka() centers copies of the Gram matrices, so the linear ones can be reused.
cka_from_examples_debiased = cka(lhs_gram, rhs_gram, debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(lhs_acts, rhs_acts, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

### CKA analysis for Fully Connected Layers (using long_dataset)

In [None]:
# Retrieve stored activations
# Each file holds a plain dict mapping a layer name to its concatenated
# activation tensor, so no placeholder container is needed before loading.
ACT0 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations0_long.pt')

ACT1 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations1_long.pt')

ACT2 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations2_long.pt')

ACT3 = torch.load('/content/drive/MyDrive/KASHIKO/MODELS/stability_analysis_activations3_long.pt')

In [None]:
# Stored fc1 outputs of each network, taken from the loaded activation dicts.
net0_fc1_activations, net1_fc1_activations, net2_fc1_activations, net3_fc1_activations = (
    stored_acts['fc1'] for stored_acts in (ACT0, ACT1, ACT2, ACT3)
)

In [None]:
# fc1 representations: net1 vs. net0.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc1_activations.numpy()
acts_y = net0_fc1_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc1 representations: net1 vs. net2.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc1_activations.numpy()
acts_y = net2_fc1_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc1 representations: net1 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc1_activations.numpy()
acts_y = net3_fc1_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc1 representations: net2 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net2_fc1_activations.numpy()
acts_y = net3_fc1_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Pull the stored 'fc2' activations out of each network's activation store.
(net0_fc2_activations,
 net1_fc2_activations,
 net2_fc2_activations,
 net3_fc2_activations) = (ACT0['fc2'], ACT1['fc2'], ACT2['fc2'], ACT3['fc2'])

In [None]:
# fc2 representations: net1 vs. net0.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc2_activations.numpy()
acts_y = net0_fc2_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc2 representations: net1 vs. net2.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc2_activations.numpy()
acts_y = net2_fc2_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc2 representations: net1 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc2_activations.numpy()
acts_y = net3_fc2_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc2 representations: net2 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net2_fc2_activations.numpy()
acts_y = net3_fc2_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# Pull the stored 'fc3' activations out of each network's activation store.
(net0_fc3_activations,
 net1_fc3_activations,
 net2_fc3_activations,
 net3_fc3_activations) = (ACT0['fc3'], ACT1['fc3'], ACT2['fc3'], ACT3['fc3'])

In [None]:
# fc3 representations: net1 vs. net0.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc3_activations.numpy()
acts_y = net0_fc3_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc3 representations: net1 vs. net2.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc3_activations.numpy()
acts_y = net2_fc3_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc3 representations: net1 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net1_fc3_activations.numpy()
acts_y = net3_fc3_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

In [None]:
# fc3 representations: net2 vs. net3.
# Convert each activation tensor to NumPy once and reuse the arrays below.
acts_x = net2_fc3_activations.numpy()
acts_y = net3_fc3_activations.numpy()

# Linear CKA from the example-space Gram matrices.
cka_from_examples = cka(gram_linear(acts_x), gram_linear(acts_y))
print('Linear CKA from Examples: {:.5f}'.format(cka_from_examples))

# Linear CKA from the feature-space formulation.
cka_from_features = feature_space_linear_cka(acts_x, acts_y)
print('Linear CKA from Features: {:.5f}'.format(cka_from_features))

# RBF-kernel CKA; 0.5 is handed to gram_rbf as its `threshold` argument.
rbf_cka = cka(gram_rbf(acts_x, 0.5), gram_rbf(acts_y, 0.5))
print('RBF CKA: {:.5f}'.format(rbf_cka))

# Same two linear estimates with debiased=True.
cka_from_examples_debiased = cka(
    gram_linear(acts_x), gram_linear(acts_y), debiased=True)
print('Linear CKA from Examples (Debiased): {:.5f}'.format(cka_from_examples_debiased))

cka_from_features_debiased = feature_space_linear_cka(
    acts_x, acts_y, debiased=True)
print('Linear CKA from Features (Debiased): {:.5f}'.format(cka_from_features_debiased))

### Perform Results Comparison

In [None]:
# Build the evaluation loader over extra_long_dataset.
# Samples are served in dataset order (no shuffling) in batches of 10 using
# two worker processes; drop_last=True discards a trailing batch that would
# hold fewer than 10 samples.
loader = torch.utils.data.DataLoader(
    dataset=extra_long_dataset,
    batch_size=10,
    shuffle=False,
    num_workers=2,
    drop_last=True,
)

In [None]:
# Instantiate three fresh Net models, then restore their weights from
# state_dict1 .. state_dict3 respectively.
net1, net2, net3 = Net(), Net(), Net()

net1.load_state_dict(state_dict1)
net2.load_state_dict(state_dict2)
net3.load_state_dict(state_dict3)

In [None]:
# Compare the three reloaded networks on `loader`: per-network accuracy
# against the labels, and pairwise agreement between their predictions.

# Running tallies accumulated over every batch.
total = 0
same12 = 0
same13 = 0
same23 = 0
correct1 = 0
correct2 = 0
correct3 = 0

# NOTE(review): `m` is not used anywhere in this cell (predictions are taken
# from the raw outputs); it is kept defined in case another cell refers to it.
m = nn.Softmax(dim=1)

# Switch to evaluation mode once up front rather than on every batch.
net1.eval()
net2.eval()
net3.eval()

with torch.no_grad():
    for images, labels in loader:
        out1 = net1(images)
        out2 = net2(images)
        out3 = net3(images)

        # Predicted class = index of the largest output along dim 1.
        predicted1 = out1.argmax(dim=1)
        predicted2 = out2.argmax(dim=1)
        predicted3 = out3.argmax(dim=1)

        total += labels.size(0)

        # Predictions that match the ground-truth labels.
        correct1 += (predicted1 == labels).sum().item()
        correct2 += (predicted2 == labels).sum().item()
        correct3 += (predicted3 == labels).sum().item()

        # Predictions on which a pair of networks agree (right or wrong).
        same12 += (predicted1 == predicted2).sum().item()
        same13 += (predicted1 == predicted3).sum().item()
        same23 += (predicted3 == predicted2).sum().item()

# Convert the tallies to percentages of the evaluated samples.
test_accuracy1 = 100 * correct1 / total
test_accuracy2 = 100 * correct2 / total
test_accuracy3 = 100 * correct3 / total

same12_accuracy = 100 * same12 / total
same13_accuracy = 100 * same13 / total
same23_accuracy = 100 * same23 / total


print('Accuracy Net1: {:.5f}'.format(test_accuracy1))
print('Accuracy Net2: {:.5f}'.format(test_accuracy2))
print('Accuracy Net3: {:.5f}'.format(test_accuracy3))

print('Consistency Net1 vs Net2: {:.5f}'.format(same12_accuracy))
print('Consistency Net1 vs Net3: {:.5f}'.format(same13_accuracy))
print('Consistency Net2 vs Net3: {:.5f}'.format(same23_accuracy))