In [None]:
# Mount Google Drive at /content/drive so that the dataset, model and
# representation paths under /content/drive/MyDrive used below are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys
import os
import math
import random
from itertools import product
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms

# Load the MNIST datasets, downloading them into the Drive folder if they are
# not there yet. ToTensor() turns every image into a tensor on access.
mnist_root = '/content/drive/MyDrive/UKP/mnist_experiments/datasets/'

train_dataset = torchvision.datasets.MNIST(root=mnist_root,
                                           download=True,
                                           train=True,
                                           transform=transforms.ToTensor())

# train=False selects the held-out test split. torchvision's MNIST defaults to
# train=True, so leaving the flag out would silently load the training images
# a second time and every "test" representation would be computed on
# training data.
test_dataset = torchvision.datasets.MNIST(root=mnist_root,
                                          download=True,
                                          train=False,
                                          transform=transforms.ToTensor())



In [None]:
# Use the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Display the selected device to confirm whether a GPU was picked up.
device

In [None]:
# Flatten the first `train_data_n` training images to rows of 28*28 values and
# stack them into one matrix (one row per image) on the compute device.
train_data_n = 20000
flat_train_images = [
    train_dataset[idx][0].reshape(-1, 28*28) for idx in range(train_data_n)
]
train_data = torch.vstack(flat_train_images).to(device)

# Total number of samples available in `train_dataset`.
num_data = len(train_dataset)

In [None]:
# Architectures to process: widths 100, 200, ..., 1000 and depths 1, ..., 10.
width_subset = 100*np.arange(1, 11)
depth_subset = np.arange(1, 11)

# number of samples in each representation; taken from `train_data_n` so the
# folder name cannot drift from the number of rows actually in `train_data`
n = train_data_n

subset = "train"
mode = "eval"

# Output folder for the representations computed on the training images.
# NOTE(review): this path previously pointed at ".../MyDrive/UKP-experiments/..."
# while the datasets, the models and the test representations all live under
# ".../MyDrive/UKP/..."; it is aligned with them here. Confirm that no existing
# outputs need to be moved from the old location.
reps_folder = f"/content/drive/MyDrive/UKP/mnist_experiments/reps/{subset}/{n}_{mode}"
# exist_ok=True makes this a no-op when the folder is already there, without a
# separate check-then-create step.
os.makedirs(reps_folder, exist_ok=True)

model_names = []
modeldir = "/content/drive/MyDrive/UKP/mnist_experiments/models/widthdepth/"
filenames = np.sort(os.listdir(modeldir))

# reps_saved[k] is True once the representation of filenames[k] has been
# written, which allows an interrupted run to resume where it stopped.
reps_saved = np.zeros(len(filenames), dtype=bool)
if os.path.exists(f"{reps_folder}/reps_saved.npy"):
    loaded_flags = np.load(f"{reps_folder}/reps_saved.npy")
    if loaded_flags.shape == reps_saved.shape:
        reps_saved = loaded_flags
        print(f"{np.sum(reps_saved)} existing reps", flush=True)
    else:
        # The flags are positional; if the directory listing changed size they
        # no longer line up with `filenames`, so start from scratch.
        print("Ignoring reps_saved.npy: length does not match the model directory", flush=True)

for i, filename in enumerate(filenames):
    # File names are expected to look like "width<W>_depth<D>.<ext>".
    name = filename.split(".")[0]
    splits = name.split('_')
    try:
        width = int(splits[0][5:])
        depth = int(splits[1][5:])
    except (IndexError, ValueError):
        # Stray entries (checkpoint folders, notes, ...) must not abort the scan.
        print(f"Skipping {filename}: not a width/depth model file", flush=True)
        continue
    if width in width_subset and depth in depth_subset and not reps_saved[i]:
        model_names.append(name)
model_names = np.sort(model_names)
total_models = len(model_names)



In [None]:
# `reps_saved` is indexed by position in `filenames` (all files in the model
# directory), not by position in the filtered `model_names` list. Map every
# model name (file name without extension) back to its `filenames` position so
# that progress is recorded against the right entry when resuming or when only
# a subset of the models is selected.
file_index = {fname.split(".")[0]: k for k, fname in enumerate(filenames)}

for model_name in model_names:
    print(f"Computing representation for {model_name} based on {subset} data", flush=True)

    model_path = f'{modeldir}{model_name}.pth'
    # NOTE(review): torch.load unpickles a complete model object, which can run
    # arbitrary code; only load checkpoints from a trusted source. Newer PyTorch
    # releases may additionally require weights_only=False here - confirm
    # against the installed version.
    # map_location lets checkpoints saved on a GPU load on a CPU-only runtime.
    model = torch.load(model_path, map_location=device).to(device)

    # Everything except the final layer: its output is the representation.
    model_lastlayer = model[0:-1]
    if mode == "eval":
        # The output folder is labelled with `mode`; make the network actually
        # run in inference mode (matters for dropout / batch-norm layers, if any).
        model_lastlayer.eval()

    # No gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        # Transposed before saving: one column per input sample.
        rep = model_lastlayer(train_data).cpu().numpy().T

    np.save(f"{reps_folder}/{model_name}_rep.npy", rep)

    # Release model and activations before loading the next network.
    del model
    del model_lastlayer
    del rep
    torch.cuda.empty_cache()

    # Persist progress after every model so an interrupted run can resume.
    reps_saved[file_index[model_name]] = True
    np.save(f"{reps_folder}/reps_saved.npy", reps_saved)

In [None]:
# Flatten the first `test_data_n` images of `test_dataset` to rows of 28*28
# values and stack them into one matrix (one row per image) on the compute device.
test_data_n = 10000
flat_test_images = [
    test_dataset[idx][0].reshape(-1, 28*28) for idx in range(test_data_n)
]
test_data = torch.vstack(flat_test_images).to(device)

In [None]:
# number of samples in each representation
n = test_data_n

subset = "test"
mode = "eval"

# Output folder for the representations computed on the test images.
reps_folder = f"/content/drive/MyDrive/UKP/mnist_experiments/reps/{subset}/{n}_{mode}"
# exist_ok=True makes this a no-op when the folder is already there, without a
# separate check-then-create step.
os.makedirs(reps_folder, exist_ok=True)

In [None]:
# Architectures to process: widths 100, 200, ..., 1000 and depths 1, ..., 10.
width_subset = 100*np.arange(1, 11)
depth_subset = np.arange(1, 11)

model_names = []
modeldir = "/content/drive/MyDrive/UKP/mnist_experiments/models/widthdepth/"
filenames = np.sort(os.listdir(modeldir))

# reps_saved[k] is True once the representation of filenames[k] has been
# written, which allows an interrupted run to resume where it stopped.
reps_saved = np.zeros(len(filenames), dtype=bool)
if os.path.exists(f"{reps_folder}/reps_saved.npy"):
    loaded_flags = np.load(f"{reps_folder}/reps_saved.npy")
    if loaded_flags.shape == reps_saved.shape:
        reps_saved = loaded_flags
        print(f"{np.sum(reps_saved)} existing reps", flush=True)
    else:
        # The flags are positional; if the directory listing changed size they
        # no longer line up with `filenames`, so start from scratch.
        print("Ignoring reps_saved.npy: length does not match the model directory", flush=True)

for i, filename in enumerate(filenames):
    # File names are expected to look like "width<W>_depth<D>.<ext>".
    name = filename.split(".")[0]
    splits = name.split('_')
    try:
        width = int(splits[0][5:])
        depth = int(splits[1][5:])
    except (IndexError, ValueError):
        # Stray entries (checkpoint folders, notes, ...) must not abort the scan.
        print(f"Skipping {filename}: not a width/depth model file", flush=True)
        continue
    if width in width_subset and depth in depth_subset and not reps_saved[i]:
        model_names.append(name)
model_names = np.sort(model_names)
total_models = len(model_names)


In [None]:
# `reps_saved` is indexed by position in `filenames` (all files in the model
# directory), not by position in the filtered `model_names` list. Map every
# model name (file name without extension) back to its `filenames` position so
# that progress is recorded against the right entry when resuming or when only
# a subset of the models is selected.
file_index = {fname.split(".")[0]: k for k, fname in enumerate(filenames)}

for model_name in model_names:
    print(f"Computing representation for {model_name} based on {subset} data", flush=True)

    model_path = f'{modeldir}{model_name}.pth'
    # NOTE(review): torch.load unpickles a complete model object, which can run
    # arbitrary code; only load checkpoints from a trusted source. Newer PyTorch
    # releases may additionally require weights_only=False here - confirm
    # against the installed version.
    # map_location lets checkpoints saved on a GPU load on a CPU-only runtime.
    model = torch.load(model_path, map_location=device).to(device)

    # Everything except the final layer: its output is the representation.
    model_lastlayer = model[0:-1]
    if mode == "eval":
        # The output folder is labelled with `mode`; make the network actually
        # run in inference mode (matters for dropout / batch-norm layers, if any).
        model_lastlayer.eval()

    # No gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        # Transposed before saving: one column per input sample.
        rep = model_lastlayer(test_data).cpu().numpy().T

    np.save(f"{reps_folder}/{model_name}_rep.npy", rep)

    # Release model and activations before loading the next network.
    del model
    del model_lastlayer
    del rep
    torch.cuda.empty_cache()

    # Persist progress after every model so an interrupted run can resume.
    reps_saved[file_index[model_name]] = True
    np.save(f"{reps_folder}/reps_saved.npy", reps_saved)