In [6]:
import sys
import torch
import numpy as np
import pickle as pkl
from tqdm import tqdm

sys.path.append('../../../')

from experiments.assumptions.degeneracy.script import eigenvalue_result, eigenvalue_results_large, plot_rank_train, rank_over_training
from models.supervised.mlp.model import MLP
from models.supervised.bimt.model import BioMLP



In [2]:
# Seed NumPy's global RNG and PyTorch's default generator with one shared value
# so that random draws made later in the notebook are repeatable.
SEED = 2
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fd0803284b0>

In [6]:
mode = "moon"
model_name = "mlp"
size = "2_wide"
models_path = f"../../../models/supervised/{model_name}/saved_models"

# One dataset is loaded up front and reused for every configuration below.
# An "overfit" selection reads the dataset stored under "2_wide".
dataset_dir = "2_wide" if size == "overfit" else size
with open(f'{models_path}/{dataset_dir}/{mode}/dataset.pkl', 'rb') as f:
	dataset = pkl.load(f)

# Positional MLP constructor arguments for each saved configuration.
# Dict order is the order in which the configurations are processed.
mlp_args = {
	"mixup_vanilla": (2, 7, 10, 2),
	"vanilla": (2, 7, 10, 2),
	"overfit": (2, 7, 2, 1),
	"2_wide": (2, 7, 2, 2),
}
overfit_epochs = [60, 80, 199, 9999, 999, 5]
default_epochs = [5, 60, 80, 199]

for size, ctor_args in mlp_args.items():
	epochs = overfit_epochs if size == "overfit" else default_epochs
	for epoch in epochs:
		# Build a fresh model per checkpoint: the "overfit" branch below
		# truncates the layer list, so an instance cannot be reused.
		model = MLP(*ctor_args)
		print(f"Size: {size}, Epoch: {epoch}")
		model.eval()
		checkpoint = torch.load(f'{models_path}/{size}/{mode}/model_{epoch}.pth')
		model.load_state_dict(checkpoint)
		if size == "overfit":
			# After loading the weights, drop the final entry of model.layers
			# and keep num_layers in sync with it.
			model.num_layers -= 1
			model.layers = model.layers[:-1]
		save_path = f"figures/{model_name}/{mode}/{size}/{epoch}/"
		# Same analysis twice, once per `wrt` mode, sharing one save_path.
		for wrt in ("output_wise", "layer_wise"):
			eigenvalue_result(dataset.X, model, N=50, labels=dataset.y, wrt=wrt, sigma=0.05, precision=7, save_path=save_path)

Size: vanilla, Epoch: 5
Size: vanilla, Epoch: 60
Size: vanilla, Epoch: 80
Size: vanilla, Epoch: 199


In [None]:
mode = "blobs"
model_name = "mlp"
size = "vanilla"
# Rebuild the checkpoint root from model_name in this cell. Without this line
# the cell silently reuses whatever `models_path` the last-executed cell left
# behind, which may point at a different model family.
models_path = f"../../../models/supervised/{model_name}/saved_models"
with open(f'{models_path}/{size}/{mode}/dataset.pkl', 'rb') as f:
	dataset = pkl.load(f)
epochs = [5, 60, 80, 199]

for size in ["vanilla"]:
	for epoch in epochs:
		# Fresh model per checkpoint; weights come from the saved state dict.
		model = MLP(2,7,4,4)
		print(f"Size: {size}, Epoch: {epoch}")
		model.eval()
		model.load_state_dict(torch.load(f'{models_path}/{size}/{mode}/model_{epoch}.pth'))
		save_path = f"figures/{model_name}/{mode}/{size}/{epoch}/"
		# Run the analysis once per `wrt` mode, sharing the same save_path.
		eigenvalue_result(dataset.X, model, N=50, labels=dataset.y, wrt="output_wise", sigma=0.05, precision=7, save_path=save_path)
		eigenvalue_result(dataset.X, model, N=50, labels=dataset.y, wrt="layer_wise", sigma=0.05, precision=7, save_path=save_path)

In [4]:
mode = "moon"
model_name = "bimt"
size = "vanilla"

# A single BioMLP instance is reused; each iteration overwrites its weights.
model = BioMLP(shp=[2,20,20,2])
models_path = f"../../../models/supervised/{model_name}/saved_models"

# Per-checkpoint series, appended in epoch order.
res_q_25, res_med, res_q_75 = [], [], []
with open(f'{models_path}/{size}/{mode}/dataset.pkl', 'rb') as f:
	dataset = pkl.load(f)

# Checkpoints are read for epochs 0, 100, ..., 9900.
for epoch in tqdm(range(0, 10000, 100)):
	checkpoint = torch.load(f'{models_path}/{size}/{mode}/model_{epoch}.pth')
	model.load_state_dict(checkpoint)
	model.eval()
	save_path = f"figures/{model_name}/{mode}/{size}/{epoch}/"

	q_25, med, q_75 = eigenvalue_results_large(dataset.X, model, N=50, wrt="output_wise", sigma=0.05, precision=7, save_path=save_path)
	for series, value in ((res_q_25, q_25), (res_med, med), (res_q_75, q_75)):
		series.append(value)


	

100%|██████████| 100/100 [02:32<00:00,  1.52s/it]


In [None]:
# Hand the accumulated q25 / median / q75 series to plot_rank_train, passing
# the figure folder of the current (model_name, mode, size) as savepath.
rank_fig_dir = f"figures/{model_name}/{mode}/{size}/"
plot_rank_train(res_q_25, res_med, res_q_75, savepath=rank_fig_dir)


In [7]:
mode = "moon"
model_name = "mlp"
size = "overfit"

models_path = f"../../../models/supervised/{model_name}/saved_models"

# Per-checkpoint series, appended in epoch order.
res_q_25, res_med, res_q_75 = [], [], []
# The dataset is read from the "2_wide" folder, not from the "overfit" one.
with open(f'{models_path}/2_wide/{mode}/dataset.pkl', 'rb') as f:
	dataset = pkl.load(f)

# Checkpoints are read for epochs 0, 100, ..., 9900.
for epoch in tqdm(range(0, 10000, 100)):
	# Fresh model per checkpoint, because the layer list is truncated below.
	model = MLP(2,7,2,1)
	checkpoint = torch.load(f'{models_path}/{size}/{mode}/model_{epoch}.pth')
	model.load_state_dict(checkpoint)
	if size == "overfit":
		# Drop the final entry of model.layers and keep num_layers in sync.
		model.num_layers -= 1
		model.layers = model.layers[:-1]
	model.eval()
	# Computed for parity with the other cells; it is not passed to
	# rank_over_training below.
	save_path = f"figures/{model_name}/{mode}/{size}/{epoch}/"

	q_25, med, q_75 = rank_over_training(dataset.X, model, N=50, wrt="output_wise", sigma=0.05, precision=7)
	for series, value in ((res_q_25, q_25), (res_med, med), (res_q_75, q_75)):
		series.append(value)

100%|██████████| 100/100 [00:35<00:00,  2.81it/s]


In [8]:
# Hand the accumulated q25 / median / q75 series to plot_rank_train, passing
# the figure folder of the current (model_name, mode, size) as savepath.
rank_fig_dir = f"figures/{model_name}/{mode}/{size}/"
plot_rank_train(res_q_25, res_med, res_q_75, savepath=rank_fig_dir)


In [8]:
from models.data.mnist import MNISTDataset
from models.supervised.cnn.model import CNN
import torch.nn as nn
import os
from torch.utils.data import DataLoader
from torch.utils.data import Subset

# --- Data: a fixed random subset of the MNIST training split ---------------
dataset = MNISTDataset(train=True, root="../../../data")
batch_size = 128
N_batches = batch_size//8
# np.random.randint draws with replacement, so indices may repeat.
subset_mnist = np.random.randint(0, len(dataset), (batch_size * batch_size)//8)
random_subset = Subset(dataset, subset_mnist)

val_data = DataLoader(random_subset, batch_size=batch_size, shuffle=False)

# --- Run identifiers used to build checkpoint and figure paths -------------
mode="moon"
model_name = "cnn"
size = "vanilla"

# --- Model -----------------------------------------------------------------
# Layer specifications are passed straight to the project's CNN constructor.
cnn_layers = [(1, 16, 3, 1), (16, 32, 3, 1)]
fc_layers = [(32 * 24 * 24, 128, nn.ReLU()), (128, 64, nn.ReLU())]
output_dim = 10

model = CNN(cnn_layers, fc_layers, output_dim)
models_path = f"../../../models/supervised/{model_name}/saved_models"

# Per-checkpoint series, filled by the evaluation loop in a later cell.
res_q_25, res_med, res_q_75 = [], [], []

# --- Checkpoint discovery --------------------------------------------------
# Keep directory entries ending in "pth" and take the integer between the
# first "_" and the following "." (for example "model_12.pth" gives 12).
checkpoint_files = os.listdir(f"{models_path}/{size}")
epochs = sorted(
    int(file_name.split('_')[1].split('.')[0])
    for file_name in checkpoint_files
    if file_name.endswith("pth")
)

In [12]:
# For every discovered checkpoint, collect the q25 / median / q75 values over
# all batches of val_data and store one combined list per checkpoint.
#
# NOTE(review): the saved output of this cell ends in a RuntimeError (conv
# weight [16, 1, 3, 3] vs. input [1, 2500, 28, 28]). The printed shapes suggest
# the sampling="heat" path returns points without the channel axis. Confirm
# inside eigenvalue_results_large before relying on this cell.
for epoch in tqdm(epochs):
	checkpoint = torch.load(f'{models_path}/{size}/model_{epoch}.pth')
	model.load_state_dict(checkpoint)
	model.eval()
	save_path = f"figures/{model_name}/{size}/{epoch}/"
	q_25, med, q_75 = [], [], []
	for X, _ in val_data:
		# Labels are not needed here; the helper receives the batch as a NumPy array.
		batch_q_25, batch_med, batch_q_75 = eigenvalue_results_large(X.detach().numpy(), model, N=50, wrt="output_wise", sigma=0.05, precision=7, save_path=save_path, sampling="heat")
		q_25.extend(batch_q_25)
		med.extend(batch_med)
		q_75.extend(batch_q_75)

	for series, values in ((res_q_25, q_25), (res_med, med), (res_q_75, q_75)):
		series.append(values)


  0%|          | 0/301 [00:00<?, ?it/s]

(128, 1, 28, 28)


  0%|          | 0/301 [00:00<?, ?it/s]

Using 11 nearest neighbors
(2500, 28, 28)
torch.Size([2500, 28, 28])





RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[1, 2500, 28, 28] to have 1 channels, but got 2500 channels instead

In [4]:
# Shape check: feed a random batch shaped like the MNIST batches
# (128, 1, 28, 28) through the heat-kernel sampler and display the result shape.
# NOTE(review): sample_points_heat_kernel is not imported in any cell visible
# here. Confirm where it comes from.
# The recorded output shape is (100, 28, 28), i.e. without the singleton
# channel axis of the input.
noise_batch = np.random.randn(128, 1, 28, 28)
points = sample_points_heat_kernel(noise_batch, num_samples=100, t=0.1, connect_components=1)
points.shape

Using 11 nearest neighbors




(100, 28, 28)