In [1]:
import pickle
import numpy as np

In [2]:
# Experiment configuration.
# `dataset` names the sub-directory interpolated into the data, model and gradient paths.
dataset = "cifar2"
# `proj_dim` is the second dimension of the stored gradient arrays and part of their file names.
proj_dim = 4096

In [3]:
# Location of the pickled training-set index for the selected dataset.
train_index_path = "./data/{}/idx-train.pkl".format(dataset)

In [4]:
# Read the pickled training index from `train_index_path` and report its length.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(train_index_path, mode="rb") as handle:
    idx_train = pickle.load(handle)

print("len(idx_train):", len(idx_train))

len(idx_train): 10000


In [5]:
# load lds subset index
# Build one boolean membership mask per LDS subset: entry j of mask i is True
# when idx_train[j] appears in the i-th pickled subset index.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
mask_array_list = []
for i in range(256):
    with open(f'./data/{dataset}/lds_val/sub-idx-{i}.pkl', 'rb') as handle:
        sub_idx_train = pickle.load(handle)
    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0;
    # for a 1-D idx_train the resulting mask is identical.
    mask_array = np.isin(idx_train, sub_idx_train)
    mask_array_list.append(mask_array)
# Stack the per-subset masks into a single (n_subsets, n_train) boolean array.
lds_mask_array = np.stack(mask_array_list)
print("lds_mask_array.shape:", lds_mask_array.shape)

lds_mask_array.shape: (256, 10000)


In [20]:
# load lds subset model output
# For each of the 256 LDS subsets and each of the seeds 0, 1, 2, read the pickled
# test output and join its pieces along the last axis; the results are stacked
# first per seed, then per subset.
# NOTE(review): the original note labels these values as -log(p/(1-p)); that
# cannot be confirmed from this cell alone.
loss_array_list = []
for i in range(256):
    per_seed_outputs = []
    for seed in (0, 1, 2):
        result_path = f'./saved/models/{dataset}/lds-val/index-{i}-seed-{seed}/test.pkl'
        with open(result_path, 'rb') as handle:
            loss_list = pickle.load(handle)
        per_seed_outputs.append(np.concatenate(loss_list, axis=-1))
    loss_array_list.append(np.stack(per_seed_outputs))

lds_loss_array = np.stack(loss_array_list)
print("lds_loss_array.shape:", lds_loss_array.shape)

lds_loss_array.shape: (256, 3, 2000)


In [15]:
# load origin model output
# Read the pickled test output of the origin model for seeds 0, 1, 2, join each
# one along the last axis, and stack the three results along a new leading axis.
# NOTE(review): the original note labels these values as -log(p/(1-p)); that
# cannot be confirmed from this cell alone.
loss_array_list = []
for seed in (0, 1, 2):
    result_path = f'./saved/models/{dataset}/origin/seed-{seed}/test.pkl'
    with open(result_path, 'rb') as handle:
        loss_list = pickle.load(handle)
    loss_array_list.append(np.concatenate(loss_list, axis=-1))

origin_loss_array = np.stack(loss_array_list)
print("origin_loss_array.shape:", origin_loss_array.shape)


origin_loss_array.shape: (3, 2000)


In [10]:
# Average the LDS subset outputs over axis 1 of lds_loss_array.
lds_testset_correctness = np.mean(lds_loss_array, axis=1)
print("lds_testset_correctness.shape:", lds_testset_correctness.shape)

lds_testset_correctness.shape: (256, 2000)


In [12]:
# load grad
# Memory-map the float32 training-gradient file of each seed (read-only, viewed
# as 10000 x proj_dim) and stack the three maps along a new leading axis.
# NOTE(review): np.memmap reads raw bytes from offset 0 -- presumably these
# '.npy' files were written as raw buffers rather than with np.save; confirm.
train_grad_list = []
for seed in (0, 1, 2):
    grad_path = f'./saved/grad/{dataset}/seed-{seed}/train-{proj_dim}.npy'
    train_grad_seed = np.memmap(grad_path, dtype=np.float32, mode='r', shape=(10000, proj_dim))
    train_grad_list.append(train_grad_seed)

train_grad = np.stack(train_grad_list)
print("train_grad.shape:", train_grad.shape)


train_grad.shape: (3, 10000, 4096)


In [14]:
# Memory-map the float32 test-gradient file of each seed (read-only, viewed as
# 2000 x proj_dim) and stack the three maps along a new leading axis.
# NOTE(review): np.memmap reads raw bytes from offset 0 -- presumably these
# '.npy' files were written as raw buffers rather than with np.save; confirm.
test_grad_list = []
for seed in (0, 1, 2):
    grad_path = f'./saved/grad/{dataset}/seed-{seed}/test-{proj_dim}.npy'
    test_grad_seed = np.memmap(grad_path, dtype=np.float32, mode='r', shape=(2000, proj_dim))
    test_grad_list.append(test_grad_seed)

test_grad = np.stack(test_grad_list)
print("test_grad.shape:", test_grad.shape)


test_grad.shape: (3, 2000, 4096)


In [None]:
# calculate the score
# Accumulators for the sweep over lamb_list.
lds_list = []

# Sweep grid: mantissas 1..9 at every decade from 1e0 through 1e6
# (1e0, 2e0, ..., 9e0, 1e1, ..., 9e6), 63 values in ascending order.
# The sub-unit decades (1e-2 .. 9e-1) and the value 0 are currently excluded.
# Integer arithmetic keeps each entry exactly equal to its float literal.
lamb_list = [
    float(mantissa * 10 ** exponent)
    for exponent in range(7)
    for mantissa in range(1, 10)
]

rs_list = []
ps_list = []

# Best result seen so far; best_lds starts at -inf so any finite score replaces it.
best_scores = None
best_lds = -np.inf

In [None]:
# Per-seed Gram matrix of the training gradients.
# train_grad is built with np.stack over three (10000, proj_dim) arrays, so it is
# 3-D. `.T` on a 3-D array reverses *all* axes, which makes
# `train_grad.T @ train_grad` fail with a matmul shape mismatch.
# Swapping only the last two axes lets matmul batch over the leading axis and
# yields one (proj_dim, proj_dim) matrix per seed; for a 2-D input this is
# identical to `train_grad.T @ train_grad`.
kernel = np.swapaxes(train_grad, -1, -2) @ train_grad