In [14]:
import numpy as np
import torch
from matplotlib import pyplot as plt
import os
from tqdm import tqdm, trange
# import autograd.numpy as np
import pickle
import sys
import config
from rfm import *
from IPython.display import clear_output

# Enumerate GPUs in PCI bus order and expose only GPU 0 to this process.
# These must be set before the first CUDA call for them to take effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # specify which GPU(s) to be used
# os.environ['CUDA_LAUNCH_BLOCKING']='1'

torch.backends.cudnn.deterministic = True
dtype = torch.float
# Use the (single visible) GPU when CUDA is usable; otherwise fall back to the
# CPU so that later `.to(device=device)` calls do not fail on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Seed both RNGs used in this notebook (torch for DataLoader shuffling,
# numpy for np.random.choice in pre_process).
torch.manual_seed(42)
np.random.seed(42)

debug = False

# define the early stopping criterion
def early_stopping(validation_losses, epoch, patience=10):
    """Decide whether training should stop because validation loss has stalled.

    Args:
        validation_losses: sequence of validation losses, one entry per epoch,
            indexed so that position ``k`` holds the loss of epoch ``k``.
        epoch: index of the current epoch.
        patience: number of epochs that may pass after the best (lowest-loss)
            epoch before stopping is triggered. Defaults to 10.

    Returns:
        True if more than ``patience`` epochs have elapsed since the first
        occurrence of the minimum loss, otherwise False. An empty history
        never triggers stopping.
    """
    # np.argmin raises on an empty sequence; with no history there is
    # nothing to compare against, so keep training.
    if len(validation_losses) == 0:
        return False
    best_epoch = int(np.argmin(validation_losses))
    return bool(epoch - best_epoch > patience)

# define the pre-processing function
def pre_process(torchset, n_samples, num_classes=10, normalize=False):
    """Draw a random subset of ``torchset`` as flattened inputs with one-hot labels.

    Indices are drawn with ``np.random.choice`` using its default settings
    (i.e. with replacement), so the same item may appear more than once.

    Args:
        torchset: indexable collection whose items are ``(x, y)`` pairs, where
            ``y`` is usable as an index into a length-``num_classes`` tensor.
        n_samples: number of items to draw.
        num_classes: length of each one-hot label vector.
        normalize: if True, divide each ``x`` by ``np.linalg.norm(x)`` before
            flattening.

    Returns:
        A list of ``(x_flat, one_hot_y)`` tuples in draw order.
    """
    def _one_hot(label):
        # Float vector of zeros with a single 1 at the class position.
        target = torch.zeros(num_classes)
        target[label] = 1
        return target

    def _flatten(features):
        if normalize:
            features = features/np.linalg.norm(features)
        return features.reshape(-1)

    picked = list(np.random.choice(len(torchset), n_samples))
    return [
        (_flatten(x), _one_hot(y))
        for x, y in (torchset[ix] for ix in picked)
    ]


In [15]:

# Load the array stored at the configured 'Higgs_path'; column 0 is the class
# label (1 = signal, 0 = background) and the remaining columns are features.
dataset = np.load(config.resource_configs['Higgs_path'])
n_signal = np.sum(dataset[:,0])
n_total = dataset.shape[0]
print('signal : background =',n_signal,':',n_total-n_signal)
print('signal :',n_signal/n_total*100,'%')

# Move the full array to `device`, then split by label and drop the label column.
dataset = torch.from_numpy(dataset).to(device=device, dtype=dtype)
labels = dataset[:,0]
dataset_P = dataset[labels==0][:, 1:] # background (5170877, 28)
dataset_Q = dataset[labels==1][:, 1:] # signal     (5829123, 28)

# Expand each configured training size n into `repeats` sizes n, n+1, ..., n+repeats-1.
n_org_list = config.train_param_configs['n_tr_list']
repeats = config.train_param_configs['repeats']
n_tr_list = [n + i for n in n_org_list for i in range(repeats)]

batch_size = config.train_param_configs['batch_size']
N_epoch = config.train_param_configs['N_epoch']
checkpoints_path = config.expr_configs['checkpoints_path']
# One-element label tensors living on `device`, reused as targets for every sample.
zero_in_gpu = torch.zeros(1, device=device, dtype=dtype)
one_in_gpu = torch.ones(1, device=device, dtype=dtype)


signal : background = 5829123.0 : 5170877.0
signal : 52.99202727272727 %


In [16]:
n_tr = 5000
print('------ n =', n_tr, '------')
# pre-process
# Training set: the first n_tr rows of each class, interleaved as
# (background, 0), (signal, 1), (background, 0), ...
trainset = [
    pair
    for k in range(n_tr)
    for pair in ((dataset_P[k], zero_in_gpu), (dataset_Q[k], one_in_gpu))
]
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
# Held-out set: the next 10000 rows of each class (disjoint from the training
# rows), interleaved the same way and served unshuffled in batches of 10000.
validateset = [
    pair
    for k in range(n_tr, n_tr + 10000)
    for pair in ((dataset_P[k], zero_in_gpu), (dataset_Q[k], one_in_gpu))
]
test_loader = torch.utils.data.DataLoader(validateset, batch_size=10000, shuffle=False)

------ n = 5000 ------


In [17]:

def euclidean_distances_M(samples, centers, M, squared=True):
    """Pairwise distances between rows of ``samples`` and ``centers`` under matrix ``M``.

    For every pair (x, c) the value x M x^T - 2 x M c^T + c M c^T is computed.

    Args:
        samples: 2-D tensor with one point per row.
        centers: 2-D tensor with one point per row. When it is the very same
            object as ``samples`` the per-row quadratic terms are reused.
        M: square matrix applied between the points.
        squared: if True (default) return the raw quadratic values; if False
            clamp them at zero and return their square root.

    Returns:
        Tensor of shape ``(len(samples), len(centers))``.
    """
    def _quadratic_form(points):
        # Row-wise x M x^T, kept as a column vector.
        return torch.sum((points @ M) * points, dim=1, keepdim=True)

    row_terms = _quadratic_form(samples)
    col_terms = row_terms if samples is centers else _quadratic_form(centers)
    col_terms = torch.reshape(col_terms, (1, -1))

    cross = samples @ (M @ torch.t(centers))
    result = -2 * cross + row_terms + col_terms
    if squared:
        return result
    # Clamp before the root: rounding can push values slightly below zero.
    return torch.sqrt(torch.clamp(result, min=0))

def laplace_kernel_M(samples, centers, bandwidth, M):
    """Laplace kernel matrix exp(-d_M(x, c) / bandwidth) between two point sets.

    Args:
        samples: 2-D tensor with one point per row.
        centers: 2-D tensor with one point per row.
        bandwidth: scale dividing the (non-squared) distance.
        M: matrix passed through to ``euclidean_distances_M``.

    Returns:
        Tensor of shape ``(len(samples), len(centers))``.
    """
    # Non-squared distances: the kernel uses the distance itself, not its square.
    dists = euclidean_distances_M(samples, centers, M, squared=False)
    return torch.exp(-dists / bandwidth)

In [19]:
L = 10        # bandwidth passed to laplace_kernel_M
reg = 1e-3    # ridge term added to the diagonal of the training kernel
# The kernel matrix is parameterized as M = P^T P, with P starting at the identity.
P = torch.eye(28, device=device, dtype=dtype)
P.requires_grad = True
M = P.t() @ P  # recomputed from P for every batch inside the loop
optimizer = torch.optim.SGD([P], lr=0.001)
torch.autograd.set_detect_anomaly(False)
# Only the first batch of the held-out loader is used for monitoring.
X_test, y_test = next(iter(test_loader))
loss_list = []
test_loss_list = []
for i in range(501):
    # Two independently shuffled passes over the same training loader:
    # the first batch fits the kernel regressor, the second one scores it.
    train_loader_iter = iter(train_loader)
    train_loader_iter2 = iter(train_loader)
    for (X_train, y_train), (X_train2, y_train2) in zip(train_loader_iter, train_loader_iter2):
        M = P.t() @ P
        K_train = laplace_kernel_M(X_train, X_train, L, M)
        K_inverse = torch.inverse(K_train + reg * torch.eye(len(K_train), device = device, dtype=dtype))
        # Dual coefficients of the regularized kernel regressor. Computing them
        # once avoids the (n_eval x B)(B x B) matrix-matrix product that
        # `K @ K_inverse @ y_train` performs for every prediction.
        # NOTE(review): solving the linear system directly instead of forming
        # the explicit inverse is generally preferable — confirm that the
        # installed torch version provides torch.linalg.solve before switching.
        alpha = K_inverse @ y_train
        # validation
        K_train2 = laplace_kernel_M(X_train2, X_train, L, M)
        preds = K_train2 @ alpha
        loss = torch.mean(torch.square(preds - y_train2))
        # NOTE(review): the optimizer step below is disabled, so P is never
        # updated and every iteration evaluates the kernel with M = I.
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        with torch.no_grad():
            K_test = laplace_kernel_M(X_test, X_train, L, M)
            preds_test = K_test @ alpha
            test_loss = torch.mean(torch.square(preds_test - y_test))
        loss_list.append(loss.item())
        test_loss_list.append(test_loss.item())

        # Live-updating loss curves; clear_output(wait=True) defers clearing
        # until the next output arrives, so the display does not flicker.
        plt.plot(loss_list, label='train')
        plt.plot(test_loss_list, label='test')
        plt.legend()
        plt.show()
        plt.close()
        print(i, X_train.shape)
        print('loss =', loss.item())
        print('test_loss =', test_loss.item())
        # A prediction counts as correct when thresholding at 0.5 matches the 0/1 label.
        count = torch.sum(y_test == (preds_test>0.5)).item()
        print("Round " + str(i) + " Acc: ", count / len(y_test))
        clear_output(wait=True)


KeyboardInterrupt: 