In [20]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

import torch

from falkon import LogisticFalkon

from utils import read_data, get_data_path, standardize, get_logflk_config, plot_reco

In [2]:
# Load the reference sample and one data sample, restricted to `features`.
features = ['t_1', 't_2', 't_3', 't_4', 'theta']  # 'n_hits' is commented out
# Alternatives: "Thr 75%", "Thr 50%", "Thr 25%", "Ca 75%", "Ca 50%", "Ca 25%"
data_key = "Thr 75%"
# Alternatives: "Ref", "Thr 75%", "Thr 50%", "Thr 25%", "Ca 75%", "Ca 50%", "Ca 25%"
data_keys = ["Thr 75%"]

reference_path = get_data_path("Ref")

# Each read gets its own fixed-seed generator (0 for reference, 1 for data).
reference = read_data(
    reference_path,
    features=features,
    rnd=np.random.default_rng(0),
)
data = read_data(
    get_data_path(data_key),
    features=features,
    rnd=np.random.default_rng(1),
)


In [3]:
# Sample sizes: N0 reference events, N1 data events, N events in total.
N0, N1 = 2000, 500
N = N0 + N1
# Per-event weight of a reference row, so that N0 * weight == N1.
weight = N1 / N0

# Column vectors of per-event weights: `weight` for reference rows, 1 for data rows.
ref_weights = np.full((N0, 1), weight)
data_weights = np.ones((N1, 1))

In [5]:
# initialize dataset
# One explicitly seeded generator drives both draws, so the sub-sampled dataset
# is identical on every run of this cell from a fresh kernel. Change
# SAMPLING_SEED to draw a different sample.
SAMPLING_SEED = 0
sampling_rng = np.random.default_rng(SAMPLING_SEED)

# Rows [0, N0) are drawn from `reference`, rows [N0, N0+N1) from `data`,
# both without replacement.
X = np.zeros(shape=(N, len(features)))
X[:N0, :] = sampling_rng.choice(reference, size=N0, replace=False)
X[N0:N0 + N1, :] = sampling_rng.choice(data, size=N1, replace=False)

Xnorm = standardize(X)

# Labels: 0 for the reference rows, 1 for the data rows.
Y = np.zeros(shape=(N, 1))
Y[N0:, :] = 1.0

# torch.from_numpy shares memory with the numpy arrays and keeps their dtype.
Xtorch = torch.from_numpy(Xnorm)
Ytorch = torch.from_numpy(Y)

In [25]:
# Hyperparameters handed to get_logflk_config / LogisticFalkon.
M = 2500        # NOTE(review): presumably the number of kernel centers — confirm in get_logflk_config
flk_sigma = 2   # NOTE(review): presumably the kernel width — confirm in get_logflk_config
lam = 1e-10     # penalty value (reported as "penalty" in the training log)

# A single penalty with a single iteration budget; each is passed as a one-element list.
flk_config = get_logflk_config(
    M,
    flk_sigma,
    [lam],
    weight=weight,
    iter=[100000],
    seed=None,
    cpu=False,
)
model = LogisticFalkon(**flk_config)

In [26]:
# Train, predict and compute the log-likelihood-ratio test statistic t.
# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the (minutes-long) fit.
st_time = time.perf_counter()
model.fit(Xtorch, Ytorch)
dt = round(time.perf_counter() - st_time, 2)

# .cpu() is a no-op for a tensor already in host memory and makes the numpy
# conversion safe if predictions come back on a GPU (the config uses cpu=False).
preds = model.predict(Xtorch).cpu().numpy()
# t is twice the sum of the predictions over the rows labelled 1 (data rows).
t = 2 * np.sum(preds[Y.flatten() == 1])

print(f"t = {t}, training time = {dt} sec")

Iteration 0 - penalty 1.000000e-10 - sub-iterations 100000
t = 875.3184722200967, training time = 180.19 sec
