In [1]:
import numpy as np
import pandas as pd
import torch

from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

import pdb

from trainer import *
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
def perform_iterations(iterations, dataset_name, kernel, reg_values, datasets, **kwargs):
    IKM = IterativeKernelModel(dataset_name, kernel=kernel, datasets=datasets)
    results = {}
    for it in range(1, iterations+1):
        print(f"iteration: {it}")
        if it>1:
            yhat, preds = normalize(yhat, preds)
            IKM.D_train = np.diag(yhat.flatten(), k=0)
            IKM.D_test = np.diag(preds.flatten(), k=0)
            IKM.make_kernel_matrices(ind=it-1, kernel=kernel)
            weights = list(np.ones(it)/(it))
            IKM.combine_kernels(weights)
        else:
            weights = list(np.ones(it))
        yhat, preds, res = IKM.solve(reg)
        res["weights"], res["dataset name"] = weights, dataset_name
        results[f"iter {it}"] = res
    return results

In [None]:
dataset_name = "CIFAR2"
labels = [0, 3]
ratio = 1.0
max_iter = 2
file_path = f"./results/{dataset_name}/{"_".join(labels)}/level_{max_iter}_{ratio}"


In [None]:
setup = {"max iterations": max_iter,
         "file_path": file_path,
         "kernel 1": ["rbf", "linear"],
         "kernel 2": ["rbf", "linear"],
         "g 2": ["identity"],
         "mixing 2": [[0.75, 0.25], [0.50, 0.50], [0.25, 0.75], [0.00, 1.00]]
         "log_regs 1": [-2, -1, -0.5, 0, 0.5, 1, 2],
         "log_regs 2": [-2, -1, -0.5, 0, 0.5, 1, 2], 
         }

In [None]:
datasets = load_dataset(dataset_name, ratio=ratio, labels=labels)
IKM = IterativeKernelModel(dataset_name, datasets=datasets)

In [None]:
def log(report, result, **kwargs):
    file_path = kwargs.get("file_path")
    logs = torch.load(file_path)
    logs[report] = result
    torch.save(logs, file_path)

In [1]:
def choose_kernel(IKM, iter_number, report, **kwargs):
    for kernel in setup[f"kernel {i}"]:
        if kernel == "rbf":
            new_report = report + "-> rbf"
            pass
        elif kernel == "linear":
            new_report = report + "-> rbf"
            pass
        if iter_number == 1:
            IKM.set_Ds()
            IKM.make_kernel_matrices(0, kernel, **kwargs)
            cross_validate(IKM, iter_number, new_report, **kwargs)
        else:
            choose_g(IKM, iter_number, new_report, kernel, **kwargs)

In [None]:
def choose_g(IKM, iter_number, report, kernel, **kawrgs):
    yhat, preds = kwargs.get("yhat"), kwargs.get("preds")
    ## Normalize the predictions
    yhat, preds = normalize_preds(yhat, preds)
    for g in setup[f"g {iter_number}"]:
        if g == "identity":
            new_report = report + "-> g identity"
        else:
            pass
        
        IKM.set_Ds(yhat, preds)
        IKM.make_kernel_matrices(ind=iter_number, kernel=kernel)

In [None]:
def mix_kernels(IKM, iter_number, report, **kwargs):
    if iter_number > 1:
        for weights in setup[f"mixing {iter_number}"]:
            new_report = report + f"-> mix ({", ".join(weights)})"
            IKM.combine_kernels(weights)
            cross_validate(IKM, iter_number, new_report, **kwargs)

In [None]:
def cross_validate(IKM, iter_number, report, **kwargs):
    avg_diag = IKM.avg_diag_of_kernel()
    for log_reg_ratio in setup[f"log_regs {iter_number}"]:
        reg = avg_diag * (10**log_reg_ratio)
        new_report = report + f"-> reg {reg}"
        yhat, preds, res = IKM.solve(reg)
        log(new_report, res, **kwargs)
        perform_iteration(IKM, iter_number+1, new_report, yhat=yhat, preds=preds, **kwargs)

In [None]:
def perform_iteration(IKM, iter_number, report, **kwargs):
    max_iter = setup["max iterations"]
    if iter_number < max_iter:
        new_report = report + f"{iter_number}: "
        choose_kernel(IKM, iter_number, new_report, **kwargs)
    print("Done!")

In [4]:
results = {}

In [5]:
dataset_name = "CIFAR2"
kernel = "rbf"
labels = [0, 3]
ratio = 1.0

datasets = load_dataset(dataset_name, ratio=ratio, labels=labels)
results = {}
actions = ["1"]
for reg_1 in 10**np.array([ -1.0, -0.5, 0, 0.5, 1]):
    
    IKM = IterativeKernelModel(dataset_name, kernel=kernel, datasets=datasets)
    IKM.normalize_kernels()
    actions.append(str(reg_1))
    yhat, preds, res = IKM.solve(reg)
    # results[f"var: {var}, reg: {reg}"] = res
    results[reg] = res
    
results[var] = var_results

Train samples: 10000, Test samples: 2000
(10000, 10000) (2000, 10000)
Solving kernel regression with 10000 observations and regularization param 0.010000
iteration took 0.175088 seconds
Training Error is 0.000063
Test Error is 0.629568
Training Accuracy is 1.000000
Test Accuracy is 0.828000
Solving kernel regression with 10000 observations and regularization param 0.100000
iteration took 0.099666 seconds
Training Error is 0.005370
Test Error is 0.637723
Training Accuracy is 1.000000
Test Accuracy is 0.825500
Solving kernel regression with 10000 observations and regularization param 0.316228
iteration took 0.078795 seconds
Training Error is 0.038185
Test Error is 0.655245
Training Accuracy is 1.000000
Test Accuracy is 0.823500
Solving kernel regression with 10000 observations and regularization param 1.000000
iteration took 0.053973 seconds
Training Error is 0.173976
Test Error is 0.696888
Training Accuracy is 0.999700
Test Accuracy is 0.816000
Solving kernel regression with 10000 obser

In [5]:
setups = {}
labels_list = [[3, 0]] #, [3, 5]
for labels in labels_list:
    for ratio in [1]: #0.1, 0.25, 0.5, 0.8, 
        setups[f"labels {labels}, {ratio*100}% data"] = {"dataset_name": "CIFAR2",
                                                         "iterations": 3,
                                                         "reg": 2,
                                                         "labels": labels,
                                                         "ratio": ratio}
        

In [6]:
for case, setup in setups.items():
    print(f"---------------- {case} ----------------")
    dataset_name = setup["dataset_name"]
    datasets = load_dataset(dataset_name, ratio=setup["ratio"], labels=setup["labels"])
    reg, iterations = setup["reg"], setup["iterations"]
    res = {}
    for kernel in ["rbf"]: #, "ntk"
        print(f"------ {kernel} -----")
        res[kernel] = perform_iterations(iterations, dataset_name, kernel, reg, datasets)
    # print("------ NN -----")
    # res["2-layer NN 1000"] = train_test_NN(datasets, epochs=20)
    # results[case] = res
    # torch.save(results, "./res.tr")

---------------- labels [3, 0], 100% data ----------------
------ rbf -----
Train samples: 10000, Test samples: 2000
(10000, 10000) (2000, 10000)
iteration: 1
Solving kernel regression with 10000 observations and regularization param 2.000000
iteration took 0.161237 seconds
Training Error is 0.289569
Test Error is 0.479349
Training Accuracy is 0.943900
Test Accuracy is 0.846000
iteration: 2
(10000, 10000) (2000, 10000)
Solving kernel regression with 10000 observations and regularization param 2.000000
iteration took 0.161886 seconds
Training Error is 0.108074
Test Error is 0.419345
Training Accuracy is 0.988500
Test Accuracy is 0.869500
iteration: 3
(10000, 10000) (2000, 10000)
Solving kernel regression with 10000 observations and regularization param 2.000000
iteration took 0.162341 seconds
Training Error is 0.050902
Test Error is 0.411818
Training Accuracy is 0.999000
Test Accuracy is 0.874500


In [10]:
accuracy_summary = {}

In [19]:
categories = "Cat, Airplane"
labels = [3, 0]
case_summary = {}
for kernel in ["rbf", "ntk"]:
    for it in range(1, 4):
        acc_list = []
        for ratio in [0.1, 0.25, 0.5, 0.8, 1]:
            res = results[f"labels {labels}, {ratio*100}% data"]
            res = res[kernel]
            res = res[f"iter {it}"]
            acc_list.append(res["Test accuracy"])
        case_summary[kernel + f"-{it}"] = acc_list
acc_list = []
for ratio in [0.1, 0.25, 0.5, 0.8, 1]:
    res = results[f"labels {labels}, {ratio*100}% data"]
    res = res["2-layer NN 1000"]
    acc_list.append(res["Test accuracy"])
case_summary["NN"] = acc_list
accuracy_summary[categories] = case_summary

In [21]:
accuracy_summary

{'Cat, Airplane': {'rbf-1': [0.753, 0.7965, 0.823, 0.839, 0.846],
  'rbf-2': [0.7775, 0.8105, 0.853, 0.8585, 0.8695],
  'rbf-3': [0.7785, 0.8095, 0.8525, 0.865, 0.8745],
  'ntk-1': [0.763, 0.803, 0.841, 0.84, 0.8535],
  'ntk-2': [0.762, 0.8025, 0.8415, 0.84, 0.852],
  'ntk-3': [0.7615, 0.8025, 0.8415, 0.84, 0.852],
  'NN': [75.05, 80.0, 83.1, 84.0, 83.05]},
 'Cat, Dog': {'rbf-1': [0.6095, 0.619, 0.628, 0.645, 0.647],
  'rbf-2': [0.6025, 0.6175, 0.6325, 0.6425, 0.649],
  'rbf-3': [0.598, 0.616, 0.6285, 0.6405, 0.647],
  'ntk-1': [0.558, 0.5945, 0.6045, 0.6165, 0.6355],
  'ntk-2': [0.5595, 0.594, 0.604, 0.6175, 0.636],
  'ntk-3': [0.5585, 0.594, 0.6045, 0.6175, 0.636],
  'NN': [57.15,
   58.550000000000004,
   60.050000000000004,
   62.150000000000006,
   60.9]}}

In [25]:
list(map(lambda x: f"{int(100*x)}%", [0.1, 0.25, 0.5, 0.8, 1]))

['10%', '25%', '50%', '80%', '100%']