In [6]:
from itertools import product, combinations
import matplotlib.pyplot as plt
import numpy as np
from src import k_tree

# from src.utils import data as dt
import torch
from src.k_tree import Ktree
from src.utils.data import loadData, loadData_3d
from src.metrics import Linf_simple

# from src import ellipses as el
# from . import ellipses

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _centre_bounds(vectors):
    """Summarise the extent of a batch of ellipse vectors.

    Columns 0 and 1 are read as the two axis lengths (a, b) and columns 2 and 3
    as the centre coordinates (x, y). NOTE(review): this layout is inferred
    from the variable names used in this notebook -- confirm against
    `to_vector()`.

    Returns:
        The tuple ``(x_min, x_max, y_min, y_max, a_max, b_max, max_ab,
        ab_norm, bbox)`` where ``max_ab`` is the largest axis length,
        ``ab_norm`` is the larger of the x- and y-range of the centres, and
        ``bbox`` is a tensor (created on torch's default device) holding
        ``[x_min - max_ab, x_max + max_ab, y_min - max_ab, y_max + max_ab]``,
        i.e. the centre range padded on every side by ``max_ab``.
    """
    x_min = vectors[:, 2].min().item()
    x_max = vectors[:, 2].max().item()
    y_min = vectors[:, 3].min().item()
    y_max = vectors[:, 3].max().item()
    a_max = vectors[:, 0].max().item()
    b_max = vectors[:, 1].max().item()
    max_ab = max(a_max, b_max)
    ab_norm = max((x_max - x_min), (y_max - y_min))
    bbox = torch.tensor(
        [x_min - max_ab, x_max + max_ab, y_min - max_ab, y_max + max_ab]
    )
    return x_min, x_max, y_min, y_max, a_max, b_max, max_ab, ab_norm, bbox


# NOTE: allow_pickle=True unpickles arbitrary Python objects, which can execute
# code on load -- only point this at files from a trusted source.
data = np.load("./data/ellipses/10000el_1_3.npy", allow_pickle=True)
data = np.array([item.ellipse.to_vector() for item in data])

# `data` is always a NumPy array at this point, so convert unconditionally.
data = torch.tensor(data, device=device)

n = 100  # number of ellipses kept in the random subsample
# Draw the subsample from a local, seeded generator so the notebook gives the
# same data after Restart & Run All, without touching torch's global RNG state.
sample_rng = torch.Generator().manual_seed(0)
data = data[torch.randperm(data.size(0), generator=sample_rng)[:n]]

(
    x_min, x_max, y_min, y_max,
    a_max, b_max, max_ab, ab_norm, bbox,
) = _centre_bounds(data)
print(bbox)

# Normalise in place: axis lengths by `ab_norm`, centres into the padded bbox.
# NOTE(review): the axes are divided by the un-padded centre range while the
# centres are divided by the padded bbox extents, so lengths and positions end
# up on slightly different scales -- confirm this is intended.
data[:, 0] /= ab_norm
data[:, 1] /= ab_norm
data[:, 2] = (data[:, 2] - bbox[0]) / (bbox[1] - bbox[0])
data[:, 3] = (data[:, 3] - bbox[2]) / (bbox[3] - bbox[2])

# Recompute the same statistics on the normalised data and show the new bbox.
(
    x_min, x_max, y_min, y_max,
    a_max, b_max, max_ab, ab_norm, bbox,
) = _centre_bounds(data)
print(bbox)

tensor([  36.2173, 3145.0347,   15.4830, 3115.6570])
tensor([-1.8616e-06,  1.0000e+00,  8.2548e-07,  1.0000e+00])


In [7]:
import torch

# from src.k_tree_poly import Ktree as Ktree_pls
from src.k_tree_poly_copy import Ktree as Ktree
from src.utils.objects.squares import loadData as loadSquares
from src.utils import plot_tools as pt
from src.utils import accuracy as acc

# Prefer the first CUDA device when one is visible; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

dim = 2  # space dimension
k = 3  # number of centroids to generate in the Clustering model

# Settings for the Clustering model.
clustering_args = dict(
    epochs=10,  # number of epochs
    pre_processing=10,
    number_of_centroids=k,
    dimension=dim,
    object_id="ellipses",  # object id
)

# Sampling settings (a previous run used n = 50).
n = 30
un_args = dict(
    N=n,  # number of points to sample
    M=n * n - 1,  # number of points to return
    # The epsilon ball. Increase this to get more points (as var increases).
    # A value of .15 was also tried.
    epsilon=0.2,
)

# Settings for the critic model.
critic_args = dict(
    optimizer_lr=5e-3,  # optimiser learning rate
    epochs=200,  # number of epochs
    width=300,  # width of the model's linear layers
    depth=5,  # depth of the model's linear layers
)

# Threshold handed to the k-tree. Other values tried: 100 - 1, 3 * k, 98, 998.
threshold = 99

# Initialise the k-tree structure.
from src.metrics import distance_ellipse_2_point

# Build the k-tree object from `data`, the ellipse-to-point distance and the
# three hyper-parameter dictionaries.
distance_function = distance_ellipse_2_point
ktree = Ktree(
    threshold, data, distance_function,
    clustering_args, un_args, critic_args,
    device, dim,
)
# `data` is handed over as-is; no explicit `data.to(device)` is done here.
print("Starting to create the tree...", "=" * 20, sep="\n")
# Build with default arguments. An earlier variant of this call passed
# save_path_prefix="./models/line_segments/2d/5k/" and plot=False.
ktree.create_tree()

Starting to create the tree...

Creating critic for node 0 that has 100 data, which is more than the threshold 99.
Bounding box for node 0: [[-1.8615601629013356e-06, 1.0000018415418612], [4.333250286628473e-06, 0.999995662712708]]
Creating clustering for node 0 with 3 centroids.
Initial divergence: 26.46194839477539
Starting iteration 1
Centroids:  tensor([[0.1029, 0.1702],
        [0.9135, 0.3122],
        [0.2707, 0.6595]], device='cuda:0')
Iteration 1, divergence: 0.26461946964263916
Starting iteration 2
Centroids:  tensor([[0.2017, 0.1973],
        [0.7943, 0.3572],
        [0.3994, 0.7200]], device='cuda:0')
Iteration 2, divergence: 0.2438584864139557
Starting iteration 3
Centroids:  tensor([[0.2057, 0.2309],
        [0.7772, 0.2903],
        [0.4597, 0.7750]], device='cuda:0')
Iteration 3, divergence: 0.23236531019210815
Starting iteration 4
Centroids:  tensor([[0.1963, 0.2437],
        [0.7574, 0.2031],
        [0.5072, 0.7783]], device='cuda:0')
Iteration 4, divergence: 0.2234

In [8]:
# Summarise the built tree: height, leaf count, per-leaf data size, node count.
leaves = ktree.get_leaves()
number_of_nodes = ktree.number_of_nodes

# The height is taken as the longest `index` among the leaves.
height = max(len(leaf.index) for leaf in leaves)
print(f"Tree height is {height}.")

# Number of data items stored in each leaf, in the order the leaves are returned.
leaf_sizes = list(map(lambda leaf: len(leaf.data), leaves))
print(f"Created {len(leaves)} leaves with sizes")
print(leaf_sizes, number_of_nodes, sep="\n")

Tree height is 2.
Created 3 leaves with sizes
[27, 29, 44]
4


In [10]:
# Evaluate the tree on random queries. The captured output of this cell reports,
# per layer, the number of queries and the percentage of correct predictions.
# NOTE(review): the positional arguments (100, 1, 1) are not documented in this
# notebook -- presumably 100 is the number of queries (the output shows 100 for
# the single layer); confirm against `acc.random_queries` before changing them.
acc.random_queries(ktree, 100, 1, 1)

The number of queries per layer are:
[100.]
The percentage of correct predictions per layer is:
[99.]
The mean percentage of correct predictions per layer is:
[99.]
