# Testing speed of the tree algorithm

## Implement a simple network

In [1]:
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

sys.path.append('../')

from src_experiment import NeuralNet
from geobin import RegionTree, TreeNode

In [2]:
# Architecture of the toy network: widths of the hidden layers and of the
# output layer. `hidden_sizes`, `num_classes` and `state_dict` are reused by
# later cells.
hidden_sizes = [3,3,3,3]
num_classes = 2

# Seed the RNG so the initial weights, and therefore the tree built from
# them, are the same on every Restart & Run All.
# NOTE(review): assumes NeuralNet draws its initial weights from torch's
# global RNG — confirm against src_experiment.
SEED = 0
torch.manual_seed(SEED)

net = NeuralNet(
    input_size = 2,
    hidden_sizes = hidden_sizes,
    num_classes = num_classes,
)
state_dict = net.state_dict()

In [55]:
# Display the parameters of the freshly constructed network
# (parameter name -> weight/bias tensor).
state_dict

OrderedDict([('l1.weight',
              tensor([[ 0.3904, -0.3482],
                      [ 0.0645,  0.2520],
                      [-0.5539,  0.2152]])),
             ('l1.bias', tensor([ 0.6127, -0.2192,  0.5396])),
             ('l2.weight',
              tensor([[ 0.3706,  0.5680, -0.3667],
                      [ 0.4503,  0.2940, -0.1641],
                      [-0.2588,  0.0394,  0.0509]])),
             ('l2.bias', tensor([ 0.5687,  0.3363, -0.4780])),
             ('l3.weight',
              tensor([[-0.1251,  0.1612, -0.1366],
                      [-0.2574, -0.2062,  0.2257],
                      [-0.2012, -0.4189,  0.1741]])),
             ('l3.bias', tensor([ 0.3748, -0.4890,  0.1762])),
             ('l4.weight',
              tensor([[ 0.4694, -0.0697,  0.0697],
                      [-0.4948,  0.1845, -0.3198],
                      [-0.3617,  0.2146, -0.4510]])),
             ('l4.bias', tensor([-0.2737,  0.1438, -0.3408])),
             ('l5.weight',
              te

In [3]:
# Baseline: build the tree with the feasibility check disabled.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; time.time() follows the system clock and can jump.
start = time.perf_counter()
tree = RegionTree(state_dict)
tree.build_tree(verbose=True, check_feasibility=False)
end = time.perf_counter()
time_taken = end-start

# Report the measurement; it was previously computed but never shown.
print(f"Time taken: {time_taken:.3f}s")


Building tree...


Layer 1: 100%|██████████| 8/8 [00:00<00:00, 533.86it/s]
Layer 2: 100%|██████████| 8/8 [00:00<00:00, 765.80it/s]
Layer 3: 100%|██████████| 8/8 [00:00<00:00, 321.18it/s]
Layer 4: 100%|██████████| 8/8 [00:00<00:00, 94.50it/s]
Layer 5: 100%|██████████| 4/4 [00:00<00:00, 10.97it/s]


In [4]:
# Compare the number of nodes at each depth of the tree with the upper bound
# 2**(number of units up to and including that layer); the root counts as 1.
print(f"Hidden layers: {hidden_sizes}")
print(f"Output: {num_classes}")

layer_widths = np.append(np.array(hidden_sizes), num_classes)
theoretical_max = 2**np.cumsum(layer_widths)
max_sizes_with_root = np.append(1, theoretical_max)
actual_sizes = np.array(tree.size)

print(f"Theoretical max size:    {max_sizes_with_root}")
print(f"Actual size of tree:         {actual_sizes}")


Hidden layers: [3, 3, 3, 3]
Output: 2
Theoretical max size:    [    1     8    64   512  4096 16384]
Actual size of tree:         [    1     8    64   512  4096 16384]


In [5]:
def _new_state_dict(hidden_sizes, num_classes):
    """Construct a 2-input ``NeuralNet`` and return its ``state_dict()``."""
    net = NeuralNet(
        input_size = 2,
        hidden_sizes = hidden_sizes,
        num_classes = num_classes,
    )
    return net.state_dict()


def check_speed_comparison(hidden_sizes, num_classes, check=False, state_dict=None):
    """Build a ``RegionTree`` and print its per-layer size and build time.

    Parameters
    ----------
    hidden_sizes : list of int
        Widths of the hidden layers.
    num_classes : int
        Width of the output layer.
    check : bool, default False
        Forwarded to ``RegionTree.build_tree`` as ``check_feasibility``.
    state_dict : optional
        Network parameters to build the tree from. When omitted, a new
        2-input ``NeuralNet`` with the given layer widths is created and its
        ``state_dict()`` is used. Passing it explicitly lets several runs be
        timed on identical weights; it is expected to match ``hidden_sizes``
        and ``num_classes``, which are only used for the printed report in
        that case.

    Returns
    -------
    None
        The results are printed.
    """
    if state_dict is None:
        state_dict = _new_state_dict(hidden_sizes, num_classes)

    # Time only the tree construction. perf_counter() is monotonic and
    # high-resolution, unlike time.time() which follows the system clock.
    start = time.perf_counter()
    tree = RegionTree(state_dict)
    tree.build_tree(verbose=False, check_feasibility=check)
    time_taken = time.perf_counter() - start

    print("--------------------------------------")
    print(f"Checking feasibility: {str(check)}")
    print(f"Hidden layers: {hidden_sizes}")
    print(f"Output: {num_classes}")
    # Upper bound per depth: 2**(units up to and including that layer).
    theoretical_max = 2**np.cumsum(np.append(np.array(hidden_sizes), num_classes))
    print(f"Theoretical max size:   {(np.append(1, theoretical_max))}")
    print(f"Actual size of tree:    {np.array(tree.size)}")
    print(f"Time taken: {time_taken:.3f}s")
    print("--------------------------------------")


def compare_equal_configs(hidden_sizes, num_classes):
    """Time tree building without and with the feasibility check on ONE network.

    Both runs use the same parameters, so differences in tree size and time
    come from the feasibility check and not from two different random
    initialisations.
    """
    # NOTE(review): assumes RegionTree does not modify the state dict it is
    # given — confirm against geobin.
    shared_state_dict = _new_state_dict(hidden_sizes, num_classes)
    for check in (False, True):
        check_speed_comparison(
            hidden_sizes, num_classes, check=check, state_dict=shared_state_dict
        )

In [6]:
# Three hidden layers of decreasing width, one output unit.
compare_equal_configs(hidden_sizes=[7, 5, 3], num_classes=1)

--------------------------------------
Checking feasibility: False
Hidden layers: [7, 5, 3]
Output: 1
Theoretical max size:   [    1   128  4096 32768 65536]
Actual size of tree:    [    1   128  4096 32768 65536]
Time taken: 1.782s
--------------------------------------
--------------------------------------
Checking feasibility: True
Hidden layers: [7, 5, 3]
Output: 1
Theoretical max size:   [    1   128  4096 32768 65536]
Actual size of tree:    [  1 128 928 600 238]
Time taken: 1.287s
--------------------------------------


In [7]:
# Three hidden layers of equal width, feasibility check enabled only.
check_speed_comparison(hidden_sizes=[5, 5, 5], num_classes=1, check=True)

--------------------------------------
Checking feasibility: True
Hidden layers: [5, 5, 5]
Output: 1
Theoretical max size:   [    1    32  1024 32768 65536]
Actual size of tree:    [   1   32  512 2080  170]
Time taken: 1.815s
--------------------------------------


## Testing speed of dataloaders

In [50]:
from src_experiment import moon_path, wbc_path, get_moons_data, get_wbc_data
import torch

# test params
experiment = "wbc"
arch = "decreasing"
dropout = 0.0
noise = 0.0
run_number = 0
batch_size = 32
path_fn = moon_path if experiment == "moon" else wbc_path
data_fn = get_moons_data if experiment == "moon" else get_wbc_data

# Load the data through the loader selected above so it always matches
# `experiment` and `noise`; it was previously hard-coded to the WBC loader
# with label_noise=0.0.
# NOTE(review): assumes get_moons_data accepts the same keyword arguments as
# get_wbc_data — confirm against src_experiment.
_, data = data_fn(label_noise=noise, batch_size=batch_size)

# Checkpoint locations, keyed by epoch. Despite the name, `state_dicts` holds
# file paths; the parameters are read with torch.load where they are used.
epochs = [0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,74]
run_dir = path_fn(arch, dropout, noise, run_number)
state_dicts = {epoch: run_dir /"state_dicts"/f"epoch{epoch}.pth" for epoch in epochs}

In [51]:
def check_speed_comparison_real(check=False, epoch=0):
    """Time building a tree from a saved checkpoint and passing data through it.

    Reads the notebook-level ``state_dicts`` (epoch -> checkpoint path) and
    ``data`` (passed to ``pass_dataloader_through_tree``).

    Parameters
    ----------
    check : bool, default False
        Forwarded to ``RegionTree.build_tree`` as ``check_feasibility``.
    epoch : int, default 0
        Key into ``state_dicts`` selecting which checkpoint to load. A missing
        key raises ``KeyError``.

    Returns
    -------
    None
        The timings are printed.
    """
    # Read the checkpoint before starting the clock so disk I/O is not
    # reported as tree-building time.
    # NOTE(review): torch.load relies on pickle; only load checkpoints from a
    # trusted source.
    state_dict = torch.load(state_dicts[epoch])

    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    tree = RegionTree(state_dict)
    tree.build_tree(verbose=False, check_feasibility=check)
    intermediate = time.perf_counter()

    tree.pass_dataloader_through_tree(data)
    tree.collect_number_counts()
    # The result is not used here; the call is kept so that its cost stays
    # part of the measured passing time.
    tree.get_number_counts()
    end = time.perf_counter()

    time_taken = end-start

    print("----------Build and passing----------")
    print(f"Checking feasibility: {str(check)}")
    print(f"Total time taken:    {time_taken:.3f} s")
    print(f"Time building:       {intermediate-start:.3f} s")
    print(f"Time passing:        {end-intermediate:.3f} s")
    print("--------------------------------------")

In [54]:
# Build and pass the data through the tree with the feasibility check disabled.
check_speed_comparison_real(check=False)

----------Build and passing----------
Checking feasibility: False
Total time taken:    0.242 s
Time building:       0.221 s
Time passing:        0.021 s
--------------------------------------


In [28]:
# Same layer widths as the earlier comparison, with two output units.
# NOTE(review): the saved output under this cell reports separate build/pass
# timings, which the compare_equal_configs defined in this notebook does not
# print. It appears to come from an earlier definition; re-run to refresh it.
compare_equal_configs(hidden_sizes=[7, 5, 3], num_classes=2)

----------Build and passing----------
Checking feasibility: False
Hidden layers: [7, 5, 3]
Output: 2
Theoretical max size:   [     1    128   4096  32768 131072]
Actual size of tree:    [     1    128   4096  32768 131072]
Total time taken:    3.485 s
Time building:       3.113 s
Time passing:        0.372 s
--------------------------------------
----------Build and passing----------
Checking feasibility: True
Hidden layers: [7, 5, 3]
Output: 2
Theoretical max size:   [     1    128   4096  32768 131072]
Actual size of tree:    [  1 128 928 600 476]
Total time taken:    1.478 s
Time building:       1.466 s
Time passing:        0.012 s
--------------------------------------
