In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import math
import pandas as pd
import itertools
import copy
from sklearn.cluster import DBSCAN

In [3]:
from random import choice

In [4]:
np.random.seed(42)

In [630]:
a = ["ads", "asdf", "dsfagsfg", "asgd"]

[

# Constants and Data Loading

In [7]:
np.set_printoptions(precision=2)
np.set_printoptions(suppress=True)

In [8]:
max_number_of_tracks = 232
max_number_of_tracks_power_2 = 256
max_number_of_tracks_log_2 = 8
batch_size = 50
eps = 0.15

z0_file = "/media/lucas/QS/binaries-trk/OldKF_TTbar_170K_quality-1-trk-z0.bin"
pt_file = "/media/lucas/QS/binaries-trk/OldKF_TTbar_170K_quality-1-trk-pt.bin"
z0 = np.fromfile(z0_file, dtype=np.float32)
pt = np.fromfile(pt_file, dtype=np.float32)

### Class Initialization

In [165]:
class BatchedDBSCAN:
    def __init__(
        self,
        z0,
        pt,
        eps,
        batch_size,
        max_number_of_tracks,
        verbose: bool = False,
        save_intermediate: bool = False,
    ):

        self.eps = eps
        self.batch_size = batch_size
        self.verbose = verbose
        self.save_intermediate = save_intermediate
        self.z0_boundary = 21  # 21 cm is outside the detector acceptance
        self.pt_boundary = 0  # 0 pT won't contribute to the pT sum.
        self.minPts = 2  # This algorithm only works for a minimum number of 2 points

        self.max_number_of_tracks = int(max_number_of_tracks)
        self.n_batches = math.ceil(self.max_number_of_tracks / self.batch_size)

        # Max number of tracks including all batches
        self.max_n_tracks_batched = self.batch_size * self.n_batches
        self.max_n_clusters_batch = math.ceil(self.batch_size / self.minPts)
        self.max_n_clusters = math.ceil(self.max_n_tracks_batched / self.minPts)

        # Need to pad vectors to the max_number_of_tracks allowed so that it matches the fpga input
        n_pad = self.max_number_of_tracks - z0.shape[0]
        # if verbose:
        # print("original number of tracks: ", z0.shape)
        self.z0 = self.pad_vector(z0, n_pad, self.z0_boundary)
        self.pt = self.pad_vector(pt, n_pad, self.pt_boundary)

        # These are needed for the prefix sum
        self.max_number_of_tracks_power_2 = (
            1 << (self.max_number_of_tracks - 1).bit_length()
        )
        self.batch_size_power_2 = 1 << (self.batch_size - 1).bit_length()
        self.max_number_of_tracks_log_2 = np.log2(self.max_number_of_tracks_power_2)
        self.batch_size_log_2 = np.log2(self.batch_size_power_2)
        # self.n_batches = math.ceil(self.max_number_of_tracks / self.batch_size)
        self.results = {}
        self.results_sklearn = {}
        self.merged_list = []

    def pad_vector(self, vec, n_pad, value):
        """pads vector to a set size with given value"""

        vec_to_pad = value * np.ones(n_pad)
        vec = np.append(vec, vec_to_pad)

        return vec

    def build_tracks(self, z0, pt):
        """Builds tracks batchess"""

        # Shape is determined by the size of batch, z0, pT and label (not used atm)
        track_batch = np.zeros((self.batch_size, 3))

        track_batch[:, 0] = z0
        track_batch[:, 1] = pt

        # sort the tracks by z0
        track_batch = track_batch[track_batch[:, 0].argsort()]

        return track_batch

### Prefix sum

In [166]:
def prefix_sum(self, arr):
    """
    Calculates the prefix sum of pT.
    Warning, requires array to be of size thats log base of 2.
    """
    size_log2 = int(np.log2(arr.shape[0]))

    # up-sweep
    for d in range(0, size_log2, 1):
        step_size = 2**d
        double_step_size = step_size * 2

        for i in range(0, arr.shape[0], double_step_size):
            arr[i + double_step_size - 1] += arr[i + step_size - 1]

    # down-sweep
    arr[arr.shape[0] - 1] = 0
    d = size_log2 - 1

    while d >= 0:
        step_size = 2**d
        double_step_size = step_size * 2
        for i in range(0, arr.shape[0], double_step_size):
            tmp = arr[i + step_size - 1]
            arr[i + step_size - 1] = arr[i + double_step_size - 1]
            arr[i + double_step_size - 1] += tmp
        d -= 1

    return arr


BatchedDBSCAN.prefix_sum = prefix_sum

### left boundaries

In [167]:
def find_left_boundaries(self, tracks):

    left_boundaries = np.zeros(self.batch_size, dtype=bool)

    # first value is always a left boundary
    left_boundaries[0] = 1

    for i in range(1, self.batch_size):
        _t = tracks[i]

        if _t[0] - tracks[i - 1][0] > self.eps:
            tracks[i][2] = -1
            left_boundaries[i] = 1
        else:
            left_boundaries[i] = 0

    self.left_boundaries = left_boundaries
    return left_boundaries


BatchedDBSCAN.find_left_boundaries = find_left_boundaries

### right boundaries

In [267]:
def find_right_boundaries(self, left_boundaries, rs, tracks):

    max_tracks = self.batch_size

    boundaries = np.zeros((max_tracks, 7))
    is_noise = np.zeros((max_tracks, 1))

    for i in range(max_tracks - 1):

        left_edge = left_boundaries[i] and not (left_boundaries[i + 1])  # 1, 0
        right_edge = not (left_boundaries[i]) and left_boundaries[i + 1]  # 0, 1
        check_noise = (left_boundaries[i] == 1) and (left_boundaries[i + 1] == 1)

        if left_edge or right_edge:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i + 1, 0]
        elif check_noise:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i, 0]
            boundaries[i][6] = 1
            is_noise[i] = 1
        else:
            boundaries[i][0] = max_tracks
            boundaries[i][1] = 0
            boundaries[i][2] = 0
            boundaries[i][3] = 0
            boundaries[i][4] = 21
            boundaries[i][5] = 21

    # Check for the last boundary
    if left_boundaries[max_tracks - 1]:
        boundaries[max_tracks - 1][0] = max_tracks
        boundaries[max_tracks - 1][1] = 0
        boundaries[max_tracks - 1][2] = 0
        boundaries[max_tracks - 1][3] = 0
        boundaries[max_tracks - 1][4] = 21
        boundaries[max_tracks - 1][5] = 21
    else:
        boundaries[max_tracks - 1][0] = max_tracks - 1
        boundaries[max_tracks - 1][1] = rs[max_tracks - 1]
        boundaries[max_tracks - 1][2] = rs[max_tracks]
        boundaries[max_tracks - 1][3] = rs[max_tracks] - rs[max_tracks - 1]
        boundaries[max_tracks - 1][4] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][5] = tracks[max_tracks - 1, 0]

    # boundaries[:, 6] = is_noise
    # Sort boundaries by the index
    sort_idx = boundaries[:, 0].argsort()
    boundaries = boundaries[sort_idx]
    is_noise = is_noise[sort_idx]
    self.is_noise = is_noise

    return boundaries


BatchedDBSCAN.find_right_boundaries = find_right_boundaries

### Convert boundaries to cluster

In [169]:
# def convert_boundaries_to_clusters(self, boundaries: np.array) -> np.array:
#     n_boundaries = boundaries.shape[0]
#     n_clusters = math.ceil(n_boundaries / 2)
#     clusters = np.zeros((n_clusters, 6))
#     j = 0
#     for i in range(0, n_boundaries, 2):
#         pt_low = boundaries[i, 1]
#         pt_high = boundaries[i + 1, 2]
#         pt_sum = pt_high - pt_low
#         z0_low = boundaries[i, 4]
#         z0_high = boundaries[i + 1, 5]

#         clusters[j, 3] = pt_sum
#         clusters[j, 4] = z0_low
#         clusters[j, 5] = z0_high
#         j += 1
#     return clusters

In [186]:
def convert_boundaries_to_clusters(self, boundaries: np.array) -> np.array:
    n_boundaries = boundaries.shape[0]
    n_clusters = math.ceil(n_boundaries / 2)
    clusters = np.zeros((n_boundaries, 4))
    j = 0
    i = 0
    while i < n_boundaries:
        check_noise = boundaries[i, -1] == 1
        if check_noise:

            pt_low = boundaries[i, 1]
            pt_high = boundaries[i, 2]
            pt_sum = pt_high - pt_low
            z0_low = boundaries[i, 4]
            z0_high = boundaries[i, 5]

            clusters[j, self.pt_idx] = pt_sum
            clusters[j, self.z0_low_idx] = z0_low
            clusters[j, self.z0_high_idx] = z0_high
            clusters[j, self.noise_idx] = 1
            j += 1
            i += 1
        else:
            pt_low = boundaries[i, 1]
            pt_high = boundaries[i + 1, 2]
            pt_sum = pt_high - pt_low
            z0_low = boundaries[i, 4]
            z0_high = boundaries[i + 1, 4]

            clusters[j, self.pt_idx] = pt_sum
            clusters[j, self.z0_low_idx] = z0_low
            clusters[j, self.z0_high_idx] = z0_high
            j += 1
            i += 2
    return clusters


BatchedDBSCAN.convert_boundaries_to_clusters = convert_boundaries_to_clusters

### get vertex

In [171]:
def get_vertex(self, cluster_of_tracks: np.array) -> float:
    """
    Calculates the median z0 of the cluster of tracks
    """

    n_size = cluster_of_tracks.shape[0]

    if n_size % 2 == 0:
        return 0.5 * (
            cluster_of_tracks[n_size // 2] + cluster_of_tracks[n_size // 2 - 1]
        )
    else:
        return cluster_of_tracks[n_size // 2]


BatchedDBSCAN.get_vertex = get_vertex

### merge clusters

In [172]:
# def merge_clusters(self, clusters: np.array) -> np.array:

#     n_clusters = clusters.shape[0]
#     if self.n_batches == 1:
#         self.max_pt_i = np.argmax(clusters[:, 3])
#         self.max_pt = clusters[self.max_pt_i, 3]
#         self.merge_count = 0
#         return clusters

#     else:
#         max_pt = 0
#         max_pt_i = 0
#         merge_count = 0

#         comb = list(itertools.combinations(range(n_clusters), 2))

#         for i, j in comb:
#             if clusters[i, 4] >= 21:
#                 continue

#             if max_pt < clusters[i, 3]:
#                 max_pt = clusters[i, 3]
#                 max_pt_i = i

#             if clusters[j, 4] >= 21:
#                 continue

#             case1 = (clusters[i, 4] - self.eps) <= clusters[j, 5]
#             case2 = (clusters[i, 5] + self.eps) >= clusters[j, 4]

#             if case1 and case2:
#                 c1 = copy.copy(clusters[i, :])
#                 c2 = copy.copy(clusters[j, :])

#                 merge_count += 1
#                 # Expand boundaries of cluster after merging
#                 if clusters[j, 4] < clusters[i, 4]:
#                     clusters[i, 4] = clusters[j, 4]
#                 if clusters[j, 5] > clusters[i, 5]:
#                     clusters[i, 5] = clusters[j, 5]
#                 clusters[i, 3] += clusters[j, 3]
#                 clusters[i, 2] += clusters[j, 2]
#                 if max_pt < clusters[i, 3]:
#                     max_pt = clusters[i, 3]
#                     max_pt_i = i

#                 clusters[j, 3] = 0
#                 clusters[j, 4] = 21
#                 clusters[j, 5] = 21

#                 print(
#                     f"""merging cluster [{round(c1[4],2), round(c1[5],2), round(c1[3],2)}] and [{round(c2[4],2), round(c2[5],2), round(c2[3],2)}] --> [{round(clusters[i,4],2), round(clusters[i,5], 2), round(clusters[i,3],2)}]"""
#                 )

#         self.max_pt = max_pt
#         self.max_pt_i = max_pt_i
#         self.merge_count = merge_count
#         return clusters


# BatchedDBSCAN.merge_clusters = merge_clusters

In [173]:
def merge_clusters(self, clusters: np.array) -> np.array:

    n_clusters = clusters.shape[0]
    if self.n_batches == 1:
        self.max_pt_i = np.argmax(clusters[:, self.pt_idx])
        self.max_pt = clusters[self.max_pt_i, self.pt_idx]
        self.merge_count = 0
        return clusters

    else:
        max_pt = 0
        max_pt_i = 0
        merge_count = 0

        comb = list(itertools.combinations(range(n_clusters), 2))

        for i, j in comb:
            if clusters[i, self.z0_low_idx] >= 21:
                continue

            if max_pt < clusters[i, self.pt_idx]:
                max_pt = clusters[i, self.pt_idx]
                max_pt_i = i

            if clusters[j, self.z0_low_idx] >= 21:
                continue

            case1 = (clusters[i, self.z0_low_idx] - self.eps) <= clusters[
                j, self.z0_high_idx
            ]
            case2 = (clusters[i, self.z0_high_idx] + self.eps) >= clusters[
                j, self.z0_low_idx
            ]

            if case1 and case2:
                c1 = copy.copy(clusters[i, :])
                c2 = copy.copy(clusters[j, :])
                c1_noise = clusters[i, -1]
                c2_noise = clusters[j, -1]

                if c1_noise:
                    clusters[i, -1] = 0

                merge_count += 1
                # Expand boundaries of cluster after merging
                if clusters[j, self.z0_low_idx] < clusters[i, self.z0_low_idx]:
                    clusters[i, self.z0_low_idx] = clusters[j, self.z0_low_idx]
                if clusters[j, self.z0_high_idx] > clusters[i, self.z0_high_idx]:
                    clusters[i, self.z0_high_idx] = clusters[j, self.z0_high_idx]
                clusters[i, self.pt_idx] += clusters[j, self.pt_idx]

                # check if the pT_sum max is now higher
                if max_pt < clusters[i, self.pt_idx]:
                    max_pt = clusters[i, self.pt_idx]
                    max_pt_i = i

                # Erase merged cluster.
                clusters[j, self.pt_idx] = 0
                clusters[j, self.z0_low_idx] = 21
                clusters[j, self.z0_high_idx] = 21

                merged_str = f"""merging cluster [{round(c1[self.z0_low_idx],2), round(c1[self.z0_high_idx],2), round(c1[self.pt_idx],2)}] and [{round(c2[self.z0_low_idx],2), round(c2[self.z0_high_idx],2), round(c2[self.pt_idx],2)}] --> [{round(clusters[i,self.z0_low_idx],2), round(clusters[i,self.z0_high_idx], 2), round(clusters[i,self.pt_idx],2)}]"""
                self.merged_list.append(merged_str)

        self.max_pt = max_pt
        self.max_pt_i = max_pt_i
        self.merge_count = merge_count
        return clusters


BatchedDBSCAN.merge_clusters = merge_clusters

### initialize clusters

In [174]:
def initialize_clusters(self, max_n_clusters: int) -> np.array:

    # clusters = np.zeros((max_n_clusters, 7))
    # clusters[:, 4] = 21
    # clusters[:, 5] = 21

    clusters = np.zeros((max_n_clusters, 4))
    # pT, z0_low, z0_high, Noise
    self.pt_idx = 0
    self.z0_low_idx = 1
    self.z0_high_idx = 2
    self.noise_idx = 3

    clusters[:, self.z0_low_idx] = 21
    clusters[:, self.z0_high_idx] = 21

    return clusters


BatchedDBSCAN.initialize_clusters = initialize_clusters

### fit sklearn for test

In [613]:
def fitsklearn(self):
    start_idx = 0
    end_idx = start_idx + self.batch_size
    n_pad = (self.n_batches * self.batch_size) - self.z0.shape[0]
    self.z0 = self.pad_vector(self.z0, n_pad, 21)
    self.pt = self.pad_vector(self.pt, n_pad, 0)

    clusters_df = pd.DataFrame({})
    clusters = self.initialize_clusters(self.max_n_clusters)

    for i in range(self.n_batches):
        start_idx = i * self.batch_size
        end_idx = (i + 1) * self.batch_size
        z0_batch = self.z0[start_idx:end_idx]
        pt_batch = self.pt[start_idx:end_idx]

        _db = DBSCAN(eps=0.15, min_samples=2).fit(z0_batch.reshape(-1, 1))

        _results = pd.DataFrame({"z0": z0_batch, "pt": pt_batch, "label": _db.labels_})
        max_label = _results.label.max()
        n_noise = _results[_results.label == -1].shape[0]

        _results.loc[_results.label == -1, "label"] = np.arange(n_noise) + max_label + 1

        clusters_batch = _results.groupby(["label"]).agg(
            {"z0": [np.min, np.max], "pt": [np.sum, "count"]}
        )
        clusters_batch.columns = ["z0_low", "z0_high", "pt_sum", "ntracks"]
        clusters_batch["noise"] = 0
        clusters_batch.loc[clusters_batch.ntracks == 1, "noise"] = 1
        clusters_batch.drop(columns=["ntracks"], inplace=True)
        self.results_sklearn[i] = clusters_batch

        clusters_df = pd.concat([clusters_df, clusters_batch])

    n_clusters = clusters_df.shape[0]

    clusters[0:n_clusters, self.pt_idx] = clusters_df["pt_sum"]
    clusters[0:n_clusters, self.z0_low_idx] = clusters_df["z0_low"]
    clusters[0:n_clusters, self.z0_high_idx] = clusters_df["z0_high"]
    clusters[0:n_clusters, self.noise_idx] = clusters_df["noise"]

    # # clusters[0:n_clusters, 2] = clusters_df["ntracks"]
    # clusters[0:n_clusters, 3] = clusters_df["pt_sum"]
    # clusters[0:n_clusters, 4] = clusters_df["z0_min"]
    # clusters[0:n_clusters, 5] = clusters_df["z0_max"]

    clusters_merged = self.merge_clusters(clusters)
    self.clusters_merged = pd.DataFrame(
        clusters_merged, columns=["pt_sum", "z0_low", "z0_high", "noise"]
    )


BatchedDBSCAN.fitsklearn = fitsklearn

### fit

In [614]:
def fit(self):

    np.set_printoptions(precision=2)
    np.set_printoptions(suppress=True)

    start_idx = 0
    end_idx = start_idx + self.batch_size
    # Need to pad vectors to match the size of n_batches*batch_size
    n_pad = (self.n_batches * self.batch_size) - self.z0.shape[0]
    self.z0 = self.pad_vector(self.z0, n_pad, 21)
    self.pt = self.pad_vector(self.pt, n_pad, 0)

    # clusters = self.initialize_clusters(self.max_n_clusters)
    clusters = self.initialize_clusters(self.max_n_tracks_batched)
    self.z0_batches = {}
    self.pt_batches = {}
    self.rs_batches = {}
    self.left_boundaries_batches = {}
    self.boundaries_batches = {}
    self.clusters_batches = {}

    pv_cluster = np.zeros((1, 4))
    merge_count = 0
    for i in range(self.n_batches):

        start_idx = i * self.batch_size
        end_idx = (i + 1) * self.batch_size

        z0_batch = self.z0[start_idx:end_idx]
        pt_batch = self.pt[start_idx:end_idx]

        track_batch = self.build_tracks(z0_batch, pt_batch)
        self.tracks = track_batch

        rs_batch = self.pad_vector(
            track_batch[:, 1], self.batch_size_power_2 - self.batch_size, 0
        )

        rs_batch = self.prefix_sum(rs_batch)
        self.rs = rs_batch

        # Storing batches
        self.z0_batches[i] = track_batch[:, 0]
        self.pt_batches[i] = track_batch[:, 1]
        self.rs_batches[i] = rs_batch

        # Finding Left Boundaries
        left_boundaries = self.find_left_boundaries(track_batch)
        self.left_boundaries_batches[i] = left_boundaries

        # Finding Right Boundaries
        boundaries = self.find_right_boundaries(left_boundaries, rs_batch, track_batch)
        self.boundaries_batches[i] = boundaries

        self.boundaries = boundaries

        clusters_batch = self.convert_boundaries_to_clusters(boundaries)
        self.clusters_batches[i] = clusters_batch

        clusters[i * self.batch_size : (i + 1) * self.batch_size, :] = clusters_batch

        # if track_batch[-1, 0] == 21:
        #     break

        self.results[i] = clusters_batch
    clusters = self.merge_clusters(clusters)

    self.clusters = clusters

    # Find pv_cluster
    pv_cluster[0, :] = clusters[self.max_pt_i, :]

    print(self.max_pt, self.max_pt_i)
    print(f"Merged count: {self.merge_count}")

    pv_tracks = []

    for i in range(self.max_number_of_tracks):
        z0_trk = self.z0[i]

        if (z0_trk >= pv_cluster[0, self.z0_low_idx]) and (
            z0_trk <= pv_cluster[0, self.z0_high_idx]
        ):
            pv_tracks.append(z0_trk)

    median_vertex = self.get_vertex(np.array(pv_tracks))
    self.z0_pv = np.median(pv_tracks)

    print(f"mean: {np.mean(pv_tracks)}")
    print(f"median: {np.median(pv_tracks)}")
    print(f"median2: {median_vertex}")


BatchedDBSCAN.fit = fit

### Testing

In [628]:
for bs in [50]:

    db_skl = BatchedDBSCAN(z0, pt, eps, bs, max_number_of_tracks, True, True)
    db_skl.fitsklearn()

    db = BatchedDBSCAN(z0, pt, eps, bs, max_number_of_tracks, True, True)

    db.fit()

2
2
3
3
3
0 0
Merged count: 249
mean: 7.957182785560345
median: 2.490234375
median2: 1.640625


In [627]:
for k in db_skl.results_sklearn.keys():
    db_skl.results_sklearn[k].to_pickle(f"cb_{k}.pkl")

In [138]:
pd.DataFrame(
    db.clusters_merged, columns=["y1", "y2", "ntracks", "pT", "z0_low", "z0_high", "y7"]
).to_pickle("sklearn_clusters_merged.pkl")

In [156]:
db.clusters_merged.sort_values(by="pt_sum", ascending=False)

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
3,110.971176,-2.519531,-1.40625,0.0
4,104.907081,-3.808594,-3.398438,0.0
2,40.733637,-0.058594,0.410156,0.0
5,19.354282,-3.164062,-2.695312,0.0
19,17.044699,3.457031,3.632812,0.0
1,14.611591,-0.585938,-0.292969,0.0
11,12.665438,2.8125,3.105469,0.0
24,10.708842,-0.117188,0.058594,0.0
25,8.528573,5.742188,5.859375,0.0
17,8.141763,-1.054688,-0.9375,0.0


# Am i finding the right clusters?

In [196]:
db.clusters_batches

{0: array([[ 7.96, -5.62, -5.51,  0.  ],
        [ 5.59, -5.1 , -4.92,  0.  ],
        [ 3.92, -4.92, -4.69,  0.  ],
        [ 4.56, -4.22, -4.04,  0.  ],
        [ 4.1 , -4.04, -3.69,  0.  ],
        [ 9.21, -3.52, -3.05,  0.  ],
        [ 4.06, -2.93, -2.58,  0.  ],
        [42.53, -2.4 , -1.88,  0.  ],
        [ 7.15, -1.7 , -1.46,  0.  ],
        [ 5.35, -1.41, -0.94,  0.  ],
        [ 6.19, -0.76, -0.41,  0.  ],
        [ 4.13, -0.29,  0.06,  0.  ],
        [10.61,  0.23,  0.41,  0.  ],
        [ 7.66,  0.88,  1.35,  0.  ],
        [ 7.99,  2.11,  2.81,  0.  ],
        [ 5.48,  3.57,  3.87,  0.  ],
        [ 4.85,  5.1 ,  5.74,  0.  ],
        [ 4.15,  6.5 ,  6.62,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  , 21.  , 21.  ,  0.  ],
        [ 0.  ,  0.  ,  0.  ,  0.  ],
        [

## Lets compare the clusters from batch zero between sklearn and my dbscan

In [200]:
db.z0_batches

{0: array([-5.62, -5.51, -5.1 , -4.92, -4.92, -4.69, -4.22, -4.04, -4.04,
        -3.69, -3.69, -3.63, -3.57, -3.52, -3.05, -2.93, -2.93, -2.58,
        -2.4 , -2.34, -2.23, -2.23, -2.23, -2.17, -2.05, -1.99, -1.88,
        -1.7 , -1.46, -1.41, -0.94, -0.76, -0.41, -0.29,  0.06,  0.23,
         0.23,  0.29,  0.41,  0.88,  1.35,  2.11,  2.81,  3.57,  3.87,
         5.1 ,  5.74,  6.5 ,  6.62,  7.03]),
 1: array([-9.2 , -6.09, -4.22, -3.69, -3.63, -3.63, -3.57, -3.52, -3.52,
        -3.46, -3.46, -3.4 , -3.16, -3.11, -2.93, -2.52, -2.34, -2.34,
        -2.23, -2.23, -2.11, -2.05, -1.99, -1.99, -1.82, -1.82, -1.76,
        -1.7 , -1.58, -1.05, -0.59, -0.12, -0.06, -0.06,  0.06,  0.29,
         0.29,  0.35,  0.35,  0.59,  0.64,  1.05,  1.7 ,  2.29,  2.99,
         3.57,  3.63,  5.27,  5.8 ,  6.56]),
 2: array([-8.26, -4.16, -3.81, -3.75, -2.81, -2.7 , -2.46, -2.29, -2.29,
        -1.76, -1.76, -1.7 , -1.46, -1.41, -1.  , -0.59, -0.53, -0.41,
        -0.06,  0.06,  0.12,  0.29,  0.35,  0.35,

In [206]:
z0_0 = db.z0_batches[0]

In [205]:
pt_0 = db.pt_batches[0]


In [210]:
rs_0 = db.rs_batches[0]

### running the sklearn dbscan

In [215]:
_db = DBSCAN(eps=0.15, min_samples=2).fit(z0_0.reshape(-1, 1))

_results = pd.DataFrame({"z0": z0_0, "pt": pt_0, "label": _db.labels_})
max_label = _results.label.max()
n_noise = _results[_results.label == -1].shape[0]

_results.loc[_results.label == -1, "label"] = np.arange(n_noise) + max_label + 1

clusters_batch = _results.groupby(["label"]).agg(
    {"z0": [np.min, np.max], "pt": [np.sum, "count"]}
)
clusters_batch.columns = ["z0_low", "z0_high", "pt_sum", "ntracks"]

Unnamed: 0_level_0,z0_low,z0_high,pt_sum,ntracks
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,-5.625,-5.507812,7.957812,2
1,-4.921875,-4.921875,4.852124,2
2,-4.042969,-4.042969,4.330626,2
3,-3.691406,-3.515625,25.086727,5
4,-3.046875,-2.929688,6.515635,3
5,-2.402344,-1.875,42.534632,9
6,-1.464844,-1.40625,6.899173,2
7,-0.410156,-0.292969,6.248494,2
8,0.234375,0.410156,10.609376,4
9,6.503906,6.621094,4.153777,2


In [257]:
clusters_batch.to_pickle("clusters_batch_0_skl.pkl")

### Running the accelerated dbscan

In [241]:
t = np.zeros((50, 3))
t[:, 0] = z0_0
t[:, 1] = pt_0

In [242]:
t[:, 0]

array([-5.62, -5.51, -5.1 , -4.92, -4.92, -4.69, -4.22, -4.04, -4.04,
       -3.69, -3.69, -3.63, -3.57, -3.52, -3.05, -2.93, -2.93, -2.58,
       -2.4 , -2.34, -2.23, -2.23, -2.23, -2.17, -2.05, -1.99, -1.88,
       -1.7 , -1.46, -1.41, -0.94, -0.76, -0.41, -0.29,  0.06,  0.23,
        0.23,  0.29,  0.41,  0.88,  1.35,  2.11,  2.81,  3.57,  3.87,
        5.1 ,  5.74,  6.5 ,  6.62,  7.03])

In [244]:
lb = db.find_left_boundaries(t)

In [247]:
help(db.find_right_boundaries)

Help on method find_right_boundaries in module __main__:

find_right_boundaries(left_boundaries, rs, tracks) method of __main__.BatchedDBSCAN instance



In [268]:
b = db.find_right_boundaries(lb, rs_0, t)

In [266]:
help(db.convert_boundaries_to_clusters)

Help on method convert_boundaries_to_clusters in module __main__:

convert_boundaries_to_clusters(boundaries: <built-in function array>) -> <built-in function array> method of __main__.BatchedDBSCAN instance



In [269]:
b

array([[  0.  ,   0.  ,   4.83,   4.83,  -5.62,  -5.51,   0.  ],
       [  1.  ,   4.83,   7.96,   3.13,  -5.51,  -5.1 ,   0.  ],
       [  2.  ,   7.96,  10.66,   2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  3.  ,  10.66,  13.55,   2.89,  -4.92,  -4.92,   0.  ],
       [  4.  ,  13.55,  15.51,   1.96,  -4.92,  -4.69,   0.  ],
       [  5.  ,  15.51,  17.47,   1.96,  -4.69,  -4.69,   1.  ],
       [  6.  ,  17.47,  19.82,   2.35,  -4.22,  -4.22,   1.  ],
       [  7.  ,  19.82,  22.03,   2.21,  -4.04,  -4.04,   0.  ],
       [  8.  ,  22.03,  24.15,   2.12,  -4.04,  -3.69,   0.  ],
       [  9.  ,  24.15,  26.12,   1.98,  -3.69,  -3.69,   0.  ],
       [ 13.  ,  42.14,  49.23,   7.1 ,  -3.52,  -3.05,   0.  ],
       [ 14.  ,  49.23,  51.34,   2.11,  -3.05,  -2.93,   0.  ],
       [ 16.  ,  53.77,  55.75,   1.97,  -2.93,  -2.58,   0.  ],
       [ 17.  ,  55.75,  57.84,   2.09,  -2.58,  -2.58,   1.  ],
       [ 18.  ,  57.84,  60.63,   2.8 ,  -2.4 ,  -2.34,   0.  ],
       [ 26.  ,  97.91, 1

In [270]:
_b = pd.DataFrame(
    b, columns=["idx", "pt_l", "pt_h", "diff", "z0_low", "z0_high", "noise"]
)

In [272]:
_b

Unnamed: 0,idx,pt_l,pt_h,diff,z0_low,z0_high,noise
0,0.0,0.0,4.826959,4.826959,-5.625,-5.507812,0.0
1,1.0,4.826959,7.957812,3.130853,-5.507812,-5.097656,0.0
2,2.0,7.957812,10.656403,2.698591,-5.097656,-5.097656,1.0
3,3.0,10.656403,13.549811,2.893408,-4.921875,-4.921875,0.0
4,4.0,13.549811,15.508527,1.958716,-4.921875,-4.6875,0.0
5,5.0,15.508527,17.466553,1.958026,-4.6875,-4.6875,1.0
6,6.0,17.466553,19.816681,2.350128,-4.21875,-4.21875,1.0
7,7.0,19.816681,22.027795,2.211114,-4.042969,-4.042969,0.0
8,8.0,22.027795,24.147307,2.119512,-4.042969,-3.691406,0.0
9,9.0,24.147307,26.124864,1.977557,-3.691406,-3.691406,0.0


In [254]:
c = db.convert_boundaries_to_clusters(b)

In [258]:
_c = pd.DataFrame({})

In [259]:
_c["z0_low"] = c[:, 1]
_c["z0_high"] = c[:, 2]
_c["pt_sum"] = c[:, 0]
_c["noise"] = c[:, 3]

In [260]:
_c

Unnamed: 0,z0_low,z0_high,pt_sum,noise
0,-5.625,-5.507812,7.957812,0.0
1,-5.097656,-4.921875,5.591999,0.0
2,-4.921875,-4.6875,3.916742,0.0
3,-4.21875,-4.042969,4.561242,0.0
4,-4.042969,-3.691406,4.097069,0.0
5,-3.515625,-3.046875,9.206311,0.0
6,-2.929688,-2.578125,4.06041,0.0
7,-2.402344,-1.875,42.534632,0.0
8,-1.699219,-1.464844,7.153902,0.0
9,-1.40625,-0.9375,5.354043,0.0


In [256]:
pd.DataFrame(c, columns=["pt_sum", "z0_low", "z0_high", "noise"])

Unnamed: 0,pt_sum,z0_low,z0_high,noise
0,7.957812,-5.625,-5.507812,0.0
1,5.591999,-5.097656,-4.921875,0.0
2,3.916742,-4.921875,-4.6875,0.0
3,4.561242,-4.21875,-4.042969,0.0
4,4.097069,-4.042969,-3.691406,0.0
5,9.206311,-3.515625,-3.046875,0.0
6,4.06041,-2.929688,-2.578125,0.0
7,42.534632,-2.402344,-1.875,0.0
8,7.153902,-1.699219,-1.464844,0.0
9,5.354043,-1.40625,-0.9375,0.0


In [251]:
z0_0

array([-5.62, -5.51, -5.1 , -4.92, -4.92, -4.69, -4.22, -4.04, -4.04,
       -3.69, -3.69, -3.63, -3.57, -3.52, -3.05, -2.93, -2.93, -2.58,
       -2.4 , -2.34, -2.23, -2.23, -2.23, -2.17, -2.05, -1.99, -1.88,
       -1.7 , -1.46, -1.41, -0.94, -0.76, -0.41, -0.29,  0.06,  0.23,
        0.23,  0.29,  0.41,  0.88,  1.35,  2.11,  2.81,  3.57,  3.87,
        5.1 ,  5.74,  6.5 ,  6.62,  7.03])

In [252]:
pt_0

array([ 4.83,  3.13,  2.7 ,  2.89,  1.96,  1.96,  2.35,  2.21,  2.12,
        1.98,  1.98,  5.01,  9.02,  7.1 ,  2.11,  2.43,  1.97,  2.09,
        2.8 ,  3.1 ,  4.83,  7.7 ,  4.24,  2.15, 10.22,  5.04,  2.46,
        2.71,  4.44,  2.46,  2.9 ,  2.09,  4.1 ,  2.15,  1.97,  3.67,
        2.07,  2.45,  2.42,  4.87,  2.79,  4.8 ,  3.19,  3.3 ,  2.18,
        2.54,  2.31,  2.08,  2.08,  2.06])

In [249]:
b

array([[  0.  ,   0.  ,   4.83,   4.83,  -5.62,  -5.51],
       [  1.  ,   4.83,   7.96,   3.13,  -5.51,  -5.1 ],
       [  2.  ,   7.96,  10.66,   2.7 ,  -5.1 ,  -5.1 ],
       [  3.  ,  10.66,  13.55,   2.89,  -4.92,  -4.92],
       [  4.  ,  13.55,  15.51,   1.96,  -4.92,  -4.69],
       [  5.  ,  15.51,  17.47,   1.96,  -4.69,  -4.69],
       [  6.  ,  17.47,  19.82,   2.35,  -4.22,  -4.22],
       [  7.  ,  19.82,  22.03,   2.21,  -4.04,  -4.04],
       [  8.  ,  22.03,  24.15,   2.12,  -4.04,  -3.69],
       [  9.  ,  24.15,  26.12,   1.98,  -3.69,  -3.69],
       [ 13.  ,  42.14,  49.23,   7.1 ,  -3.52,  -3.05],
       [ 14.  ,  49.23,  51.34,   2.11,  -3.05,  -2.93],
       [ 16.  ,  53.77,  55.75,   1.97,  -2.93,  -2.58],
       [ 17.  ,  55.75,  57.84,   2.09,  -2.58,  -2.58],
       [ 18.  ,  57.84,  60.63,   2.8 ,  -2.4 ,  -2.34],
       [ 26.  ,  97.91, 100.37,   2.46,  -1.88,  -1.7 ],
       [ 27.  , 100.37, 103.08,   2.71,  -1.7 ,  -1.7 ],
       [ 28.  , 103.08, 107.52,

### Reworking the convert_boundaries_to_clusters function

In [None]:
def convert_boundaries_to_clusters(self, boundaries: np.array) -> np.array:
    n_boundaries = boundaries.shape[0]
    n_clusters = math.ceil(n_boundaries / 2)
    clusters = np.zeros((n_boundaries, 4))
    j = 0
    i = 0
    while i < n_boundaries:
        check_noise = boundaries[i, -1] == 1
        if check_noise:

            pt_low = boundaries[i, 1]
            pt_high = boundaries[i, 2]
            pt_sum = pt_high - pt_low
            z0_low = boundaries[i, 4]
            z0_high = boundaries[i, 5]

            clusters[j, self.pt_idx] = pt_sum
            clusters[j, self.z0_low_idx] = z0_low
            clusters[j, self.z0_high_idx] = z0_high
            clusters[j, self.noise_idx] = 1
            j += 1
            i += 1
        else:
            pt_low = boundaries[i, 1]
            pt_high = boundaries[i + 1, 2]
            pt_sum = pt_high - pt_low
            z0_low = boundaries[i, 4]
            z0_high = boundaries[i + 1, 4]

            clusters[j, self.pt_idx] = pt_sum
            clusters[j, self.z0_low_idx] = z0_low
            clusters[j, self.z0_high_idx] = z0_high
            j += 1
            i += 2
    return clusters


BatchedDBSCAN.convert_boundaries_to_clusters = convert_boundaries_to_clusters

In [273]:
is_noise = b[:, -1]

In [276]:
b

array([[  0.  ,   0.  ,   4.83,   4.83,  -5.62,  -5.51,   0.  ],
       [  1.  ,   4.83,   7.96,   3.13,  -5.51,  -5.1 ,   0.  ],
       [  2.  ,   7.96,  10.66,   2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  3.  ,  10.66,  13.55,   2.89,  -4.92,  -4.92,   0.  ],
       [  4.  ,  13.55,  15.51,   1.96,  -4.92,  -4.69,   0.  ],
       [  5.  ,  15.51,  17.47,   1.96,  -4.69,  -4.69,   1.  ],
       [  6.  ,  17.47,  19.82,   2.35,  -4.22,  -4.22,   1.  ],
       [  7.  ,  19.82,  22.03,   2.21,  -4.04,  -4.04,   0.  ],
       [  8.  ,  22.03,  24.15,   2.12,  -4.04,  -3.69,   0.  ],
       [  9.  ,  24.15,  26.12,   1.98,  -3.69,  -3.69,   0.  ],
       [ 13.  ,  42.14,  49.23,   7.1 ,  -3.52,  -3.05,   0.  ],
       [ 14.  ,  49.23,  51.34,   2.11,  -3.05,  -2.93,   0.  ],
       [ 16.  ,  53.77,  55.75,   1.97,  -2.93,  -2.58,   0.  ],
       [ 17.  ,  55.75,  57.84,   2.09,  -2.58,  -2.58,   1.  ],
       [ 18.  ,  57.84,  60.63,   2.8 ,  -2.4 ,  -2.34,   0.  ],
       [ 26.  ,  97.91, 1

In [274]:
is_noise

array([0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
       0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [290]:
bound_i = 0
cluster_j = 0
clusters_t = np.zeros((50, 4))
for _ in range(b.shape[0]):

    noise = is_noise[bound_i]
    if noise:
        z0_low = b[bound_i, 4]
        z0_high = b[bound_i, 5]
        pt_sum = b[bound_i, 2] - b[bound_i, 1]
        print(cluster_j, z0_low, z0_high, pt_sum, 1)
        clusters_t[cluster_j, 0] = z0_low
        clusters_t[cluster_j, 1] = z0_high
        clusters_t[cluster_j, 2] = pt_sum
        clusters_t[cluster_j, 3] = noise
        bound_i += 1
        cluster_j += 1
    else:
        z0_low = b[bound_i, 4]
        z0_high = b[bound_i + 1, 4]
        pt_sum = b[bound_i + 1, 2] - b[bound_i, 1]
        print(cluster_j, z0_low, z0_high, pt_sum, 0)
        clusters_t[cluster_j, 0] = z0_low
        clusters_t[cluster_j, 1] = z0_high
        clusters_t[cluster_j, 2] = pt_sum
        clusters_t[cluster_j, 3] = noise

        bound_i += 2
        cluster_j += 1
    if bound_i >= b.shape[0]:
        break

0 -5.625 -5.5078125 7.957811594009399 0
1 -5.09765625 -5.09765625 2.698591470718384 1
2 -4.921875 -4.921875 4.852123975753784 0
3 -4.6875 -4.6875 1.9580256938934326 1
4 -4.21875 -4.21875 2.350127935409546 1
5 -4.04296875 -4.04296875 4.330626487731934 0
6 -3.69140625 -3.515625 25.086726665496826 0
7 -3.046875 -2.9296875 6.515634536743164 0
8 -2.578125 -2.578125 2.085667133331299 1
9 -2.40234375 -1.875 42.534632444381714 0
10 -1.69921875 -1.69921875 2.713099956512451 1
11 -1.46484375 -1.40625 6.8991734981536865 0
12 -0.9375 -0.9375 2.8956716060638428 1
13 -0.76171875 -0.76171875 2.0899858474731445 1
14 -0.41015625 -0.29296875 6.248493909835815 0
15 0.05859375 0.05859375 1.9747428894042969 1
16 0.234375 0.41015625 10.609376430511475 0
17 0.87890625 0.87890625 4.869300365447998 1
18 1.34765625 1.34765625 2.792957305908203 1
19 2.109375 2.109375 4.801905155181885 1
20 2.8125 2.8125 3.187483549118042 1
21 3.57421875 3.57421875 3.304168224334717 1
22 3.8671875 3.8671875 2.1751575469970703 1
2

In [291]:
clusters_t

array([[-5.62, -5.51,  7.96,  0.  ],
       [-5.1 , -5.1 ,  2.7 ,  1.  ],
       [-4.92, -4.92,  4.85,  0.  ],
       [-4.69, -4.69,  1.96,  1.  ],
       [-4.22, -4.22,  2.35,  1.  ],
       [-4.04, -4.04,  4.33,  0.  ],
       [-3.69, -3.52, 25.09,  0.  ],
       [-3.05, -2.93,  6.52,  0.  ],
       [-2.58, -2.58,  2.09,  1.  ],
       [-2.4 , -1.88, 42.53,  0.  ],
       [-1.7 , -1.7 ,  2.71,  1.  ],
       [-1.46, -1.41,  6.9 ,  0.  ],
       [-0.94, -0.94,  2.9 ,  1.  ],
       [-0.76, -0.76,  2.09,  1.  ],
       [-0.41, -0.29,  6.25,  0.  ],
       [ 0.06,  0.06,  1.97,  1.  ],
       [ 0.23,  0.41, 10.61,  0.  ],
       [ 0.88,  0.88,  4.87,  1.  ],
       [ 1.35,  1.35,  2.79,  1.  ],
       [ 2.11,  2.11,  4.8 ,  1.  ],
       [ 2.81,  2.81,  3.19,  1.  ],
       [ 3.57,  3.57,  3.3 ,  1.  ],
       [ 3.87,  3.87,  2.18,  1.  ],
       [ 5.1 ,  5.1 ,  2.54,  1.  ],
       [ 5.74,  5.74,  2.31,  1.  ],
       [ 6.5 ,  6.62,  4.15,  0.  ],
       [21.  , 21.  ,  0.  ,  0.  ],
 

In [369]:
def b_to_c(b, is_noise):
    bound_i = 0
    cluster_j = 0
    clusters_t = np.zeros((50, 4))
    for _ in range(b.shape[0]):
        # if bound_i == b.shape[0] - 1:
        #     break

        if b[bound_i, 4] == 21:
            break
        noise = is_noise[bound_i]

        print(bound_i)

        if noise:
            z0_low = b[bound_i, 4]
            z0_high = b[bound_i, 5]
            pt_sum = b[bound_i, 2] - b[bound_i, 1]
            # print(cluster_j, z0_low, z0_high, pt_sum, 1)
            clusters_t[cluster_j, 0] = z0_low
            clusters_t[cluster_j, 1] = z0_high
            clusters_t[cluster_j, 2] = pt_sum
            clusters_t[cluster_j, 3] = noise
            bound_i += 1
            cluster_j += 1
        else:
            z0_low = b[bound_i, 4]
            z0_high = b[bound_i + 1, 4]
            pt_sum = b[bound_i + 1, 2] - b[bound_i, 1]
            # print(cluster_j, z0_low, z0_high, pt_sum, 0)
            clusters_t[cluster_j, 0] = z0_low
            clusters_t[cluster_j, 1] = z0_high
            clusters_t[cluster_j, 2] = pt_sum
            clusters_t[cluster_j, 3] = noise

            bound_i += 2
            cluster_j += 1

    return clusters_t

**LOOKS ALMOST FIXED, GOTTA FIX THE LAST BOUNDARY NOW**

In [299]:
lb[-3:]

array([ True, False,  True])

In [297]:
lb[50 - 1]

True

In [321]:
def find_right_boundaries(self, left_boundaries, rs, tracks):

    max_tracks = self.batch_size

    boundaries = np.zeros((max_tracks, 7))
    is_noise = np.ones((max_tracks, 1))

    for i in range(max_tracks - 1):

        left_edge = left_boundaries[i] and not (left_boundaries[i + 1])  # 1, 0
        right_edge = not (left_boundaries[i]) and left_boundaries[i + 1]  # 0, 1
        check_noise = (left_boundaries[i] == 1) and (left_boundaries[i + 1] == 1)

        if left_edge or right_edge:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i + 1, 0]
            is_noise[i] = 0
        elif check_noise:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i, 0]
            boundaries[i][6] = 1
            # is_noise[i] = 1
        else:
            boundaries[i][0] = max_tracks
            boundaries[i][1] = 0
            boundaries[i][2] = 0
            boundaries[i][3] = 0
            boundaries[i][4] = 21
            boundaries[i][5] = 21
            is_noise[i] = 0

    # Check for the last boundary
    if left_boundaries[max_tracks - 1] and is_noise[max_tracks - 1] == 0:
        boundaries[max_tracks - 1][0] = max_tracks
        boundaries[max_tracks - 1][1] = 0
        boundaries[max_tracks - 1][2] = 0
        boundaries[max_tracks - 1][3] = 0
        boundaries[max_tracks - 1][4] = 21
        boundaries[max_tracks - 1][5] = 21
        print("1")
    elif left_boundaries[max_tracks - 1] and is_noise[max_tracks - 1] == 1:
        boundaries[max_tracks - 1][0] = max_tracks - 1
        boundaries[max_tracks - 1][1] = rs[max_tracks - 1]
        boundaries[max_tracks - 1][2] = rs[max_tracks]
        boundaries[max_tracks - 1][3] = rs[max_tracks] - rs[max_tracks - 1]
        boundaries[max_tracks - 1][4] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][5] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][6] = 1
        print("2")
    else:
        boundaries[max_tracks - 1][0] = max_tracks - 1
        boundaries[max_tracks - 1][1] = rs[max_tracks - 1]
        boundaries[max_tracks - 1][2] = rs[max_tracks]
        boundaries[max_tracks - 1][3] = rs[max_tracks] - rs[max_tracks - 1]
        boundaries[max_tracks - 1][4] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][5] = tracks[max_tracks - 1, 0]
        print("3")

    # boundaries[:, 6] = is_noise
    # Sort boundaries by the index
    sort_idx = boundaries[:, 0].argsort()
    boundaries = boundaries[sort_idx]
    is_noise = is_noise[sort_idx]
    self.is_noise = is_noise
    return boundaries


BatchedDBSCAN.find_right_boundaries = find_right_boundaries

In [342]:
bc = db.find_right_boundaries(lb, rs_0, t)

2


In [370]:
ct = b_to_c(bc, is_noise)

0
2
3
5
6
7
9
11
13
14
16
17
19
20
21
23
24
26
27
28
29
30
31
32
33
34
36


In [330]:
is_noise

array([0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
       0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [371]:
ct

array([[-5.62, -5.51,  7.96,  0.  ],
       [-5.1 , -5.1 ,  2.7 ,  1.  ],
       [-4.92, -4.92,  4.85,  0.  ],
       [-4.69, -4.69,  1.96,  1.  ],
       [-4.22, -4.22,  2.35,  1.  ],
       [-4.04, -4.04,  4.33,  0.  ],
       [-3.69, -3.52, 25.09,  0.  ],
       [-3.05, -2.93,  6.52,  0.  ],
       [-2.58, -2.58,  2.09,  1.  ],
       [-2.4 , -1.88, 42.53,  0.  ],
       [-1.7 , -1.7 ,  2.71,  1.  ],
       [-1.46, -1.41,  6.9 ,  0.  ],
       [-0.94, -0.94,  2.9 ,  1.  ],
       [-0.76, -0.76,  2.09,  1.  ],
       [-0.41, -0.29,  6.25,  0.  ],
       [ 0.06,  0.06,  1.97,  1.  ],
       [ 0.23,  0.41, 10.61,  0.  ],
       [ 0.88,  0.88,  4.87,  1.  ],
       [ 1.35,  1.35,  2.79,  1.  ],
       [ 2.11,  2.11,  4.8 ,  1.  ],
       [ 2.81,  2.81,  3.19,  1.  ],
       [ 3.57,  3.57,  3.3 ,  1.  ],
       [ 3.87,  3.87,  2.18,  1.  ],
       [ 5.1 ,  5.1 ,  2.54,  1.  ],
       [ 5.74,  5.74,  2.31,  1.  ],
       [ 6.5 ,  6.62,  4.15,  0.  ],
       [ 7.03,  7.03,  2.06,  1.  ],
 

**SEEMS LIKE IT HAS ALL BEEN FIXED**

# Let's rewrite the whole class now

In [375]:
class BatchedDBSCAN:
    def __init__(
        self,
        z0,
        pt,
        eps,
        batch_size,
        max_number_of_tracks,
        verbose: bool = False,
        save_intermediate: bool = False,
    ):

        self.eps = eps
        self.batch_size = batch_size
        self.verbose = verbose
        self.save_intermediate = save_intermediate
        self.z0_boundary = 21  # 21 cm is outside the detector acceptance
        self.pt_boundary = 0  # 0 pT won't contribute to the pT sum.
        self.minPts = 2  # This algorithm only works for a minimum number of 2 points

        self.max_number_of_tracks = int(max_number_of_tracks)
        self.n_batches = math.ceil(self.max_number_of_tracks / self.batch_size)

        # Max number of tracks including all batches
        self.max_n_tracks_batched = self.batch_size * self.n_batches
        self.max_n_clusters_batch = math.ceil(self.batch_size / self.minPts)
        self.max_n_clusters = math.ceil(self.max_n_tracks_batched / self.minPts)

        # Need to pad vectors to the max_number_of_tracks allowed so that it matches the fpga input
        n_pad = self.max_number_of_tracks - z0.shape[0]
        # if verbose:
        # print("original number of tracks: ", z0.shape)
        self.z0 = self.pad_vector(z0, n_pad, self.z0_boundary)
        self.pt = self.pad_vector(pt, n_pad, self.pt_boundary)

        # These are needed for the prefix sum
        self.max_number_of_tracks_power_2 = (
            1 << (self.max_number_of_tracks - 1).bit_length()
        )
        self.batch_size_power_2 = 1 << (self.batch_size - 1).bit_length()
        self.max_number_of_tracks_log_2 = np.log2(self.max_number_of_tracks_power_2)
        self.batch_size_log_2 = np.log2(self.batch_size_power_2)
        # self.n_batches = math.ceil(self.max_number_of_tracks / self.batch_size)
        self.results = {}
        self.results_sklearn = {}
        self.merged_list = []

    def pad_vector(self, vec, n_pad, value):
        """pads vector to a set size with given value"""

        vec_to_pad = value * np.ones(n_pad)
        vec = np.append(vec, vec_to_pad)

        return vec

    def build_tracks(self, z0, pt):
        """Builds tracks batchess"""

        # Shape is determined by the size of batch, z0, pT and label (not used atm)
        track_batch = np.zeros((self.batch_size, 3))

        track_batch[:, 0] = z0
        track_batch[:, 1] = pt

        # sort the tracks by z0
        track_batch = track_batch[track_batch[:, 0].argsort()]

        return track_batch

In [376]:
def prefix_sum(self, arr):
    """
    Calculates the prefix sum of pT.
    Warning, requires array to be of size thats log base of 2.
    """
    size_log2 = int(np.log2(arr.shape[0]))

    # up-sweep
    for d in range(0, size_log2, 1):
        step_size = 2**d
        double_step_size = step_size * 2

        for i in range(0, arr.shape[0], double_step_size):
            arr[i + double_step_size - 1] += arr[i + step_size - 1]

    # down-sweep
    arr[arr.shape[0] - 1] = 0
    d = size_log2 - 1

    while d >= 0:
        step_size = 2**d
        double_step_size = step_size * 2
        for i in range(0, arr.shape[0], double_step_size):
            tmp = arr[i + step_size - 1]
            arr[i + step_size - 1] = arr[i + double_step_size - 1]
            arr[i + double_step_size - 1] += tmp
        d -= 1

    return arr


BatchedDBSCAN.prefix_sum = prefix_sum

In [377]:
def find_left_boundaries(self, tracks):

    left_boundaries = np.zeros(self.batch_size, dtype=bool)

    # first value is always a left boundary
    left_boundaries[0] = 1

    for i in range(1, self.batch_size):
        _t = tracks[i]

        if _t[0] - tracks[i - 1][0] > self.eps:
            tracks[i][2] = -1
            left_boundaries[i] = 1
        else:
            left_boundaries[i] = 0

    self.left_boundaries = left_boundaries
    return left_boundaries


BatchedDBSCAN.find_left_boundaries = find_left_boundaries

In [378]:
def find_right_boundaries(self, left_boundaries, rs, tracks):

    max_tracks = self.batch_size

    boundaries = np.zeros((max_tracks, 7))
    is_noise = np.ones((max_tracks, 1))

    for i in range(max_tracks - 1):

        left_edge = left_boundaries[i] and not (left_boundaries[i + 1])  # 1, 0
        right_edge = not (left_boundaries[i]) and left_boundaries[i + 1]  # 0, 1
        check_noise = (left_boundaries[i] == 1) and (left_boundaries[i + 1] == 1)

        if left_edge or right_edge:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i + 1, 0]
            is_noise[i] = 0
        elif check_noise:
            boundaries[i][0] = i
            boundaries[i][1] = rs[i]
            boundaries[i][2] = rs[i + 1]
            boundaries[i][3] = rs[i + 1] - rs[i]
            boundaries[i][4] = tracks[i, 0]
            boundaries[i][5] = tracks[i, 0]
            boundaries[i][6] = 1
            # is_noise[i] = 1
        else:
            boundaries[i][0] = max_tracks
            boundaries[i][1] = 0
            boundaries[i][2] = 0
            boundaries[i][3] = 0
            boundaries[i][4] = 21
            boundaries[i][5] = 21
            is_noise[i] = 0

    # Check for the last boundary
    if left_boundaries[max_tracks - 1] and is_noise[max_tracks - 1] == 0:
        boundaries[max_tracks - 1][0] = max_tracks
        boundaries[max_tracks - 1][1] = 0
        boundaries[max_tracks - 1][2] = 0
        boundaries[max_tracks - 1][3] = 0
        boundaries[max_tracks - 1][4] = 21
        boundaries[max_tracks - 1][5] = 21
        print("1")
    elif left_boundaries[max_tracks - 1] and is_noise[max_tracks - 1] == 1:
        boundaries[max_tracks - 1][0] = max_tracks - 1
        boundaries[max_tracks - 1][1] = rs[max_tracks - 1]
        boundaries[max_tracks - 1][2] = rs[max_tracks]
        boundaries[max_tracks - 1][3] = rs[max_tracks] - rs[max_tracks - 1]
        boundaries[max_tracks - 1][4] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][5] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][6] = 1
        print("2")
    else:
        boundaries[max_tracks - 1][0] = max_tracks - 1
        boundaries[max_tracks - 1][1] = rs[max_tracks - 1]
        boundaries[max_tracks - 1][2] = rs[max_tracks]
        boundaries[max_tracks - 1][3] = rs[max_tracks] - rs[max_tracks - 1]
        boundaries[max_tracks - 1][4] = tracks[max_tracks - 1, 0]
        boundaries[max_tracks - 1][5] = tracks[max_tracks - 1, 0]
        print("3")

    # boundaries[:, 6] = is_noise
    # Sort boundaries by the index
    sort_idx = boundaries[:, 0].argsort()
    boundaries = boundaries[sort_idx]
    is_noise = is_noise[sort_idx]
    self.is_noise = is_noise
    return boundaries


BatchedDBSCAN.find_right_boundaries = find_right_boundaries

In [379]:
def get_vertex(self, cluster_of_tracks: np.array) -> float:
    """
    Calculates the median z0 of the cluster of tracks
    """

    n_size = cluster_of_tracks.shape[0]

    if n_size % 2 == 0:
        return 0.5 * (
            cluster_of_tracks[n_size // 2] + cluster_of_tracks[n_size // 2 - 1]
        )
    else:
        return cluster_of_tracks[n_size // 2]


BatchedDBSCAN.get_vertex = get_vertex

In [380]:
def merge_clusters(self, clusters: np.array) -> np.array:

    n_clusters = clusters.shape[0]
    if self.n_batches == 1:
        self.max_pt_i = np.argmax(clusters[:, self.pt_idx])
        self.max_pt = clusters[self.max_pt_i, self.pt_idx]
        self.merge_count = 0
        return clusters

    else:
        max_pt = 0
        max_pt_i = 0
        merge_count = 0

        comb = list(itertools.combinations(range(n_clusters), 2))

        for i, j in comb:
            if clusters[i, self.z0_low_idx] >= 21:
                continue

            if max_pt < clusters[i, self.pt_idx]:
                max_pt = clusters[i, self.pt_idx]
                max_pt_i = i

            if clusters[j, self.z0_low_idx] >= 21:
                continue

            case1 = (clusters[i, self.z0_low_idx] - self.eps) <= clusters[
                j, self.z0_high_idx
            ]
            case2 = (clusters[i, self.z0_high_idx] + self.eps) >= clusters[
                j, self.z0_low_idx
            ]

            if case1 and case2:
                c1 = copy.copy(clusters[i, :])
                c2 = copy.copy(clusters[j, :])
                c1_noise = clusters[i, -1]
                c2_noise = clusters[j, -1]

                if c1_noise:
                    clusters[i, -1] = 0

                merge_count += 1
                # Expand boundaries of cluster after merging
                if clusters[j, self.z0_low_idx] < clusters[i, self.z0_low_idx]:
                    clusters[i, self.z0_low_idx] = clusters[j, self.z0_low_idx]
                if clusters[j, self.z0_high_idx] > clusters[i, self.z0_high_idx]:
                    clusters[i, self.z0_high_idx] = clusters[j, self.z0_high_idx]
                clusters[i, self.pt_idx] += clusters[j, self.pt_idx]

                # check if the pT_sum max is now higher
                if max_pt < clusters[i, self.pt_idx]:
                    max_pt = clusters[i, self.pt_idx]
                    max_pt_i = i

                # Erase merged cluster.
                clusters[j, self.pt_idx] = 0
                clusters[j, self.z0_low_idx] = 21
                clusters[j, self.z0_high_idx] = 21

                merged_str = f"""merging cluster [{round(c1[self.z0_low_idx],2), round(c1[self.z0_high_idx],2), round(c1[self.pt_idx],2)}] and [{round(c2[self.z0_low_idx],2), round(c2[self.z0_high_idx],2), round(c2[self.pt_idx],2)}] --> [{round(clusters[i,self.z0_low_idx],2), round(clusters[i,self.z0_high_idx], 2), round(clusters[i,self.pt_idx],2)}]"""
                self.merged_list.append(merged_str)

        self.max_pt = max_pt
        self.max_pt_i = max_pt_i
        self.merge_count = merge_count
        return clusters


BatchedDBSCAN.merge_clusters = merge_clusters

In [381]:
def initialize_clusters(self, max_n_clusters: int) -> np.array:

    # clusters = np.zeros((max_n_clusters, 7))
    # clusters[:, 4] = 21
    # clusters[:, 5] = 21

    clusters = np.zeros((max_n_clusters, 4))
    # pT, z0_low, z0_high, Noise
    self.pt_idx = 0
    self.z0_low_idx = 1
    self.z0_high_idx = 2
    self.noise_idx = 3

    clusters[:, self.z0_low_idx] = 21
    clusters[:, self.z0_high_idx] = 21

    return clusters


BatchedDBSCAN.initialize_clusters = initialize_clusters

In [384]:
def convert_boundaries_to_clusters(self, boundaries: np.array):
    bound_i = 0
    cluster_j = 0
    clusters = np.zeros((50, 4))
    n_boundaries = boundaries.shape[0]
    for _ in range(n_boundaries):

        if boundaries[bound_i, 4] == 21:
            break
        noise = boundaries[bound_i, -1]

        if noise:
            z0_low = boundaries[bound_i, 4]
            z0_high = boundaries[bound_i, 5]
            pt_sum = boundaries[bound_i, 2] - boundaries[bound_i, 1]

            clusters[cluster_j, 0] = z0_low
            clusters[cluster_j, 1] = z0_high
            clusters[cluster_j, 2] = pt_sum
            clusters[cluster_j, 3] = noise
            bound_i += 1
            cluster_j += 1
        else:
            z0_low = boundaries[bound_i, 4]
            z0_high = boundaries[bound_i + 1, 4]
            pt_sum = boundaries[bound_i + 1, 2] - boundaries[bound_i, 1]

            clusters[cluster_j, 0] = z0_low
            clusters[cluster_j, 1] = z0_high
            clusters[cluster_j, 2] = pt_sum
            clusters[cluster_j, 3] = noise

            bound_i += 2
            cluster_j += 1

    return clusters


BatchedDBSCAN.convert_boundaries_to_clusters = convert_boundaries_to_clusters

In [512]:
def fitsklearn(self):
    start_idx = 0
    end_idx = start_idx + self.batch_size
    n_pad = (self.n_batches * self.batch_size) - self.z0.shape[0]
    self.z0 = self.pad_vector(self.z0, n_pad, 21)
    self.pt = self.pad_vector(self.pt, n_pad, 0)

    clusters_df = pd.DataFrame({})
    clusters = self.initialize_clusters(self.max_n_clusters)

    for i in range(self.n_batches):
        start_idx = i * self.batch_size
        end_idx = (i + 1) * self.batch_size
        z0_batch = self.z0[start_idx:end_idx]
        pt_batch = self.pt[start_idx:end_idx]

        _db = DBSCAN(eps=0.15, min_samples=2).fit(z0_batch.reshape(-1, 1))

        _results = pd.DataFrame({"z0": z0_batch, "pt": pt_batch, "label": _db.labels_})
        max_label = _results.label.max()
        n_noise = _results[_results.label == -1].shape[0]

        _results.loc[_results.label == -1, "label"] = np.arange(n_noise) + max_label + 1

        clusters_batch = _results.groupby(["label"]).agg(
            {"z0": [np.min, np.max], "pt": [np.sum, "count"]}
        )
        clusters_batch.columns = ["z0_low", "z0_high", "pt_sum", "ntracks"]
        clusters_batch["noise"] = 0
        clusters_batch.loc[clusters_batch["ntracks"] < 2, "noise"] = 1
        clusters_batch = clusters_batch.drop(columns=["ntracks"])

        self.results_sklearn[i] = clusters_batch

        clusters_df = pd.concat([clusters_df, clusters_batch])

    n_clusters = clusters_df.shape[0]

    clusters[0:n_clusters, self.pt_idx] = clusters_df["pt_sum"]
    clusters[0:n_clusters, self.z0_low_idx] = clusters_df["z0_low"]
    clusters[0:n_clusters, self.z0_high_idx] = clusters_df["z0_high"]
    clusters[0:n_clusters, self.noise_idx] = clusters_df["noise"]

    self.clusters_unmerged = pd.DataFrame(
        clusters.copy(), columns=["pt_sum", "z0_low", "z0_high", "noise"]
    )

    clusters_merged = self.merge_clusters(clusters)
    self.clusters_merged = pd.DataFrame(
        clusters_merged, columns=["pt_sum", "z0_low", "z0_high", "ntracks"]
    )
    self.clusters_merged = self.clusters_merged.sort_values(
        by="pt_sum", ascending=False
    )
    pv_z0_low = self.clusters_merged.iloc[0, 1]
    pv_z0_high = self.clusters_merged.iloc[0, 2]

    z0_pv = np.median(self.z0[(self.z0 >= pv_z0_low) & (self.z0 <= pv_z0_high)])
    self.z0_pv_skl = z0_pv


BatchedDBSCAN.fitsklearn = fitsklearn

In [393]:
def fit(self):

    np.set_printoptions(precision=2)
    np.set_printoptions(suppress=True)

    start_idx = 0
    end_idx = start_idx + self.batch_size
    # Need to pad vectors to match the size of n_batches*batch_size
    n_pad = (self.n_batches * self.batch_size) - self.z0.shape[0]
    self.z0 = self.pad_vector(self.z0, n_pad, 21)
    self.pt = self.pad_vector(self.pt, n_pad, 0)

    # clusters = self.initialize_clusters(self.max_n_clusters)
    clusters = self.initialize_clusters(self.max_n_tracks_batched)
    self.z0_batches = {}
    self.pt_batches = {}
    self.rs_batches = {}
    self.left_boundaries_batches = {}
    self.boundaries_batches = {}
    self.clusters_batches = {}

    pv_cluster = np.zeros((1, 4))
    merge_count = 0
    for i in range(self.n_batches):

        start_idx = i * self.batch_size
        end_idx = (i + 1) * self.batch_size

        z0_batch = self.z0[start_idx:end_idx]
        pt_batch = self.pt[start_idx:end_idx]

        track_batch = self.build_tracks(z0_batch, pt_batch)
        self.tracks = track_batch

        rs_batch = self.pad_vector(
            track_batch[:, 1], self.batch_size_power_2 - self.batch_size, 0
        )

        rs_batch = self.prefix_sum(rs_batch)
        self.rs = rs_batch

        # Storing batches
        self.z0_batches[i] = track_batch[:, 0]
        self.pt_batches[i] = track_batch[:, 1]
        self.rs_batches[i] = rs_batch

        # Finding Left Boundaries
        left_boundaries = self.find_left_boundaries(track_batch)
        self.left_boundaries_batches[i] = left_boundaries

        # Finding Right Boundaries
        boundaries = self.find_right_boundaries(left_boundaries, rs_batch, track_batch)
        self.boundaries_batches[i] = boundaries

        self.boundaries = boundaries

        clusters_batch = self.convert_boundaries_to_clusters(boundaries)
        self.clusters_batches[i] = clusters_batch

        clusters[i * self.batch_size : (i + 1) * self.batch_size, :] = clusters_batch

        # if track_batch[-1, 0] == 21:
        #     break

        self.results[i] = clusters_batch

    self.clusters_unmerged = clusters.copy()
    clusters = self.merge_clusters(clusters)

    self.clusters = clusters

    # Find pv_cluster
    pv_cluster[0, :] = clusters[self.max_pt_i, :]

    print(self.max_pt, self.max_pt_i)
    print(f"Merged count: {self.merge_count}")

    pv_tracks = []

    for i in range(self.max_number_of_tracks):
        z0_trk = self.z0[i]

        if (z0_trk >= pv_cluster[0, self.z0_low_idx]) and (
            z0_trk <= pv_cluster[0, self.z0_high_idx]
        ):
            pv_tracks.append(z0_trk)

    median_vertex = self.get_vertex(np.array(pv_tracks))
    self.z0_pv = np.median(pv_tracks)

    print(f"mean: {np.mean(pv_tracks)}")
    print(f"median: {np.median(pv_tracks)}")
    print(f"median2: {median_vertex}")


BatchedDBSCAN.fit = fit

### Testing

In [629]:
for bs in [50]:

    # db2_skl = BatchedDBSCAN(z0, pt, eps, bs, max_number_of_tracks, True, True)
    # db2_skl.fitsklearn()

    db2 = BatchedDBSCAN(z0, pt, eps, bs, max_number_of_tracks, True, True)

    db2.fit()

2
2
3
3
3
0 0
Merged count: 249
mean: 7.957182785560345
median: 2.490234375
median2: 1.640625


### Sklearn results

In [478]:
db2_skl.clusters_unmerged.to_pickle("skl_clusters_unmerged.pkl")

In [455]:
db2_skl.max_pt

110.97117602825165

In [456]:
db2_skl.z0_pv_skl

-2.021484375

In [460]:
db2_skl.clusters_unmerged.loc[db2_skl.clusters_unmerged.pt_sum > 0]

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
0,4.330626,-4.042969,-4.042969,2.0
1,6.248494,-0.410156,-0.292969,2.0
2,10.609376,0.234375,0.410156,4.0
3,42.534632,-2.402344,-1.875,9.0
4,25.086727,-3.691406,-3.515625,5.0
5,6.515635,-3.046875,-2.929688,3.0
6,7.957812,-5.625,-5.507812,2.0
7,6.899173,-1.464844,-1.40625,2.0
8,4.852124,-4.921875,-4.921875,2.0
9,4.153777,6.503906,6.621094,2.0


In [459]:
db2_skl.clusters_merged.loc[db2_skl.clusters_merged.pt_sum > 0].sort_values(
    by="pt_sum", ascending=False
)

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
3,110.971176,-2.519531,-1.40625,0.0
4,104.907081,-3.808594,-3.398438,0.0
2,40.733637,-0.058594,0.410156,0.0
5,19.354282,-3.164062,-2.695312,0.0
19,17.044699,3.457031,3.632812,0.0
1,14.611591,-0.585938,-0.292969,0.0
11,12.665438,2.8125,3.105469,0.0
24,10.708842,-0.117188,0.058594,0.0
25,8.528573,5.742188,5.859375,0.0
17,8.141763,-1.054688,-0.9375,0.0


### Analyzing merge function

In [447]:
def clusters_overlap(self, ci, cj) -> bool:
    # |---- c_i -----|   |---- c_j ----|
    case1 = ci[self.z0_low_idx] - self.eps <= cj[self.z0_high_idx]
    case2 = ci[self.z0_high_idx] + self.eps >= cj[self.z0_low_idx]

    overlap = case1 and case2

    return overlap


BatchedDBSCAN.clusters_overlap = clusters_overlap

In [499]:
def record_merging(self, ci, cj, cn):

    zi_low = round(ci[self.z0_low_idx], 2)
    zi_high = round(ci[self.z0_high_idx], 2)
    zi_pt = round(ci[self.pt_idx], 2)
    ci_str = f"[{zi_low}, {zi_high}, {zi_pt}]"

    zj_low = round(cj[self.z0_low_idx], 2)
    zj_high = round(cj[self.z0_high_idx], 2)
    zj_pt = round(cj[self.pt_idx], 2)
    cj_str = f"[{zj_low}, {zj_high}, {zj_pt}]"

    zn_low = round(cn[self.z0_low_idx], 2)
    zn_high = round(cn[self.z0_high_idx], 2)
    zn_pt = round(cn[self.pt_idx], 2)
    cn_str = f"[{zn_low}, {zn_high}, {zn_pt}]"

    merged_str = ci_str + " + " + cj_str + " -> " + cn_str
    self.merged_list.append(merged_str)


BatchedDBSCAN.record_merging = record_merging

### Testing merging

In [532]:
for bs in [50]:

    db2_skl = BatchedDBSCAN(z0, pt, eps, bs, max_number_of_tracks, True, True)
    db2_skl.fitsklearn()

In [534]:
db2_skl.clusters_merged.sort_values(by="z0_low")

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
47,2.812771,-9.199219,-9.199219,1.0
69,2.654077,-8.261719,-8.261719,1.0
48,2.223069,-6.09375,-6.09375,1.0
6,7.957812,-5.625,-5.507812,0.0
14,2.698591,-5.097656,-5.097656,1.0
8,4.852124,-4.921875,-4.921875,0.0
20,1.958026,-4.6875,-4.6875,1.0
12,13.367491,-4.21875,-4.042969,0.0
4,104.907081,-3.808594,-3.398438,0.0
5,19.354282,-3.164062,-2.695312,0.0


In [516]:
db2_skl.clusters_unmerged.shape[0]

125

In [525]:
comb = list(itertools.permutations(range(3), 2))

In [526]:
comb

[(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

In [527]:
len(comb)

6

In [528]:
count = 0
for i in range(3):
    for j in range(3):
        if i == j:
            continue
        else:
            print(i, j)
            count += 1
print(count)

0 1
0 2
1 0
1 2
2 0
2 1
6


In [531]:
def merge_clusters(self, clusters: np.array) -> np.array:

    n_clusters = clusters.shape[0]
    if self.n_batches == 1:
        self.max_pt_i = np.argmax(clusters[:, self.pt_idx])
        self.max_pt = clusters[self.max_pt_i, self.pt_idx]
        self.merge_count = 0
        return clusters

    else:
        max_pt = 0
        max_pt_i = 0
        merge_count = 0

        comb = list(itertools.permutations(range(n_clusters), 2))
        self.comb = comb

        for i, j in comb:
            # skip if cluster  is outside detector
            if (clusters[i, self.z0_low_idx] >= 21) or (
                clusters[j, self.z0_low_idx] >= 21
            ):
                continue

            ci = copy.copy(clusters[i, :])
            cj = copy.copy(clusters[j, :])
            overlap = self.clusters_overlap(clusters[i, :], clusters[j, :])

            if overlap:

                ci_noise = clusters[i, -1]
                cj_noise = clusters[j, -1]

                if ci_noise:
                    clusters[i, -1] = 0

                merge_count += 1

                # Expand boundaries of cluster after merging
                if clusters[j, self.z0_low_idx] < clusters[i, self.z0_low_idx]:
                    clusters[i, self.z0_low_idx] = clusters[j, self.z0_low_idx]
                if clusters[j, self.z0_high_idx] > clusters[i, self.z0_high_idx]:
                    clusters[i, self.z0_high_idx] = clusters[j, self.z0_high_idx]

                # Add the pT of the cluster being merged.
                clusters[i, self.pt_idx] += clusters[j, self.pt_idx]

                # Erase merged cluster.
                clusters[j, self.pt_idx] = 0
                clusters[j, self.z0_low_idx] = 21
                clusters[j, self.z0_high_idx] = 21
                clusters[j, self.noise_idx] = 0

                self.record_merging(ci, cj, clusters[i, :])
                # print("----overlap detected------")
                # print(ci, cj, clusters[i, :], overlap)
                # print("--------------------------")
            # else:
            # print(ci, cj, overlap)

            # check if the pT_sum max is now higher
            if max_pt < clusters[i, self.pt_idx]:
                max_pt = clusters[i, self.pt_idx]
                max_pt_i = i

        self.max_pt = max_pt
        self.max_pt_i = max_pt_i
        self.merge_count = merge_count
        return clusters


BatchedDBSCAN.merge_clusters = merge_clusters

In [504]:
db2_skl.clusters_unmerged

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
0,4.330626,-4.042969,-4.042969,2.0
1,6.248494,-0.410156,-0.292969,2.0
2,10.609376,0.234375,0.410156,4.0
3,42.534632,-2.402344,-1.875,9.0
4,25.086727,-3.691406,-3.515625,5.0
5,6.515635,-3.046875,-2.929688,3.0
6,7.957812,-5.625,-5.507812,2.0
7,6.899173,-1.464844,-1.40625,2.0
8,4.852124,-4.921875,-4.921875,2.0
9,4.153777,6.503906,6.621094,2.0


In [505]:
db2_skl.clusters_merged

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
3,110.971176,-2.519531,-1.40625,0.0
4,104.907081,-3.808594,-3.398438,0.0
2,40.733637,-0.058594,0.410156,0.0
5,19.354282,-3.164062,-2.695312,0.0
19,17.044699,3.457031,3.632812,0.0
1,14.611591,-0.585938,-0.292969,0.0
11,12.665438,2.8125,3.105469,0.0
24,10.708842,-0.117188,0.058594,0.0
25,8.528573,5.742188,5.859375,0.0
17,8.141763,-1.054688,-0.9375,0.0


In [475]:
db2_skl.clusters_unmerged.values

array([[ 13.37,  -4.22,  -4.04,   0.  ],
       [ 17.67,  -0.59,  -0.29,   0.  ],
       [ 51.44,  -0.12,   0.41,   0.  ],
       [122.67,  -2.58,  -1.41,   0.  ],
       [104.91,  -3.81,  -3.4 ,   0.  ],
       [ 19.35,  -3.16,  -2.7 ,   0.  ],
       [  7.96,  -5.62,  -5.51,   2.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  4.85,  -4.92,  -4.92,   2.  ],
       [  6.15,   6.5 ,   6.62,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [ 14.75,   2.81,   3.11,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  2.54,   5.1 ,   5.1 ,   1.  ],
       [  6.42,   3.87,   3.87,   0.  ],
       [  8.14,  -1.05,  -0.94,   0.  ],
       [  5.26,   1.29,   1.35,   0.  ],
       [ 17.04,   3.46,   3.63,   0.  ],
       [  1.96,  -4.69,  -4.69,   1.  ],
       [  2.09,  -0.76,  -0.76,   1.  ],
       [  2.06,   7.03,   7.03,   1.  ],
       [  6.75,   1.99,   2.11,   0.  ],
       [  0.  , 

In [488]:
db2_skl.merged_list

['merging cluster [(-4.16, -4.04, 6.93)] and [(21.0, 21.0, 0.0)] --> [(-4.16, -4.04, 6.93)]',
 'merging cluster [(-0.59, -0.29, 14.61)] and [(21.0, 21.0, 0.0)] --> [(-0.59, -0.29, 14.61)]',
 'merging cluster [(0.23, 0.41, 24.13)] and [(21.0, 21.0, 0.0)] --> [(0.23, 0.41, 24.13)]',
 'merging cluster [(-0.06, 0.41, 31.85)] and [(21.0, 21.0, 0.0)] --> [(-0.06, 0.41, 31.85)]',
 'merging cluster [(-0.06, 0.41, 40.73)] and [(21.0, 21.0, 0.0)] --> [(-0.06, 0.41, 40.73)]',
 'merging cluster [(-2.4, -1.88, 64.23)] and [(21.0, 21.0, 0.0)] --> [(-2.4, -1.88, 64.23)]',
 'merging cluster [(-2.4, -1.58, 79.3)] and [(21.0, 21.0, 0.0)] --> [(-2.4, -1.58, 79.3)]',
 'merging cluster [(-2.52, -1.58, 81.76)] and [(21.0, 21.0, 0.0)] --> [(-2.52, -1.58, 81.76)]',
 'merging cluster [(-2.52, -1.58, 97.44)] and [(21.0, 21.0, 0.0)] --> [(-2.52, -1.58, 97.44)]',
 'merging cluster [(-2.52, -1.58, 102.68)] and [(21.0, 21.0, 0.0)] --> [(-2.52, -1.58, 102.68)]',
 'merging cluster [(-2.52, -1.41, 107.51)] and [(21.0,

In [474]:
clusters_merge_test = db2_skl.merge_clusters(db2_skl.clusters_unmerged.values)

0 1 not merged
0 2 not merged
0 3 not merged
0 4 not merged
0 5 not merged
0 6 not merged
0 7 not merged
0 8 not merged
0 9 not merged
0 10 not merged
0 11 not merged
0 12 merged
0 13 not merged
0 14 not merged
0 15 not merged
0 16 not merged
0 17 not merged
0 18 not merged
0 19 not merged
0 20 not merged
0 21 not merged
0 22 not merged
0 23 not merged
0 24 not merged
0 25 not merged
0 26 not merged
0 27 skipped
0 28 skipped
0 29 skipped
0 30 skipped
0 31 not merged
0 32 skipped
0 33 skipped
0 34 skipped
0 35 not merged
0 36 skipped
0 37 not merged
0 38 not merged
0 39 skipped
0 40 skipped
0 41 not merged
0 42 skipped
0 43 not merged
0 44 skipped
0 45 skipped
0 46 not merged
0 47 not merged
0 48 not merged
0 49 skipped
0 50 skipped
0 51 skipped
0 52 skipped
0 53 skipped
0 54 skipped
0 55 skipped
0 56 skipped
0 57 not merged
0 58 skipped
0 59 skipped
0 60 skipped
0 61 skipped
0 62 skipped
0 63 not merged
0 64 skipped
0 65 skipped
0 66 skipped
0 67 skipped
0 68 skipped
0 69 not merged
0 

In [436]:
# clusters_merge_test2 = db2_skl.merge_clusters(clusters_merge_test)

In [419]:
# clusters_merge_test3 = db2_skl.merge_clusters(clusters_merge_test2)

In [421]:
# clusters_merge_test4 = db2_skl.merge_clusters(clusters_merge_test3)

In [425]:
# np.array_equal(clusters_merge_test3, clusters_merge_test4)

True

In [470]:
db2_skl.comb

[(0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (0, 10),
 (0, 11),
 (0, 12),
 (0, 13),
 (0, 14),
 (0, 15),
 (0, 16),
 (0, 17),
 (0, 18),
 (0, 19),
 (0, 20),
 (0, 21),
 (0, 22),
 (0, 23),
 (0, 24),
 (0, 25),
 (0, 26),
 (0, 27),
 (0, 28),
 (0, 29),
 (0, 30),
 (0, 31),
 (0, 32),
 (0, 33),
 (0, 34),
 (0, 35),
 (0, 36),
 (0, 37),
 (0, 38),
 (0, 39),
 (0, 40),
 (0, 41),
 (0, 42),
 (0, 43),
 (0, 44),
 (0, 45),
 (0, 46),
 (0, 47),
 (0, 48),
 (0, 49),
 (0, 50),
 (0, 51),
 (0, 52),
 (0, 53),
 (0, 54),
 (0, 55),
 (0, 56),
 (0, 57),
 (0, 58),
 (0, 59),
 (0, 60),
 (0, 61),
 (0, 62),
 (0, 63),
 (0, 64),
 (0, 65),
 (0, 66),
 (0, 67),
 (0, 68),
 (0, 69),
 (0, 70),
 (0, 71),
 (0, 72),
 (0, 73),
 (0, 74),
 (0, 75),
 (0, 76),
 (0, 77),
 (0, 78),
 (0, 79),
 (0, 80),
 (0, 81),
 (0, 82),
 (0, 83),
 (0, 84),
 (0, 85),
 (0, 86),
 (0, 87),
 (0, 88),
 (0, 89),
 (0, 90),
 (0, 91),
 (0, 92),
 (0, 93),
 (0, 94),
 (0, 95),
 (0, 96),
 (0, 97),
 (0, 98),
 (0, 99),
 (0, 100),
 (0, 101

In [466]:
clusters_merge_test

array([[ 13.37,  -4.22,  -4.04,   0.  ],
       [ 17.67,  -0.59,  -0.29,   0.  ],
       [ 51.44,  -0.12,   0.41,   0.  ],
       [142.02,  -3.16,  -1.41,   0.  ],
       [104.91,  -3.81,  -3.4 ,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  7.96,  -5.62,  -5.51,   2.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  4.85,  -4.92,  -4.92,   2.  ],
       [  6.15,   6.5 ,   6.62,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [ 14.75,   2.81,   3.11,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  2.54,   5.1 ,   5.1 ,   1.  ],
       [  6.42,   3.87,   3.87,   0.  ],
       [  8.14,  -1.05,  -0.94,   0.  ],
       [  5.26,   1.29,   1.35,   0.  ],
       [ 17.04,   3.46,   3.63,   0.  ],
       [  1.96,  -4.69,  -4.69,   1.  ],
       [  2.09,  -0.76,  -0.76,   1.  ],
       [  2.06,   7.03,   7.03,   1.  ],
       [  6.75,   1.99,   2.11,   0.  ],
       [  0.  , 

In [420]:
clusters_merge_test3

array([[ 13.37,  -4.22,  -4.04,   0.  ],
       [ 17.67,  -0.59,  -0.29,   0.  ],
       [ 51.44,  -0.12,   0.41,   0.  ],
       [142.02,  -3.16,  -1.41,   0.  ],
       [104.91,  -3.81,  -3.4 ,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  7.96,  -5.62,  -5.51,   2.  ],
       [  0.  ,  21.  ,  21.  ,   2.  ],
       [  4.85,  -4.92,  -4.92,   2.  ],
       [  6.15,   6.5 ,   6.62,   0.  ],
       [  0.  ,  21.  ,  21.  ,   1.  ],
       [ 14.75,   2.81,   3.11,   0.  ],
       [  0.  ,  21.  ,  21.  ,   0.  ],
       [  0.  ,  21.  ,  21.  ,   1.  ],
       [  2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  2.54,   5.1 ,   5.1 ,   1.  ],
       [  6.42,   3.87,   3.87,   0.  ],
       [  8.14,  -1.05,  -0.94,   0.  ],
       [  5.26,   1.29,   1.35,   0.  ],
       [ 17.04,   3.46,   3.63,   0.  ],
       [  1.96,  -4.69,  -4.69,   1.  ],
       [  2.09,  -0.76,  -0.76,   1.  ],
       [  2.06,   7.03,   7.03,   1.  ],
       [  6.75,   1.99,   2.11,   0.  ],
       [  0.  , 

In [414]:
clusters_merge_test

array([[  6.93,  -4.16,  -4.04,   0.  ],
       [ 14.61,  -0.59,  -0.29,   0.  ],
       [ 40.73,  -0.06,   0.41,   0.  ],
       [110.97,  -2.52,  -1.41,   0.  ],
       [104.91,  -3.81,  -3.4 ,   0.  ],
       [ 19.35,  -3.16,  -2.7 ,   0.  ],
       [  7.96,  -5.62,  -5.51,   2.  ],
       [  6.9 ,  -1.46,  -1.41,   2.  ],
       [  4.85,  -4.92,  -4.92,   2.  ],
       [  6.15,   6.5 ,   6.62,   0.  ],
       [  2.09,  -2.58,  -2.58,   1.  ],
       [ 12.67,   2.81,   3.11,   0.  ],
       [  6.44,  -4.22,  -4.22,   0.  ],
       [  2.71,  -1.7 ,  -1.7 ,   1.  ],
       [  2.7 ,  -5.1 ,  -5.1 ,   1.  ],
       [  2.54,   5.1 ,   5.1 ,   1.  ],
       [  6.42,   3.87,   3.87,   0.  ],
       [  8.14,  -1.05,  -0.94,   0.  ],
       [  5.26,   1.29,   1.35,   0.  ],
       [ 17.04,   3.46,   3.63,   0.  ],
       [  1.96,  -4.69,  -4.69,   1.  ],
       [  2.09,  -0.76,  -0.76,   1.  ],
       [  2.06,   7.03,   7.03,   1.  ],
       [  6.75,   1.99,   2.11,   0.  ],
       [ 10.71, 

**THIS HAS BEEN FIXED IN ANOTHER NOTEBOOK**

# Testing the merge by brute-force

In [593]:
cu = db2_skl.clusters_unmerged.copy()

In [604]:
cur = cu.loc[cu.pt_sum > 0].sample(20, random_state=42).copy()

In [605]:
cur.reset_index(inplace=True, drop=True)

In [606]:
cur

Unnamed: 0,pt_sum,z0_low,z0_high,noise
0,25.086727,-3.691406,-3.515625,0.0
1,2.748021,4.042969,4.042969,1.0
2,2.792957,1.347656,1.347656,1.0
3,4.330626,-4.042969,-4.042969,0.0
4,13.517229,0.292969,0.351562,0.0
5,8.363097,-0.585938,-0.410156,0.0
6,2.085667,-2.578125,-2.578125,1.0
7,7.428988,3.574219,3.632812,0.0
8,2.350128,-4.21875,-4.21875,1.0
9,5.231855,-2.285156,-2.285156,0.0


So we have 20 clusters before merging. lets run it through the merging function several times to see if we converge to something.

In [607]:
cmr = db2_skl.merge_clusters(cur.values)

In [608]:
print(cmr)

[[25.09 -3.69 -3.52  0.  ]
 [ 2.75  4.04  4.04  1.  ]
 [ 2.79  1.35  1.35  1.  ]
 [ 4.33 -4.04 -4.04  0.  ]
 [13.52  0.29  0.35  0.  ]
 [ 8.36 -0.59 -0.41  0.  ]
 [ 2.09 -2.58 -2.58  1.  ]
 [ 7.43  3.57  3.63  0.  ]
 [ 6.44 -4.22 -4.22  0.  ]
 [ 5.23 -2.29 -2.29  0.  ]
 [ 2.81 -9.2  -9.2   1.  ]
 [ 4.71  0.59  0.64  0.  ]
 [ 6.15  6.5   6.62  0.  ]
 [ 0.   21.   21.    0.  ]
 [11.75 -3.16 -2.93  0.  ]
 [ 2.06  7.03  7.03  1.  ]
 [ 4.83 -1.46 -1.41  0.  ]
 [ 7.72 -0.06  0.12  0.  ]
 [ 0.   21.   21.    0.  ]
 [ 0.   21.   21.    0.  ]]


In [583]:
cmr2 = db2_skl.merge_clusters(cmr)

In [584]:
cmr2

array([[25.09, -3.69, -3.52,  0.  ],
       [ 2.75,  4.04,  4.04,  1.  ],
       [ 2.79,  1.35,  1.35,  1.  ],
       [ 4.33, -4.04, -4.04,  0.  ],
       [13.52,  0.29,  0.35,  0.  ],
       [ 8.36, -0.59, -0.41,  0.  ],
       [ 2.09, -2.58, -2.58,  1.  ],
       [ 7.43,  3.57,  3.63,  0.  ],
       [ 6.44, -4.22, -4.22,  0.  ],
       [ 5.23, -2.29, -2.29,  0.  ],
       [ 2.81, -9.2 , -9.2 ,  1.  ],
       [ 4.71,  0.59,  0.64,  0.  ],
       [ 6.15,  6.5 ,  6.62,  0.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [11.75, -3.16, -2.93,  0.  ],
       [ 2.06,  7.03,  7.03,  1.  ],
       [ 4.83, -1.46, -1.41,  0.  ],
       [ 7.72, -0.06,  0.12,  0.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  0.  ]])

In [585]:
cmr3 = db2_skl.merge_clusters(cmr2)

In [586]:
cm = pd.DataFrame(cmr, columns=["pt_sum", "z0_low", "z0_high", "noise"])

In [587]:
cm.sort_values(by="z0_low", inplace=True)

In [588]:
cm

Unnamed: 0,pt_sum,z0_low,z0_high,noise
10,2.812771,-9.199219,-9.199219,1.0
8,6.436254,-4.21875,-4.21875,0.0
3,4.330626,-4.042969,-4.042969,0.0
0,25.086727,-3.691406,-3.515625,0.0
14,11.751776,-3.164062,-2.929688,0.0
6,2.085667,-2.578125,-2.578125,1.0
9,5.231855,-2.285156,-2.285156,0.0
16,4.832341,-1.464844,-1.40625,0.0
5,8.363097,-0.585938,-0.410156,0.0
17,7.721261,-0.058594,0.117188,0.0


In [409]:
db2.noise_idx

3

In [386]:
db2.max_pt

0

In [387]:
db2.z0_pv

2.490234375

In [388]:
db2_skl.max_pt

110.97117602825165

In [390]:
db2_skl.clusters_merged

Unnamed: 0,pt_sum,z0_low,z0_high,ntracks
0,6.931237,-4.160156,-4.042969,0.0
1,14.611591,-0.585938,-0.292969,0.0
2,40.733637,-0.058594,0.410156,0.0
3,110.971176,-2.519531,-1.40625,0.0
4,104.907081,-3.808594,-3.398438,0.0
5,19.354282,-3.164062,-2.695312,0.0
6,7.957812,-5.625,-5.507812,2.0
7,6.899173,-1.464844,-1.40625,2.0
8,4.852124,-4.921875,-4.921875,2.0
9,6.150181,6.503906,6.621094,0.0


In [391]:
print("hello")

hello


In [395]:
db2.clusters_unmerged

array([[-5.62, -5.51,  7.96,  0.  ],
       [-5.1 , -5.1 ,  2.7 ,  1.  ],
       [-4.92, -4.92,  4.85,  0.  ],
       [-4.69, -4.69,  1.96,  1.  ],
       [-4.22, -4.22,  2.35,  1.  ],
       [-4.04, -4.04,  4.33,  0.  ],
       [-3.69, -3.52, 25.09,  0.  ],
       [-3.05, -2.93,  6.52,  0.  ],
       [-2.58, -2.58,  2.09,  1.  ],
       [-2.4 , -1.88, 42.53,  0.  ],
       [-1.7 , -1.7 ,  2.71,  1.  ],
       [-1.46, -1.41,  6.9 ,  0.  ],
       [-0.94, -0.94,  2.9 ,  1.  ],
       [-0.76, -0.76,  2.09,  1.  ],
       [-0.41, -0.29,  6.25,  0.  ],
       [ 0.06,  0.06,  1.97,  1.  ],
       [ 0.23,  0.41, 10.61,  0.  ],
       [ 0.88,  0.88,  4.87,  1.  ],
       [ 1.35,  1.35,  2.79,  1.  ],
       [ 2.11,  2.11,  4.8 ,  1.  ],
       [ 2.81,  2.81,  3.19,  1.  ],
       [ 3.57,  3.57,  3.3 ,  1.  ],
       [ 3.87,  3.87,  2.18,  1.  ],
       [ 5.1 ,  5.1 ,  2.54,  1.  ],
       [ 5.74,  5.74,  2.31,  1.  ],
       [ 6.5 ,  6.62,  4.15,  0.  ],
       [ 7.03,  7.03,  2.06,  1.  ],
 

In [392]:
db2.clusters

array([[-8.38, -9.2 , 72.46,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
       [ 0.  , 21.  , 21.  ,  0.  ],
       [ 0.  , 21.  , 21.  ,  1.  ],
 