In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import math
import pandas as pd
import itertools
import copy
from sklearn.cluster import DBSCAN

In [130]:
pd.set_option("display.max_rows", None)

In [3]:
np.random.seed(42)

In [4]:
np.set_printoptions(precision=2)
np.set_printoptions(suppress=True)

In [132]:
z0_file = "/media/lucas/QS/binaries-trk/OldKF_TTbar_170K_quality-0-trk-z0.bin"
pt_file = "/media/lucas/QS/binaries-trk/OldKF_TTbar_170K_quality-0-trk-pt.bin"
z0 = np.fromfile(z0_file, dtype=np.float32)
pt = np.fromfile(pt_file, dtype=np.float32)

In [133]:
df = pd.DataFrame({"z0": z0, "pt": pt})

In [134]:
db = DBSCAN(eps=0.15, min_samples=2).fit(z0.reshape(-1, 1))

In [135]:
label = db.labels_

In [136]:
df["label"] = label

In [137]:
df["noise"] = 0

In [138]:
df.loc[df.label == -1, "noise"] = 1

In [139]:
df.noise.value_counts()

0    147
1     12
Name: noise, dtype: int64

In [140]:
df.head()

Unnamed: 0,z0,pt,label,noise
0,-0.878906,2.623967,0,0
1,3.339844,3.435026,1,0
2,4.921875,3.774908,2,0
3,-0.46875,2.641448,3,0
4,4.980469,2.320653,2,0


In [141]:
df.sort_values(by="z0", inplace=True)

In [142]:
df

Unnamed: 0,z0,pt,label,noise
67,-7.441406,2.149052,15,0
140,-7.441406,2.573483,15,0
76,-6.972656,3.295341,-1,1
153,-6.5625,3.295341,18,0
119,-6.503906,3.310079,18,0
135,-6.328125,2.251928,12,0
57,-6.328125,2.309069,12,0
62,-6.269531,2.014883,12,0
43,-6.152344,3.104588,12,0
147,-6.035156,1.980026,12,0


In [143]:
left = np.zeros(df.shape[0], dtype=bool)

In [144]:
left[0] = 1

In [145]:
for i in range(1, df.shape[0]):
    _t = z0[i]

    if _t - z0[i - 1] > 0.15:
        left[i] = 1
    else:
        left[i] = 0

In [146]:
bound = np.zeros(df.shape[0])
noise = np.zeros(df.shape[0])

In [147]:
for i in range(df.shape[0] - 1):
    check1 = left[i] and not left[i + 1]
    check2 = not left[i] and left[i + 1]
    noise_check = left[i] and left[i + 1]
    if check1 or check2:
        bound[i] = i
    elif noise_check:
        bound[i] = -1
        noise[i] = 1
    else:
        bound[i] = df.shape[0]

    if left[df.shape[0] - 1]:
        bound[df.shape[0] - 1] = df.shape[0]
    else:
        bound[df.shape[0] - 1] = df.shape[0] - 1

In [148]:
df["noise_check"] = noise_check

In [149]:
df["bound"] = bound

In [150]:
df.head(20)

Unnamed: 0,z0,pt,label,noise,noise_check,bound
67,-7.441406,2.149052,15,0,False,-1.0
140,-7.441406,2.573483,15,0,False,-1.0
76,-6.972656,3.295341,-1,1,False,2.0
153,-6.5625,3.295341,18,0,False,3.0
119,-6.503906,3.310079,18,0,False,4.0
135,-6.328125,2.251928,12,0,False,5.0
57,-6.328125,2.309069,12,0,False,-1.0
62,-6.269531,2.014883,12,0,False,7.0
43,-6.152344,3.104588,12,0,False,8.0
147,-6.035156,1.980026,12,0,False,-1.0


In [151]:
left

array([ True,  True,  True, False,  True, False,  True,  True, False,
        True,  True,  True, False,  True, False,  True, False, False,
        True, False,  True, False,  True,  True, False, False, False,
       False,  True, False,  True,  True,  True,  True, False,  True,
        True,  True, False, False,  True,  True,  True, False,  True,
        True, False,  True, False, False,  True, False,  True,  True,
       False, False, False, False,  True,  True, False,  True, False,
        True, False,  True, False, False,  True, False, False,  True,
       False,  True,  True, False, False,  True,  True, False, False,
       False,  True, False, False,  True, False, False,  True, False,
       False, False,  True, False,  True,  True, False,  True, False,
        True,  True, False, False,  True, False,  True, False,  True,
       False, False, False,  True, False, False, False, False,  True,
       False, False, False,  True, False, False,  True,  True, False,
       False,  True,

In [152]:
df["left"] = left

In [153]:
df.head()

Unnamed: 0,z0,pt,label,noise,noise_check,bound,left
67,-7.441406,2.149052,15,0,False,-1.0,True
140,-7.441406,2.573483,15,0,False,-1.0,True
76,-6.972656,3.295341,-1,1,False,2.0,True
153,-6.5625,3.295341,18,0,False,3.0,False
119,-6.503906,3.310079,18,0,False,4.0,True


In [154]:
df["left"] = df["left"].astype(int)

In [155]:
df.reset_index(drop=True, inplace=True)

In [156]:
df[["z0", "noise", "left"]].head()

Unnamed: 0,z0,noise,left
0,-7.441406,0,1
1,-7.441406,0,1
2,-6.972656,1,1
3,-6.5625,0,0
4,-6.503906,0,1


In [157]:
for i in range(df.shape[0]):
    if df["noise"][i] == 1:
        if i == 0:
            print("[x, " + str(df["left"][i]) + ", " + str(df["left"][i + 1]) + "]")
        elif i == df.shape[0] - 1:
            print(
                "[" + str(df["left"][i - 1]) + ", " + str(df["left"][i]) + ", " + "x]"
            )
        else:
            print(
                "["
                + str(df["left"][i - 1])
                + ", "
                + str(df["left"][i])
                + ", "
                + str(df["left"][i + 1])
                + "]"
            )
        # print(df['left'][i-1],df["left"][i], df['left'][i+1])

[1, 1, 0]
[0, 0, 1]
[0, 1, 1]
[0, 1, 0]
[1, 0, 1]
[0, 1, 1]
[1, 1, 0]
[1, 0, 0]
[1, 1, 0]
[0, 1, 0]
[1, 0, 1]
[0, 1, x]


# Use the standard function to generate the left boundaries

In [158]:
from acceleratedDBSCAN import AccDBSCAN

In [159]:
max_number_of_tracks = 232
max_number_of_tracks_power_2 = 256
max_number_of_tracks_log_2 = 8
eps = 0.15

In [160]:
db_acc = AccDBSCAN(z0, pt, eps, max_number_of_tracks, True)

In [161]:
db_acc.fit()

data padded
tracks built
prefix sum done
data initialized...
left boundaries found...
right boundaries found...
vertices found...
scan complete.


In [162]:
!ls -lhtr

total 23M
-rwxr-xr-x 1 lucas lucas 252K Jan 24 14:03  TracksPerPhiSector.png
-rwxr-xr-x 1 lucas lucas 376K Jan 24 14:03  TrackParameters.png
-rwxr-xr-x 1 lucas lucas 108K Jan 24 14:03  residuals.png
-rwxr-xr-x 1 lucas lucas 6.8K Jan 24 14:03  README.md
-rwxr-xr-x 1 lucas lucas 637K Jan 24 14:03  L1TriggerInputData.png
-rwxr-xr-x 1 lucas lucas 3.2K Jan 24 14:03  histograms.py
-rwxr-xr-x 1 lucas lucas 8.9K Jan 24 14:03  analysis.py
-rwxr-xr-x 1 lucas lucas  954 Jan 24 14:03  fasthisto_optimization.py
-rwxr-xr-x 1 lucas lucas 1.7K Jan 24 14:03  dbscan_optimization.py
-rwxr-xr-x 1 lucas lucas  921 Jan 24 14:03  convert_root_to_pickle.py
-rwxr-xr-x 1 lucas lucas 2.2K Jan 24 14:03  checkFastHisto.py
-rwxr-xr-x 1 lucas lucas 5.1K Jan 24 14:03  analyze_files.py
drwxr-xr-x 2 lucas lucas 4.0K Jan 24 14:03  plots
-rwxr-xr-x 1 lucas lucas  16K Mar 28 16:56  analyze_hdbscan.py
-rwxr-xr-x 1 lucas lucas  11K Apr 29 16:24  clusters.npy
-rw-rw-r-- 1 lucas lucas 2.5K May  3 13:35  boundaries.npy
-rw-rw-

In [163]:
left_acc = np.load("left_boundaries.npy")

In [164]:
left_acc.shape

(232,)

In [165]:
left_acc

array([ True, False,  True,  True, False,  True, False, False, False,
       False,  True, False, False, False, False, False, False,  True,
        True, False, False, False, False,  True, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False,  True,  True, False,  True,  True,
        True,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False,  True, False, False,  True,  True,
        True, False,

In [166]:
df.shape[0]

159

In [167]:
left_acc[0:139]

array([ True, False,  True,  True, False,  True, False, False, False,
       False,  True, False, False, False, False, False, False,  True,
        True, False, False, False, False,  True, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False,  True,  True, False,  True,  True,
        True,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False,  True, False, False,  True,  True,
        True, False,

In [186]:
df["left_acc"] = left_acc[0 : df.shape[0]]

In [187]:
df["left_acc"] = df["left_acc"].astype(int)

In [188]:
df

Unnamed: 0,z0,pt,label,noise,noise_check,bound,left,left_acc
0,-7.441406,2.149052,15,0,False,-1.0,1,1
1,-7.441406,2.573483,15,0,False,-1.0,1,0
2,-6.972656,3.295341,-1,1,False,2.0,1,1
3,-6.5625,3.295341,18,0,False,3.0,0,1
4,-6.503906,3.310079,18,0,False,4.0,1,0
5,-6.328125,2.251928,12,0,False,5.0,0,1
6,-6.328125,2.309069,12,0,False,-1.0,1,0
7,-6.269531,2.014883,12,0,False,7.0,1,0
8,-6.152344,3.104588,12,0,False,8.0,0,0
9,-6.035156,1.980026,12,0,False,-1.0,1,0


In [189]:
df[["z0", "noise", "left", "left_acc"]].head(20)

Unnamed: 0,z0,noise,left,left_acc
0,-7.441406,0,1,1
1,-7.441406,0,1,0
2,-6.972656,1,1,1
3,-6.5625,0,0,1
4,-6.503906,0,1,0
5,-6.328125,0,0,1
6,-6.328125,0,1,0
7,-6.269531,0,1,0
8,-6.152344,0,0,0
9,-6.035156,0,1,0


In [205]:
col = "left_acc"

for i in range(df.shape[0]):
    if df["noise"][i] == 1:
        if i == 0:
            print("N: [x, " + str(df[col][i]) + ", " + str(df[col][i + 1]) + "]")
        elif i == df.shape[0] - 1:
            print("N: [" + str(df[col][i - 1]) + ", " + str(df[col][i]) + ", " + "x]")
        else:
            print(
                "N: ["
                + str(df[col][i - 1])
                + ", "
                + str(df[col][i])
                + ", "
                + str(df[col][i + 1])
                + "]"
            )
        # print(df['left'][i-1],df["left"][i], df['left'][i+1])
    else:
        if i == 0:
            print("C: [x, " + str(df[col][i]) + ", " + str(df[col][i + 1]) + "]")
        elif i == df.shape[0] - 1:
            print("C: [" + str(df[col][i - 1]) + ", " + str(df[col][i]) + ", " + "x]")
        else:
            print(
                "C: ["
                + str(df[col][i - 1])
                + ", "
                + str(df[col][i])
                + ", "
                + str(df[col][i + 1])
                + "]"
            )

C: [x, 1, 0]
C: [1, 0, 1]
N: [0, 1, 1]
C: [1, 1, 0]
C: [1, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
N: [0, 1, 1]
C: [1, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 1]
C: [0, 1, 0]
C: [1, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]
C: [0, 0, 0]

In [191]:
for i in range(df.shape[0] - 1):
    l1 = df[col][i]
    l2 = df[col][i + 1]

    if (l1 == 1) and (l2 == 1):
        print(df["noise"][i])

1
1
1
1
1
1
1
1
1
1
1


In [192]:
max_tracks = 5

In [193]:
boundaries = np.zeros((max_tracks, 1))

In [194]:
for i in range(max_tracks - 1):
    print(i)

0
1
2
3


In [195]:
boundaries[3]

array([0.])

In [196]:
a = np.array([2, 1, 4, 3])
b = np.array([1, 2, 3, 4])

In [197]:
idx = a.argsort()

In [198]:
a[idx]

array([1, 2, 3, 4])

In [199]:
b[idx]

array([2, 1, 4, 3])

In [200]:
idx

array([1, 0, 3, 2])

In [201]:
np.argmax(a)

2

In [202]:
a[np.argmax(a)]

4

In [203]:
df

Unnamed: 0,z0,pt,label,noise,noise_check,bound,left,left_acc
0,-7.441406,2.149052,15,0,False,-1.0,1,1
1,-7.441406,2.573483,15,0,False,-1.0,1,0
2,-6.972656,3.295341,-1,1,False,2.0,1,1
3,-6.5625,3.295341,18,0,False,3.0,0,1
4,-6.503906,3.310079,18,0,False,4.0,1,0
5,-6.328125,2.251928,12,0,False,5.0,0,1
6,-6.328125,2.309069,12,0,False,-1.0,1,0
7,-6.269531,2.014883,12,0,False,7.0,1,0
8,-6.152344,3.104588,12,0,False,8.0,0,0
9,-6.035156,1.980026,12,0,False,-1.0,1,0
