In [81]:
from collections import Counter
from pathlib import Path

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
from matplotlib import cm
from matplotlib.colors import Normalize
from tqdm.notebook import tqdm

# Number of (bunchCrossing, station) pairs in each target collection built by
# the get*TargetCollections helpers; also embedded in the input/output file names.
SEQ_LENGTH = 3
# Chooses the data set: when False the file names carry an extra "_nc" suffix
# (presumably "non-colliding" bunches — TODO confirm).
COLLIDING_BUNCHES = False

In [82]:
# Input file for the configured sequence length; the "_nc" variant is read
# when COLLIDING_BUNCHES is False.
file_suffix = "" if COLLIDING_BUNCHES else "_nc"
filepath = f"data-sequences/output_1000_seq{SEQ_LENGTH}{file_suffix}.root"

# Read the tree inside a `with` block so the ROOT file is closed as soon as
# the arrays have been loaded into memory.
with uproot.open(filepath) as root_file:
    data = root_file["L1BMTFStubSequences"].arrays()

# Flatten the awkward arrays into a DataFrame.
df = ak.to_dataframe(data)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,nL1BMTFStub,L1BMTFStub_hwQual,L1BMTFStub_hwPhi,L1BMTFStub_hwPhiB,L1BMTFStub_hwEta,L1BMTFStub_hwQEta,L1BMTFStub_wheel,L1BMTFStub_sector,L1BMTFStub_station,orbitNumber,bunchCrossing,sequenceIndex
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,1,3,622,-300,0,0,-2,6,2,261882734,2463,0
1,0,1,3,623,-300,0,0,-2,6,2,261882734,2464,0
2,0,1,2,-22,-76,0,0,2,6,4,261882734,2465,0
3,0,2,6,1104,-152,0,0,1,3,4,261884234,2112,1
3,1,2,3,-176,-201,0,0,1,4,1,261884234,2112,1


In [83]:
# Sequence to inspect by hand.
index = 12
dfgb = df.groupby("sequenceIndex")

print(f"Index: {index}")
dfg = dfgb.get_group(index)

# Compact view of the sequence: bunch crossing, wheel and station of each stub.
summary_columns = ["bunchCrossing", "L1BMTFStub_wheel", "L1BMTFStub_station"]
dfg_selected = dfg[summary_columns]
print(dfg_selected)

# Same sequence again, one printed line per sector.
print(f"\n\nSequence: {index}")
for sector, dfsector in dfg.groupby("L1BMTFStub_sector"):
    sector_wheels = dfsector["L1BMTFStub_wheel"].values
    sector_stations = dfsector["L1BMTFStub_station"].values
    sector_bxs = dfsector["bunchCrossing"].values
    print(f"\tSector: {sector}, Wheel: {sector_wheels}, Station: {sector_stations}, BX: {sector_bxs}")

Index: 12
                bunchCrossing  L1BMTFStub_wheel  L1BMTFStub_station
entry subentry                                                     
36    0                  3138                 1                   3
      1                  3138                 1                   2
      2                  3138                 1                   4
      3                  3138                 2                   3
37    0                  3139                 1                   3
      1                  3139                 1                   1
38    0                  3140                 2                   4


Sequence: 12
	Sector: 2, Wheel: [1 1 1], Station: [3 2 3], BX: [3138 3138 3139]
	Sector: 3, Wheel: [1], Station: [4], BX: [3138]
	Sector: 11, Wheel: [2 1 2], Station: [3 1 4], BX: [3138 3139 3140]


In [84]:
# (bunchCrossing, station) combinations present in the inspected sequence,
# converted from numpy scalars to plain Python ints.
bx_station_groups = dfg_selected.groupby(["bunchCrossing", "L1BMTFStub_station"])
[tuple(el.item() for el in key) for key in bx_station_groups.indices]

[(3138, 2), (3138, 3), (3138, 4), (3139, 1), (3139, 3), (3140, 4)]

In [85]:
def getSameBxTargetCollections(bxs, seq_length=None, n_stations=4):
    """Build target (bx, station) collections that stay within one bunch crossing.

    For every bunch crossing in ``bxs`` and every run of ``seq_length``
    consecutive stations inside ``1..n_stations``, one collection pairing that
    bunch crossing with each station of the run is produced.

    Parameters
    ----------
    bxs : iterable
        Bunch-crossing values.
    seq_length : int, optional
        Number of consecutive stations per collection. Defaults to the
        notebook-level ``SEQ_LENGTH``.
    n_stations : int, optional
        Highest station number (stations are numbered from 1). Defaults to 4.

    Returns
    -------
    list of list of tuple
        One list of ``(bx, station)`` tuples per (bx, first station) combination.
    """
    if seq_length is None:
        seq_length = SEQ_LENGTH

    # First station of every run of `seq_length` stations that fits in 1..n_stations.
    first_stations = range(1, n_stations - seq_length + 2)

    return [
        [(bx, station) for station in range(first, first + seq_length)]
        for bx in bxs
        for first in first_stations
    ]


# interesting ones !!!
def getMultiBxTargetCollections(bxs, seq_length=None, n_stations=4):
    """Build target collections where the station increases with each bunch crossing.

    The i-th bunch crossing of ``bxs`` is paired with the i-th station of a
    run of ``seq_length`` consecutive, increasing stations; one collection is
    produced per run that fits inside ``1..n_stations``. ``zip`` stops at the
    shorter input, so extra bunch crossings are ignored and a shorter ``bxs``
    yields shorter collections.

    Parameters
    ----------
    bxs : sequence
        Bunch-crossing values, in the order they should be matched to stations.
    seq_length : int, optional
        Number of stations per collection. Defaults to the notebook-level
        ``SEQ_LENGTH``.
    n_stations : int, optional
        Highest station number (stations are numbered from 1). Defaults to 4.

    Returns
    -------
    list of list of tuple
        One list of ``(bx, station)`` tuples per starting station.
    """
    if seq_length is None:
        seq_length = SEQ_LENGTH

    return [
        list(zip(bxs, range(first, first + seq_length)))
        for first in range(1, n_stations - seq_length + 2)
    ]

def getInverseMultiBxTargetCollections(bxs, seq_length=None, n_stations=4):
    """Build target collections where the station decreases with each bunch crossing.

    The i-th bunch crossing of ``bxs`` is paired with the i-th station of a
    run of ``seq_length`` consecutive, decreasing stations; one collection is
    produced per run that fits inside ``1..n_stations``. ``zip`` stops at the
    shorter input, so extra bunch crossings are ignored and a shorter ``bxs``
    yields shorter collections.

    Parameters
    ----------
    bxs : sequence
        Bunch-crossing values, in the order they should be matched to stations.
    seq_length : int, optional
        Number of stations per collection. Defaults to the notebook-level
        ``SEQ_LENGTH``.
    n_stations : int, optional
        Highest station number (stations are numbered from 1). Defaults to 4.

    Returns
    -------
    list of list of tuple
        One list of ``(bx, station)`` tuples per starting station.
    """
    if seq_length is None:
        seq_length = SEQ_LENGTH

    return [
        # Stations run from `lowest + seq_length` down to `lowest + 1`.
        list(zip(bxs, range(lowest + seq_length, lowest, -1)))
        for lowest in range(0, n_stations - seq_length + 1)
    ]

In [86]:
def _select_target_stubs(df_seq, target_pairs, seq_length):
    """Return the stubs of ``df_seq`` matching ``target_pairs``, or ``None``.

    Rows are kept when their (bunchCrossing, L1BMTFStub_station) pair is one
    of ``target_pairs``. If the wheels of the kept rows span more than 1, the
    rows on the highest wheel are dropped; if that leaves fewer than
    ``seq_length`` distinct bunch crossings, the rows on the lowest wheel are
    dropped instead. ``None`` is returned when neither trimmed frame still
    covers ``seq_length`` distinct bunch crossings.
    """
    key_columns = ["bunchCrossing", "L1BMTFStub_station"]
    target_index = pd.MultiIndex.from_tuples(target_pairs, names=key_columns)
    df_match = df_seq[df_seq.set_index(key_columns).index.isin(target_index)]

    wheels = df_match["L1BMTFStub_wheel"]
    if wheels.max() - wheels.min() > 1:
        # Try removing the highest wheel first, then the lowest one.
        # NOTE(review): the wheel span of the trimmed frame is not re-checked,
        # so it can still exceed 1 after a single wheel has been removed.
        for keep_rows in (wheels < wheels.max(), wheels > wheels.min()):
            df_trimmed = df_match[keep_rows]
            if df_trimmed["bunchCrossing"].nunique() >= seq_length:
                return df_trimmed
        return None

    return df_match


multi_bx_selection = {}
inverse_multi_bx_selection = {}

# Each target-collection builder fills its own selection dictionary.
target_builders = [
    (getMultiBxTargetCollections, multi_bx_selection),
    (getInverseMultiBxTargetCollections, inverse_multi_bx_selection),
]

for idx_seq, df_seq in tqdm(df.groupby("sequenceIndex")):
    # (bunchCrossing, station) combinations present in this sequence.
    pairs = [
        tuple(el.item() for el in key)
        for key in df_seq.groupby(["bunchCrossing", "L1BMTFStub_station"]).indices
    ]
    bxs = df_seq["bunchCrossing"].unique().tolist()

    for build_targets, selection in target_builders:
        for tps in build_targets(bxs):
            # Every target pair must be present in the sequence.
            if not all(target_pair in pairs for target_pair in tps):
                continue

            df_temp = _select_target_stubs(df_seq, tps, SEQ_LENGTH)
            if df_temp is not None:
                # A later matching collection overwrites an earlier one for
                # the same sequence index.
                selection[idx_seq] = df_temp

  0%|          | 0/444 [00:00<?, ?it/s]

In [93]:
def _concat_selection(selection, template):
    """Stack the per-sequence frames of ``selection`` into one DataFrame.

    ``pd.concat`` raises ``ValueError`` when given nothing to concatenate, so
    an empty selection yields an empty frame with the columns of ``template``
    instead. The original row index is discarded in both cases.
    """
    if not selection:
        return template.iloc[0:0].reset_index(drop=True)
    return pd.concat(list(selection.values()), ignore_index=True)


df_tot_mbs = _concat_selection(multi_bx_selection, df)
df_tot_imbs = _concat_selection(inverse_multi_bx_selection, df)

In [96]:
# Spot-check the stubs selected for one sequence. The key 242 is hard-coded,
# so this raises KeyError if that sequence was not selected.
multi_bx_selection[242]

Unnamed: 0_level_0,Unnamed: 1_level_0,nL1BMTFStub,L1BMTFStub_hwQual,L1BMTFStub_hwPhi,L1BMTFStub_hwPhiB,L1BMTFStub_hwEta,L1BMTFStub_hwQEta,L1BMTFStub_wheel,L1BMTFStub_sector,L1BMTFStub_station,orbitNumber,bunchCrossing,sequenceIndex
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
726,2,3,4,-958,-227,64,0,0,5,2,262038481,2257,242
727,1,2,2,-549,148,0,0,0,10,3,262038481,2258,242
728,1,4,5,-321,142,0,0,0,10,4,262038481,2259,242
728,2,4,5,-321,142,0,0,0,10,4,262038481,2259,242


In [94]:
# Preview the first 50 rows of the concatenated multi-BX selection.
df_tot_mbs.head(50)

Unnamed: 0,nL1BMTFStub,L1BMTFStub_hwQual,L1BMTFStub_hwPhi,L1BMTFStub_hwPhiB,L1BMTFStub_hwEta,L1BMTFStub_hwQEta,L1BMTFStub_wheel,L1BMTFStub_sector,L1BMTFStub_station,orbitNumber,bunchCrossing,sequenceIndex
0,2,3,-176,-201,0,0,1,4,1,261884234,2112,1
1,3,2,-430,-211,0,0,1,4,2,261884234,2113,1
2,3,5,27,180,0,0,0,8,3,261884234,2114,1
3,2,2,-1077,-249,0,0,-2,5,2,261887293,2724,10
4,1,5,701,-194,0,0,-2,4,3,261887293,2725,10
5,3,6,616,161,0,0,-2,9,4,261887293,2726,10
6,4,3,202,399,0,0,1,2,2,261887998,3138,12
7,2,2,772,288,0,0,1,2,3,261887998,3139,12
8,1,2,-599,-279,0,0,2,11,4,261887998,3140,12
9,5,3,491,425,0,0,1,3,1,261888508,2506,14


In [88]:
# Persist df_tot_mbs and df_tot_imbs. Re-run the cells that build them first,
# otherwise the pickles may be written from stale frames.
file_suffix = "" if COLLIDING_BUNCHES else "_nc"
pkl_mbs = f"df_mbs_output_1000_seq{SEQ_LENGTH}{file_suffix}.pkl"
pkl_imbs = f"df_imbs_output_1000_seq{SEQ_LENGTH}{file_suffix}.pkl"

# to_pickle does not create missing directories, so make sure the target exists.
output_dir = Path("selected-sequences")
output_dir.mkdir(parents=True, exist_ok=True)

df_tot_mbs.to_pickle(output_dir / pkl_mbs)
df_tot_imbs.to_pickle(output_dir / pkl_imbs)