# Finding Anomalies for an Entire Era
This notebook is for compiling an Excel file detailing all of the anomalies in a single era. 

Specify the Era

1. Load the appropriate model
2. Import all runs/lumisections that pass the DCS flags
3. Predict on the lumisections
4. Loop over each run
    * Split the long combined lumisection, data, and predictions arrays with their respective run number
    * Store the predictions with the specific run in a dictionary
    * Keep a running list of each dictionary
5. Loop over each run
    * Calculate the losses and binary losses (Do this in separate loop so we can change the loss threshold)
6. Loop over each run and analyze the anomalies
    * Try to figure out a data storage format that we can run through the normal Excel creation format
7. Create a plot for each anomalous lumisection

Folders
* Folder --> /eos/user/a/alaperto/SWAN_projects/NMFolder/models/
* Data by Lyka --> /eos/user/l/llambrec/dialstools-output/
* Original Jake --> /eos/user/j/jomorris/SWAN_projects/NMF Testing/

In [22]:
import sys, inspect
import importlib
import functions
importlib.reload(functions)
from functions import *

# Names of every function defined in functions.py that has been called since tracing started
called = set()

def trace_calls(frame, event, arg):
    """Global trace hook that records which functions from functions.py are called.

    Installed with sys.settrace, so it is invoked on every Python-level call made
    in this thread. The name of the called code object is added to `called` when
    the code object's file name ends with "functions.py".

    Parameters:
        frame: the frame being entered; only frame.f_code is read.
        event: the trace event name; anything other than "call" is ignored.
        arg:   unused (required by the sys.settrace signature).

    Returns:
        Always None, so no local (line-by-line) tracing is enabled for the frame.
    """
    if event != "call":
        return None
    code = frame.f_code
    # Read the file name straight off the code object. inspect.getsourcefile() does
    # extra lookups for every call, which is very costly when the hook fires on every
    # numpy/pandas call made by the notebook.
    filename = code.co_filename
    if filename and filename.endswith("functions.py"):
        called.add(code.co_name)
    # Only "call" events matter here, so do not request line/return events for this frame
    return None

sys.settrace(trace_calls)


In [23]:
%%time
# imports

import os
import sys
import json
import time
import joblib
import importlib
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from nmf2d import NMF2D

#import functions
#importlib.reload(functions);
#from functions import *

#All 48 power group names, of the form FPix_B{m|p}{I|O}_D{1-3}_ROG{1-4}.
#The anomaly search iterates over this array, so its order is the order in which power groups are scanned.
optimized_powerGroupStringsList = np.array(['FPix_BmO_D3_ROG4','FPix_BmO_D2_ROG4','FPix_BmO_D1_ROG4','FPix_BmO_D3_ROG3','FPix_BmO_D2_ROG3','FPix_BmO_D1_ROG3','FPix_BmO_D3_ROG2','FPix_BmO_D2_ROG2','FPix_BmO_D1_ROG2','FPix_BmO_D3_ROG1','FPix_BmO_D2_ROG1','FPix_BmO_D1_ROG1','FPix_BmI_D3_ROG1','FPix_BmI_D2_ROG1','FPix_BmI_D1_ROG1','FPix_BmI_D3_ROG2','FPix_BmI_D2_ROG2','FPix_BmI_D1_ROG2','FPix_BmI_D3_ROG3','FPix_BmI_D2_ROG3','FPix_BmI_D1_ROG3','FPix_BmI_D3_ROG4','FPix_BmI_D2_ROG4','FPix_BmI_D1_ROG4','FPix_BpO_D1_ROG4','FPix_BpO_D2_ROG4','FPix_BpO_D3_ROG4','FPix_BpO_D1_ROG3','FPix_BpO_D2_ROG3','FPix_BpO_D3_ROG3','FPix_BpO_D1_ROG2','FPix_BpO_D2_ROG2','FPix_BpO_D3_ROG2','FPix_BpO_D1_ROG1','FPix_BpO_D2_ROG1','FPix_BpO_D3_ROG1','FPix_BpI_D1_ROG1','FPix_BpI_D2_ROG1','FPix_BpI_D3_ROG1','FPix_BpI_D1_ROG2','FPix_BpI_D2_ROG2','FPix_BpI_D3_ROG2','FPix_BpI_D1_ROG3','FPix_BpI_D2_ROG3','FPix_BpI_D3_ROG3','FPix_BpI_D1_ROG4','FPix_BpI_D2_ROG4','FPix_BpI_D3_ROG4'])
#A list of all of the quarters of the detector: 4 rows of 12 power groups each,
#one row per name prefix (BmI, BmO, BpI, BpO), covering disks 1-3 and ROG 1-4.
QUARTERS = np.array([['FPix_BmI_D3_ROG1','FPix_BmI_D3_ROG2','FPix_BmI_D3_ROG3','FPix_BmI_D3_ROG4','FPix_BmI_D2_ROG1','FPix_BmI_D2_ROG2','FPix_BmI_D2_ROG3','FPix_BmI_D2_ROG4','FPix_BmI_D1_ROG1','FPix_BmI_D1_ROG2','FPix_BmI_D1_ROG3','FPix_BmI_D1_ROG4'], ['FPix_BmO_D3_ROG1','FPix_BmO_D3_ROG2','FPix_BmO_D3_ROG3','FPix_BmO_D3_ROG4','FPix_BmO_D2_ROG1','FPix_BmO_D2_ROG2','FPix_BmO_D2_ROG3','FPix_BmO_D2_ROG4','FPix_BmO_D1_ROG1','FPix_BmO_D1_ROG2','FPix_BmO_D1_ROG3','FPix_BmO_D1_ROG4'], ['FPix_BpI_D1_ROG1','FPix_BpI_D1_ROG2','FPix_BpI_D1_ROG3','FPix_BpI_D1_ROG4','FPix_BpI_D2_ROG1','FPix_BpI_D2_ROG2','FPix_BpI_D2_ROG3','FPix_BpI_D2_ROG4','FPix_BpI_D3_ROG1','FPix_BpI_D3_ROG2','FPix_BpI_D3_ROG3','FPix_BpI_D3_ROG4'], ['FPix_BpO_D1_ROG1','FPix_BpO_D1_ROG2','FPix_BpO_D1_ROG3','FPix_BpO_D1_ROG4','FPix_BpO_D2_ROG1','FPix_BpO_D2_ROG2','FPix_BpO_D2_ROG3','FPix_BpO_D2_ROG4','FPix_BpO_D3_ROG1','FPix_BpO_D3_ROG2','FPix_BpO_D3_ROG3','FPix_BpO_D3_ROG4']])

CPU times: user 1.55 ms, sys: 0 ns, total: 1.55 ms
Wall time: 1.8 ms


|        2024 Era     | C | D | E | E | F |   F  |   G  |   H  |   I  |   I  |
|:-------------------:|:-:|:-:|:-:|:-:|:-:|:----:|:----:|:----:|:----:|:----:|
|       Version       | 1 | 1 | 1 | 2 | 1 |   1  |   1  |   1  |   1  |   2  |
|        Period       | 1 | 1 | 1 | 1 | 1 |   2  |   2  |   2  |   2  |   2  |
|    Model Ring 1       | 1 | 1 | 1 | 1 | 1 |   2  |   2  |   2  |   2  |   2  |
|    Model Ring 2       | 5 | 5 | 5 | 5 | 5 |   7  |   7  |   7  |   7  |   7  |

|        2025 Era     | C | C | C | D | E | F |   G  |   H  |   I  |   J  |
|:-------------------:|:-:|:-:|:-:|:-:|:-:|:-:|:----:|:----:|:----:|:----:|
|       Version       | 1 | 1 | 2 | 1 | 1 | 1 |   1  |   1  |   1  |   1  |
|        Period       | 3 | 4 | 4 | 4 | 4 | X |   X  |   X  |   X  |   X  |
|    Model Ring 1       | 3 | 8 | 8 | 8 | 8 | X |   X  |   X  |   X  |   X  |
|    Model Ring 2       | 7 | 7 | 7 | 7 | 7 |     |     |     |     |     |


# Important Variables

In [24]:
#Select the era: import and search for anomalies
RING = 2
YEAR, ERA, VERSION, PERIOD = 2024, "I", 1, 2

#ring 1 --> 1 - 2 - 3 - 4
#ring 2 --> 5 - 7 - 7 - 7
#Named model_type (not `type`) so the builtin type() is not shadowed for the rest of the notebook
number, model_period, model_type = 7, 3, 1
model_name = f'model_{number}_PXRing_{RING}_period_{model_period}_type_{model_type}.pkl'

#For both
ANOMALY_CUTOFF = 40 #Threshold on fraction of powergroup that is Bad --> Define LS as anomalous

# Model thresholds Ring 1
if RING == 1:
    EDITION = 1
    if YEAR == 2024:
        LOSS_THRESHOLD = 4e5 #Threshold on Loss of the ROC --> Define ROC as Bad
    elif YEAR == 2025:
        LOSS_THRESHOLD = 9e5
    else:
        #Fail here rather than with a NameError on LOSS_THRESHOLD several cells later
        raise ValueError(f"No LOSS_THRESHOLD defined for ring 1 in year {YEAR}")
    #EDITION = 4 #Special test for 900 LS, model 8
# Model thresholds Ring 2
elif RING == 2:
    EDITION = 2
    LOSS_THRESHOLD = 1e5 #Threshold on Loss of the ROC --> Define ROC as Bad
    #EDITION = 3 #Special for splitting in half era 2024G_v1 and 2025D_v1
else:
    #EDITION and LOSS_THRESHOLD would otherwise be left undefined
    raise ValueError(f"Unsupported RING: {RING} (expected 1 or 2)")

#Plotting Globals
DO_PLOTTING = False #Whether or not to plot EVERY anomalous lumisection in this era. WARNING: Takes a long time. ~13mins for 93 anomalies
SAVE_FIGS = False #Whether or not to also SAVE the plot of every anomalous lumisection. DO_PLOTTING must also be True for the images to be saved

#Some light calculation of important variables
file = f'../data/ZeroBias-Run{YEAR}{ERA}-PromptReco-v{VERSION}-DQMIO-PixelPhase1-Phase1_MechanicalView-PXForward-clusters_per_SignedDiskCoord_per_SignedBladePanelCoord_PXRing_{RING}.parquet'
oms_json = f'../omsdata/omsdata_Run{YEAR}{ERA}-v{VERSION}.json'
#ring_num = int(file[-9]) #The -9th character is ALWAYS the ring number for our data
ring_num = RING

#The directory that holds every output of this era/version/period/ring/edition combination
DIR_NAME = f"../results/output_{YEAR}{ERA}_v{VERSION}_period_{PERIOD}_PXRing_{RING}_edition_{EDITION}" 
#exist_ok makes the cell safe to re-run
os.makedirs(DIR_NAME, exist_ok=True)

## 1. Load the Model

In [25]:
#Path of the trained NMF model selected in the configuration cell
nmf_file = f'../models/{model_name}'
#joblib.load unpickles the file, so only point this at model files from a trusted location
nmf = joblib.load(nmf_file)

#Report which model was loaded and the input shape it reports
print(f"Loaded Model: {nmf_file}", f"Model Shape: {nmf.xshape}", sep="\n")

Loaded Model: ../models/model_7_PXRing_2_period_3_type_1.pkl
Model Shape: [136, 48]


## 2. Import all runs/lumisections that pass the DCS flags
Import the entire era at once so we only have to go to the disk once. 

Use the OMS JSON to filter only the lumisections that pass the DCS flags. Helps to reduce unnecessary predictions on bad lumisections

In [26]:
%%time
#Any logic for when parts of the detector are disabled. 
extra_filters = []
#Logic for Era F period 1/2 and 2025 Era Cv1 period 3/4
if YEAR == 2024 and ERA == "F" and PERIOD == 1:
    extra_filters.append(('run_number', '<', 382799))
elif YEAR == 2024 and ERA == "F" and PERIOD == 2:
    extra_filters.append(('run_number', '>=', 382799))
elif YEAR == 2024 and ERA == "G" and RING == 2 and EDITION == 2: #Split in half 2024G_v1, too heavy
    extra_filters.append(('run_number', '<', 384684))
elif YEAR == 2024 and ERA == "G" and RING == 2 and EDITION == 3: #Split in half 2024G_v1, too heavy
    extra_filters.append(('run_number', '>', 384684))
elif YEAR == 2025 and ERA == "C" and VERSION == 1 and PERIOD == 3:
    extra_filters.append(('run_number', '<=', 392668))
elif YEAR == 2025 and ERA == "C" and VERSION == 1 and PERIOD == 4:
    extra_filters.append(('run_number', '>', 392668))
elif YEAR == 2025 and ERA == "C" and VERSION == 2 and PERIOD == 4:
    extra_filters.append(('run_number', '<', 393512))#Machine Development runs
elif YEAR == 2025 and ERA == "D" and RING == 2 and EDITION == 2: #Split in half 2025D_v1, too heavy
    extra_filters.append(('run_number', '<', 395432))
elif YEAR == 2025 and ERA == "D" and RING == 2 and EDITION == 3: #Split in half 2025D_v1, too heavy
    extra_filters.append(('run_number', '>', 395432))

#Testing the function to import AND filter an entire era at once
multi_lumi_data, all_runs, lumis, df = extract_data_whole_era(file, oms_json, extra_filters=extra_filters)
del df
all_runs, indices = np.unique(all_runs, return_index=True)
indices = indices[1:] #The first index in indices is always 0, since the first number is always unique, so we discard that. 
print(f"There are {len(all_runs)} Runs and {len(lumis)} Lumisections that Pass All DCS Flags: \n", all_runs)

There are 26 Runs and 13941 Lumisections that Pass All DCS Flags: 
 [386478 386505 386508 386509 386553 386554 386592 386593 386594 386604
 386605 386614 386615 386616 386617 386618 386629 386630 386640 386642
 386661 386668 386672 386673 386679 386693]
CPU times: user 10.1 s, sys: 5.68 s, total: 15.7 s
Wall time: 23.5 s


## 3. Predict on the lumisections

In [27]:
%%time

# Predict or not predict (30 mins for 100k LS)
FORCE_PREDICT = False

#Remove the cross so we can predict on it
multi_lumi_data_no_cross = remove_cross(multi_lumi_data, RING)

#Save predictions array to a file (since they take so long to produce)
pred_filename = f"{DIR_NAME}/Predictions"

if os.path.exists(pred_filename + '.npy') and FORCE_PREDICT == False:
    print(f"Predictions Already Exist!")
    print(f"Loading predictions from {pred_filename}.npy")
    mes_pred = np.load(pred_filename + '.npy')
    print(f"Shape: {mes_pred.shape}")
else:
    #If we don't already have a prediction directory then make it
    if FORCE_PREDICT: 
        print(f"FORCE_PREDICT is True. ---> Overwrite file at {pred_filename}.npy")
    else:
        print("Predictions don't exist yet! Starting Prediction. ")

    #Predict on the data
    print(f'Predicting...')
    start_time = time.time()
    mes_pred = nmf.predict(multi_lumi_data_no_cross)
    np.save(pred_filename, mes_pred)
    print(f"Done Predicting in {time.time() - start_time} Seconds!\n")

Predictions Already Exist!
Loading predictions from ../results/output_2024I_v1_period_2_PXRing_2_edition_2/Predictions.npy
Shape: (13941, 136, 48)
CPU times: user 486 ms, sys: 916 ms, total: 1.4 s
Wall time: 1.44 s


## 4. Loop over each run
    * Split the long combined lumisection, data, and predictions arrays with their respective run number
    * Store the predictions with the specific run in a dictionary
    * Keep a running list of each dictionary

In [28]:
%%time
#Verbose=0: No prints. Verbose=1: Some prints. Verbose>=2: Some very long prints
verbose = 0

#Split the long combined arrays of the lumisections, the data, and the predictions
lumisections = np.split(lumis, indices)
data_arr = np.split(multi_lumi_data_no_cross, indices)
pred_arr = np.split(mes_pred, indices)

#print(pred_arr[1].shape)

#Now our data is splitted into arrays by run number, we can loop over the run numbers, generate our data_dicts and put them in a data_dict_list.
#So it is in a format ready to be used to calculate the losses and anomalies. 
#Create a list to store all of the lumisections and predictions
data_dict_list = list(np.empty_like(all_runs)) 
for index, run_number in enumerate(all_runs):
    data_dict = {}
    data_dict["run_number"] = run_number
    data_dict["lumisections"] = lumisections[index]
    data_dict["data"] = data_arr[index]
    data_dict["predictions"] = pred_arr[index]
    #Add the cross into the predictions and data
    #data_dict["data_cross"] = add_cross(data_arr[index])
    #data_dict["predictions_cross"] = add_cross(pred_arr[index])
    
    #Add this run to the data dict list
    data_dict_list[index] = data_dict    
    #Printing
    if verbose>0:
        print(f"Run Number: {run_number}")
        print(f"\tThere are {len(lumisections[index])} Extracted Lumisections:\n\t{lumisections[index]}\n")
    if verbose>1:
        print(data_dict)
        
if verbose>0:
    print(data_dict_list[0])
elif verbose>1:
    print(data_dict_list)

CPU times: user 12.6 ms, sys: 7.94 ms, total: 20.5 ms
Wall time: 40.2 ms


## 5. Loop over each run
    * Calculate the losses and binary losses (Do this in separate loop so we can change the loss threshold)

In [29]:
%%time

#Loop over each data dictionary in the list and calculate the losses and binary losses. 
#Add these to the dictionaries as we go along. 
for index, data_dict in enumerate(data_dict_list):
    #Extract the needed info from the data_dict
    multi_lumi_data_no_cross = data_dict["data"]
    mes_pred = data_dict["predictions"]
    
    #Calculate losses
    losses = np.square(multi_lumi_data_no_cross - mes_pred)
    losses_binary = (losses > LOSS_THRESHOLD).astype(int)
    
    #Add the crosses back
    #losses_cross = add_cross(losses)
    losses_binary_cross = add_cross(losses_binary)
    
    #Add new entries to the data_dict
    #This will automatically update the dictionaries in the data_dict_list
    #data_dict["losses"] = losses_cross
    data_dict["losses_binary"] = losses_binary_cross

CPU times: user 4.29 s, sys: 10.3 ms, total: 4.3 s
Wall time: 4.36 s


## 6. Loop over each run and analyze the anomalies. 

### Analyze Each Lumisection of Each Run for Anomalies

In [30]:
%%time

testingtime = False
verbose = False

#Create lists for tracking the anomalous lumisections in all of the runs
all_anomalous_runs = []
all_anomalous_lumisections = []
all_anomalous_powergroups = []

for i, data_dict in enumerate(data_dict_list):
    #Extract the required info from the data_dict
    run_number = data_dict["run_number"]
    lumisections = data_dict["lumisections"]
    losses_binary_cross = data_dict["losses_binary"]
    
    #Currently a list of anomalous lumisections and their powergroups
    #These arrays are prevented from getting out of sync by still appending 
    #the anomalous lumisection even if that lumisection was already marked bad with a different powergroup
    anomalous_lumisections = []
    anomalous_powergroups = []

    for index, lumisection in enumerate(lumisections):
        if verbose: print(f"Index: {index} \t Lumisection: {lumisection}")

        for j, powergroup in enumerate(optimized_powerGroupStringsList):
            #if not testingtime: print(f"Power Group String: {powergroup}")
            powerGroupSlice, diskSlice = powerGroupToIndex(powergroup, RING)

            #Access each power group in each lumisection and see if more than 40% of the bins are on
            #A bit ugly, but this was the fastest way I found. Saved about .1 seconds over saving the powergroup data to another variable. 
            if int(np.sum(losses_binary_cross[index, powerGroupSlice, diskSlice].flatten())) >= int(ANOMALY_CUTOFF/100 * losses_binary_cross[index, powerGroupSlice, diskSlice].flatten().size):
                if verbose: print(f"Anomalous Power Group: {powergroup} \t in Lumisection: {lumisection} \t with Binary Sum: {np.sum(powerGroup_data)}")
                all_anomalous_runs.append(run_number)
                anomalous_lumisections.append(lumisection)
                anomalous_powergroups.append(powergroup)
                
            #This is used to pull out specific lumisection and check their binary loss occupancy
            #if run_number == 379660 and lumisection == 291:
            #    print(f"Powergroup: {powergroup}")
            #    print(f"Powergroup Size:{losses_binary_cross[index, powerGroupSlice, diskSlice].flatten().size}")
            #    print(f"Sum of Binary Loss:{np.sum(losses_binary_cross[index, powerGroupSlice, diskSlice].flatten())}\n")
            #    save_digis_png(losses_binary_cross[index], run_number, lumisection, RING)
                
    #Update the data_dict with the anomalous lumisections and powergroups
    #I don't have a plan for them currently, but it could be useful
    data_dict["anomalous_lumisections"] = np.array(anomalous_lumisections)
    data_dict["anomalous_powergroups"] = np.array(anomalous_powergroups)
    
    #EXTEND the anomalous lumisections and powegroups to the ALL list. Extend keeps the list flat
    all_anomalous_lumisections.extend(anomalous_lumisections)
    all_anomalous_powergroups.extend(anomalous_powergroups)
    
# print(all_anomalous_runs)
# print(all_anomalous_lumisections)
# print(all_anomalous_powergroups)
# print(data_dict_list[-2])
#num_anomalous_lumisections, all_anomalous_lumisections_unique = calcNumAnomalousLumisections(data_dict_list)
print(f'Resuming results...')
print(f"Era {YEAR}{ERA}_v{VERSION} (Run {all_runs[0]} to Run {all_runs[-1]})")
#print(f"{num_anomalous_lumisections} Anomalous Lumisections")
#For some reason the above number can disagree with the sum of the Num_LS column in Excel. But they are never far off.
#Might happen due to lumisections appearing twice in the excel file. Like when a single lumisection has both a single disk anomaly and a multi disk anomaly
#The lumisection could be double counted. There may be more cases where this happens. 

Resuming results...
Era 2024I_v1 (Run 386478 to Run 386693)
CPU times: user 2min 34s, sys: 1min 18s, total: 3min 52s
Wall time: 8min 20s


## Identify Anomaly Types
This section identifies lumisections that appear more than once within a run (i.e. with several anomalous powergroups), then compares those powergroups to see whether any of them form a multi-disk anomaly. 

If there are no repeating lumisections, then it is just a single disk anomaly. 

### Create a file summarizing all lumisections and their anomalies
#### Run_Number    Lumisection    PRT    Disk    Ring_Num    Anomaly_Type

In [31]:
%%time
#Identify multi disk anomalies
verbose = 0
#Create a pandas dataframe that we can use to track EACH anomalous lumisection in each run
headers = ["Run_Number", "Lumisection", "Powergroup", "Disk", "Ring_Num", "Anomaly_Type"]
all_detailed_anomaly_df = pd.DataFrame(columns=headers)

#Loop over each data dict in the data dict list
for index, data_dict in enumerate(data_dict_list):
    #Extract the relavant information
    run_number = data_dict["run_number"]
    anomalous_lumisections = data_dict["anomalous_lumisections"]
    anomalous_powergroups = data_dict["anomalous_powergroups"]
    
    #If there are no anomaous lumisections or powergroups then stop this iteration
    if anomalous_lumisections.size == 0 or anomalous_powergroups.size == 0:
        continue
    

    #Create dataframe of anomalous lumisections and powergroups
    anomaly_df = pd.DataFrame({"lumisections": anomalous_lumisections, "powergroups": anomalous_powergroups})

    if verbose>0: print("Anomaly Dataframe: \n", anomaly_df, '\n')

    #Create unique arrays to pare down duplicate data
    anomalous_lumisections_unique = np.unique(anomalous_lumisections)

    #Create a list of dictionaries for easier saving to text
    dictList = np.empty_like(anomalous_lumisections_unique, dtype=dict)

    detailed_anomaly_df = pd.DataFrame(columns=headers)

    # print("AHHHHHHH\n", detailed_anomaly_df)
    if verbose>0: print('-----------------------------------------')
    #Loop over each unique lumisection
    for index, lumisection in enumerate(anomalous_lumisections_unique):
        #These values are the same for single/multi disk anomalies
        dataDict = dict.fromkeys(headers)
        dataDict["Run_Number"] = run_number
        dataDict["Lumisection"] = lumisection
        dataDict["Ring_Num"] = ring_num

        #Get the lumisection and all of the anomaly powergroups
        #If there is only one powergroup, mark it Single Disk, preparing the detailed Pandas anomaly dataframe, then move on
        #If there is multiple powergroups, iterate through each pair, breaking on the first Multi-disk anomaly after preparing the detailed Pandas anomaly dataframe
        #If there is no Multi-disk anomaly despite there being multiple anomalies in one lumisection, prepare the detailed Pandas anomaly dataframe with EACH anomaly


        dataframe = anomaly_df[anomaly_df["lumisections"] == lumisection]
        if verbose>0: print(dataframe, '\n')
        powergroups = dataframe["powergroups"].to_list()
        if verbose>0: print(f"Powergroups: {powergroups}")

        #If there are 12 anomalous powergroups in one lumisection, check if it's a whole quarter out
        if len(powergroups) == 12:
            for quarter in QUARTERS:
                #If all of the powergroups are in one quarter, then we can save and break early
                if np.all(np.isin(powergroups, quarter)):
                    #Fill in the data dict
                    m_or_p, I_or_O, disk_number, part_number = analyzePowerGroupString(powergroups[0])
                    dataDict["Powergroup"] = ':'.join(powergroups) #Make a string of each powergroup separated by colons. A char not typically used in csv's. 
                    dataDict["Disk"] = "-1:-2:-3" if disk_number < 0 else "1:2:3"
                    #dataDict["Anomaly_Type"] = "Whole Quarter"
                    dataDict["Anomaly_Type"] = "Multi-Disk"
                    dataFrame = pd.Series(dataDict).to_frame().T
                    #print(dataFrame)
                    detailed_anomaly_df = pd.concat([detailed_anomaly_df, dataFrame])
                    #break out of this for loop for the quarters
                    break
            #then continue to the next unique anomaly
            continue

        #If there is only one powergroup, mark it as a Single disk anomaly
        if len(powergroups) == 1:
            #Fill in the data dict
            m_or_p, I_or_O, disk_number, part_number = analyzePowerGroupString(powergroups[0])
            dataDict["Powergroup"] = powergroups[0]
            dataDict["Disk"] = disk_number
            dataDict["Anomaly_Type"] = "Single-Disk"
            #Convert the data dict to a pandas dataframe and concat it to the detailed data frame 
            #(NOTE: If there are many lumisections, it is technically more effificient to create a list of these dataDicts and concat those all at once)
            if verbose>1: print("DATA DICT:", dataDict)
            dataFrame = pd.Series(dataDict).to_frame().T
            #print(dataFrame)
            detailed_anomaly_df = pd.concat([detailed_anomaly_df, dataFrame])
            #continue to the next loop
            continue

        #If there is more than one anomalous powergroup in that lumisection and it's NOT the whole quarter out
        all_powergroup_combos = itertools.combinations(powergroups, 2)
        #Loop over all pairs of powergroups and search for multi-disk anomalies
        dataDictList = [] #Create a list to store all of the possible Single Disk anomalies in case there are multiple anomalies but no Multi Disk anomaly
        for powergroup_combo in all_powergroup_combos:
            #print("ADFJNDKFN", powergroup_combo)
            anomaly_type = powerGroupsToAnomalyType(powergroup_combo[0], powergroup_combo[1])
            #Extract relevant information
            m_or_p_one, I_or_O_one, disk_number_one, part_number_one = analyzePowerGroupString(powergroup_combo[0])
            m_or_p_two, I_or_O_two, disk_number_two, part_number_two = analyzePowerGroupString(powergroup_combo[1])
            if anomaly_type == "Multi-Disk":
                #Fill in the data dict
                dataDict["Powergroup"] = powergroup_combo[0] + ':' + powergroup_combo[1]
                dataDict["Disk"] = str(disk_number_one) + ':' + str(disk_number_two)
                dataDict["Anomaly_Type"] = "Multi-Disk"
                #Convert the data dict to a pandas dataframe and concat it to the detailed data frame 
                #(NOTE: If there are many lumisections, it is technically more effificient to create a list of these dataDicts and concat those all at once)
                if verbose>1: print("DATA DICT:", dataDict)
                dataFrame = pd.Series(dataDict).to_frame().T
                #print(dataFrame)
                detailed_anomaly_df = pd.concat([detailed_anomaly_df, dataFrame])
                #break
            else:
                #Fill in a data dict for each anomaly
                dataDict["Powergroup"] = powergroup_combo[0]
                dataDict["Disk"] = str(disk_number_one)
                dataDict["Anomaly_Type"] = "Single-Disk"
                #Convert the data dict to a pandas dataframe and concat it to the detailed data frame 
                #(NOTE: If there are many lumisections, it is technically more effificient to create a list of these dataDicts and concat those all at once)
                if verbose>1: print("DATA DICT ONE:", dataDict)
                dataFrame = pd.Series(dataDict).to_frame().T
                if verbose>1: print(dataFrame)
                detailed_anomaly_df = pd.concat([detailed_anomaly_df, dataFrame])

                #Fill in second data dict
                dataDict["Powergroup"] = powergroup_combo[1]
                dataDict["Disk"] = str(disk_number_two)
                dataDict["Anomaly_Type"] = "Single-Disk"
                #Convert the data dict to a pandas dataframe and concat it to the detailed data frame 
                #(NOTE: If there are many lumisections, it is technically more effificient to create a list of these dataDicts and concat those all at once)
                if verbose>1: print("DATA DICT TWO:", dataDict)
                dataFrame = pd.Series(dataDict).to_frame().T
                #print(dataFrame)
                detailed_anomaly_df = pd.concat([detailed_anomaly_df, dataFrame])
            if verbose>0: print('----------------------------------------')
        if verbose>0: print('-----------------------------------------')

    #Remove all exact duplicate rows
    detailed_anomaly_df = detailed_anomaly_df.drop_duplicates()
    if verbose>0: print('\n\n')
    #print(detailed_anomaly_df)
    all_detailed_anomaly_df = pd.concat([all_detailed_anomaly_df, detailed_anomaly_df])
    #ensure the directory is created
    #detailed_anomaly_df.to_excel(f"{DIR_NAME}/Raw_PXRing_{RING}_{YEAR}{ERA}_Run_{run_number}.xlsx", index=False, engine='openpyxl')


CPU times: user 18.1 s, sys: 11 s, total: 29.1 s
Wall time: 1min 3s


## First output (raw Excel)
Each row is already classified as Single-Disk or Multi-Disk, for example:

396398	414	FPix_BpI_D1_ROG2	1	2	Single-Disk

396398	1036	FPix_BpI_D1_ROG1:FPix_BpI_D2_ROG1	1:2	2	Multi-Disk

In [32]:
#Stop recording calls before inspecting the result
sys.settrace(None)
import inspect

# Get all functions defined in functions.py (functions merely imported into that module are
# filtered out by their source file). getsourcefile may return None when the source file
# cannot be determined, so fall back to an empty string instead of failing on .endswith.
lib_funcs = {
    name for name, obj in inspect.getmembers(functions, inspect.isfunction)
    if (inspect.getsourcefile(obj) or "").endswith("functions.py")
}

#`called` also holds comprehension code objects such as '<listcomp>'; those are not top-level functions
print("✅ Used functions:", called)
print("🚫 Unused functions:", lib_funcs - called)

✅ Used functions: {'add_cross', 'remove_cross', 'powerGroupToIndex', 'powerGroupsToAnomalyType', '<dictcomp>', 'analyzePowerGroupString', '<listcomp>', 'powerGroupToDiskPanels', 'extract_data_whole_era', 'panelDiskToIndex'}
🚫 Unused functions: {'condense_powergroup_overlap', 'calcNumAnomalousLumisections', 'condense_lumisection_runs'}
