# Reference Run Ranking Performance Testing & Development Notebook

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, sys

# Placeholder: replace with the root directory of your local DQMExplore checkout.
dqmepath = "path/to/DQMExplore/project/root/directory"
# The project's src/ directory has to be on sys.path *before* the import below.
sys.path.append(os.path.join(dqmepath, "src/"))
import dqmexplore as dqme

# Placeholder: replace with the root directory of your local rrr checkout.
rrrpath = "path/to/rrr/project/root/directory"
# Same pattern as above: extend sys.path first, then import the package.
sys.path.append(os.path.join(rrrpath, "src/"))
import refrunrank as rrr

# Client object handed to OMSData further down.
# NOTE(review): judging by the helper's name this goes through a device
# authorization flow and may prompt interactively — confirm.
dials = dqme.utils.setupdials.setup_dials_object_deviceauth()

## CertHelper

In [None]:
# Both JSON inputs live in the "jsons" folder of the rrr project.
json_dir = os.path.join(rrrpath, "jsons")
ch_refrunjson = "ch_refrunjson.json"
rr_goldenjson = "Express-Collisions-2022_pixel-strip-track-good.json"

# Resolve the full paths first, then build the CertHelper run-data object.
ch_refrun_path = os.path.join(json_dir, ch_refrunjson)
rr_golden_path = os.path.join(json_dir, rr_goldenjson)
chdata = dqme.certhelper.CHRunData(ch_refrun_path, rr_golden_path)

# Last expression of the cell, so the notebook displays its result.
chdata.getGoodRuns()

In [None]:
# Filters handed to CHRunData.applyFilter. Only "dataset" is active; the
# commented entries are optional filters that can be toggled on.
# Every entry ends with a comma so that uncommenting any line keeps the
# dict literal syntactically valid.
filters = {
    "dataset": "/Express/Collisions2022/*",
    # "run_number":[(352564, 355103), 365889],
    # "reference_run_number": [352425],
    # "run_reconstruction_type": "express",
    # "reference_run_reconstruction_type": "express",
}

# Last expression of the cell, so the notebook displays the filtered result.
chdata.applyFilter(filters=filters)

## OMS

In [None]:
# Unique run numbers that survive the CertHelper filters.
runnbs = list(chdata.applyFilter(filters=filters)["run_number"].unique())
omsdata = dqme.omsdata.OMSData(dials)

# Register the runs of interest, then fetch the run-level data followed by
# the lumisection-level data for them.
omsdata.addRuns(runnbs)
for endpoint in ("runs", "lumisections"):
    omsdata.fetchData(endpoint)

In [None]:
# Preview the first four rows of the fetched run-level data.
runs_table = omsdata.getData("runs")
runs_table.head(4)

In [None]:
# Preview the first four rows of the fetched lumisection-level data.
ls_table = omsdata.getData("lumisections")
ls_table.head(4)

## Ranking

| Run Features                    | LS Features                              | Stats  |
|---------------------------------|------------------------------------------|--------|
| `b_field`                      | `delivered_lumi_per_lumisection`         | `mean` |
| `l1_triggers_counter`          | `recorded_lumi_per_lumisection`          | `std`  |
| `hlt_physics_throughput`       | `init_lumi`                              | `min`  |
| `init_lumi`                    | `recorded_lumi`                          | `25%`  |
| `crossing_angle_stop`          | `end_lumi`                               | `50%`  |
| `energy`                       | `pileup`                                 | `75%`  |
| `end_lumi`                     | `delivered_lumi`                         | `max`  |
| `hlt_physics_rate`             | `prescale_index`                         |        |
| `fill_number`                  |                                          |        |
| `crossing_angle_start`         |                                          |        |
| `beta_star_start`              |                                          |        |
| `initial_prescale_index`       |                                          |        |
| `hlt_physics_size`             |                                          |        |
| `l1_rate`                      |                                          |        |
| `run_number`                   |                                          |        |
| `beta_star_stop`               |                                          |        |
| `hlt_physics_counter`          |                                          |        |

<!-- Run features:
* `b_field`
* `l1_triggers_counter`
* `hlt_physics_throughput`
* `init_lumi`
* `crossing_angle_stop`
* `energy`
* `end_lumi`
* `hlt_physics_rate`
* `fill_number`
* `crossing_angle_start`
* `beta_star_start`
* `initial_prescale_index`
* `hlt_physics_size`
* `l1_rate`
* `run_number`
* `beta_star_stop`
* `hlt_physics_counter`

LS features:
* `delivered_lumi_per_lumisection`
* `recorded_lumi_per_lumisection`
* `init_lumi`
* `recorded_lumi`
* `end_lumi`
* `pileup`
* `delivered_lumi`
* `prescale_index`

Stats:
* `mean`
* `std`
* `min`
* `25%`
* `50%`
* `75%`
* `max` -->

In [None]:
ranker = rrr.ranking.RunRanker(omsdata)

# Feature selection passed to the ranker:
#   "runs"         -> list of run-level feature names
#   "lumisections" -> {statistic: [lumisection-level feature names]}
# Commented entries are candidates that can be toggled on. Every entry
# (active or commented) ends with a comma so that uncommenting a line never
# produces a syntax error or an implicit string concatenation.
ftrs_dict = {
    "runs": [
        "b_field",
        "energy",
        "hlt_physics_rate",
        "fill_number",
        # "run_number",
        # "init_lumi",
        # "end_lumi",
        # "crossing_angle_start",
        # "crossing_angle_stop",
        # "beta_star_start", # Maybe
        # "initial_prescale_index", # Maybe
        # "hlt_physics_throughput", # Not relevant
        # "hlt_physics_size", # Mean event size, not relevant
        # "l1_rate", # NO, use HLT since that's what ZB uses
        # "hlt_physics_counter", # NO, num of events
        # "l1_triggers_counter", # NO, num of events
    ],

    "lumisections": {
        "50%": ["pileup", "recorded_lumi"],
        # "mean":["delivered_lumi_per_lumisection", "recorded_lumi_per_lumisection", "init_lumi", "recorded_lumi", "end_lumi", "pileup", "delivered_lumi", "prescale_index"],
        # "std": ["delivered_lumi_per_lumisection", "recorded_lumi_per_lumisection", "init_lumi", "recorded_lumi", "end_lumi", "pileup", "delivered_lumi", "prescale_index"],
        # "min": ["delivered_lumi_per_lumisection", "recorded_lumi_per_lumisection", "init_lumi", "recorded_lumi", "end_lumi", "pileup", "delivered_lumi", "prescale_index"],
        # "max": ["delivered_lumi_per_lumisection", "recorded_lumi_per_lumisection", "init_lumi", "recorded_lumi", "end_lumi", "pileup", "delivered_lumi", "prescale_index"],
        # NOTE: the alternative below reuses the "50%" key; if enabled together
        # with the active "50%" entry above, it silently replaces that entry.
        # "50%": ["delivered_lumi_per_lumisection", "recorded_lumi_per_lumisection", "init_lumi", "recorded_lumi", "end_lumi", "pileup", "delivered_lumi", "prescale_index"],
    },
}

ranker.setFeatures(ftrs_dict)
ranker.constructFeatures()
# Last expression of the cell, so the notebook displays the feature table.
ranker.ftrsDF

In [None]:
# Keep only the runs that have more than 1000 lumisection rows.
# numlss_mask is a boolean Series keyed by run number ("runnb").
numlss_mask = ranker.omsdata.getData("lumisections").groupby("runnb").size() > 1000

# Select by run number rather than by position: the mask follows the groupby
# key order and only contains runs that have lumisection rows, so it is not
# guaranteed to line up element-for-element with getRuns().
# NOTE(review): assumes getRuns() and the "runnb" column hold run numbers of
# the same dtype — confirm.
long_runs = numlss_mask.index[numlss_mask].to_numpy()
all_runs = np.array(ranker.omsdata.getRuns())
runnbs = all_runs[np.isin(all_runs, long_runs)]
print(len(runnbs))

In [None]:
# Quick PCA ranking that passes neither a run subset nor a distance metric.
# `target` is set here so the cell runs in a fresh kernel executed top to
# bottom; previously it was only assigned in a later cell (NameError).
target = 362728
rslts, wghts = ranker.refrank_pca(target, n_components=2)
print(wghts)
# Last expression of the cell, so the notebook displays the ranking result.
rslts

In [None]:
# --- Ranking configuration ---
target = 362728       # Run number passed to the ranker as the target
clustering = False    # Hierarchical clustering for auto feature selection
n_components = 2      # Number of PCA components
dist = "manh"         # Manhattan distance

# Pick the ranking routine once, then call it with the shared arguments.
rank_fn = ranker.refrank_pca_hierarch if clustering else ranker.refrank_pca
rslts, wghts = rank_fn(
    target,
    n_components=n_components,
    runnbs=runnbs,
    dist_metric=dist,
)

# The hierarchical variant's output is additionally kept under its own names.
if clustering:
    rslts_h, wghts_h = rslts, wghts

In [None]:
# Display the ranking result returned by the most recent ranker call.
rslts

In [None]:
# Order the features by ascending weight (ties fall back to the feature name)
# so the bars are drawn from smallest to largest.
keys = list(wghts.keys())
values = list(wghts.values())
sorted_items = sorted(zip(values, keys))  # Sort by values
values, keys = zip(*sorted_items)  # Unzip into sorted values and keys

# Plotting
fig, ax = plt.subplots(figsize=(6, 4), dpi=150)
ax.bar(keys, values)
ax.set_xlabel("Feature")
ax.set_ylabel("Weight")
ax.set_title("PCA Feature Weights")
# Rotate the existing tick labels instead of overwriting them with
# set_xticklabels, which warns when the tick positions are not fixed first.
ax.tick_params(axis="x", labelrotation=45)
plt.show()

# Same weights as a table, largest weight first.
wghts_df = pd.DataFrame(list(wghts.items()), columns=["Feature", "Weight"])
wghts_df = wghts_df.sort_values(by="Weight", ascending=False).reset_index(drop=True)
wghts_df