# Setup

In [None]:
# External modules
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import importlib
import numpy as np
import os, sys
import json

# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook; None disables both the warning and the error mode.
pd.options.mode.chained_assignment = None

In [None]:
# Read the API credentials from the local secrets file (kept outside the
# notebook directory) and publish them as environment variables.
with open("../clientid.json", "r") as file:
    secrets = json.load(file)

os.environ.update(
    {
        "API_CLIENT_ID": secrets["API_CLIENT_ID"],
        "API_CLIENT_SECRET": secrets["API_CLIENT_SECRET"],
    }
)

In [None]:
import refrunrank as rrr
import oms

In [None]:
# Re-execute the local modules so that edits made to them on disk are picked
# up without restarting the kernel.
importlib.reload(rrr)
importlib.reload(oms)

# Run / reference-run records loaded from the JSON file.
CHData = rrr.CHRunData("../jsons/AllRunsRefRuns_Mar5_2024.json")
# Object used by the cells below for every OMS query.
oms_fetch = oms.oms_fetch()

## Checking setup

In [None]:
# Using the API, you can check all of the available features: query a single
# run and inspect the result. The call is the last expression of the cell, so
# the notebook displays whatever it returns.
print("All available run level features:\n")
oms_fetch.get_oms_data(308997, "runs")

In [None]:
# Same query with a list of run numbers -- presumably fetches exactly these
# runs (TODO confirm in oms.oms_fetch.get_oms_data).
oms_fetch.get_oms_data([313118, 355408], "runs")

In [None]:
# Same query with a tuple instead of a list -- presumably interpreted as a
# (first, last) run-number range (TODO confirm in oms.oms_fetch.get_oms_data).
oms_fetch.get_oms_data((313118, 320000), "runs")

In [None]:
# Filters for the run / reference-run table. The commented-out entries are
# optional examples; only "dataset" is active in this cell.
# NOTE(review): "?" and "*" look like shell-style wildcards -- confirm how
# CHRunData.applyfilter matches them.
filters = {
    # "run_number": (313118, 377251),
    # "reference_run_number": 312727,
    # "run_reconstruction_type": "express",
    # "reference_run_reconstruction_type": "express",
    "dataset": "/Express/Collisions202?/*"
}

# Apply the filters to CHData.
CHData.applyfilter(filters=filters)

# User Input

In [None]:
# User input: features handed to the RunRanker for comparing runs
ftrs_lst = [
    'b_field',
    'init_lumi',
    'energy',
    'end_lumi',
    'hlt_physics_rate',
    'l1_rate',
    'run_number',
]

# Target run: the run passed to ranker.refrank further down
target = 370772

# If testing or not
# NOTE(review): `testing` is not read by any other cell visible in this notebook.
testing = True

# Getting run numbers to query: keep prompt-reconstruction collision runs
# whose run number lies in the 500-run window ending at the target.
# NOTE(review): whether the (low, high) bounds are inclusive depends on
# CHRunData.applyfilter -- confirm.
CHfilters = {
    "run_number": (target-500, target),
    # "reference_run_number": 312727,
    "run_reconstruction_type": "prompt",
    # "reference_run_reconstruction_type": "express",
    "dataset": "*/Collisions202?/*"
}

CHData.applyfilter(filters=CHfilters)
# Unique run numbers left after filtering; this list is what gets sent to OMS.
candsrunnb = list(CHData.filteredDF["run_number"].unique())

# Disabled: removing the target itself from the candidate list.
# if target in candsrunnb:
    # candsrunnb.remove(target)

# OMS Filter: threshold used for last_lumisection_number below
min_lss = 500

# Filters that will be applied in query to OMS
# ("GE" is presumably greater-or-equal -- confirm against the OMS API).
oms_filter = [
    {"attribute_name": "last_lumisection_number", "value": min_lss, "operator": "GE"},
]

# Fetching Data

In [None]:
# Getting data from OMS for the candidate runs, with the extra OMS filters
# applied to the query. The returned value is displayed as the cell output.
oms_fetch.get_oms_data(candsrunnb, "runs", extrafilters=oms_filter)

In [None]:
# Creating RunRanker instance

# Reload first so the ranker is built from the current refrunrank source.
importlib.reload(rrr)
# NOTE(review): importlib.reload updates the module object in place, so this
# re-import looks redundant -- it normally rebinds `rrr` to the same module.
import refrunrank as rrr

# Built from oms_fetch.last_run_query (presumably the result of the most
# recent OMS query -- confirm) and the user-selected feature list.
ranker = rrr.RunRanker(oms_fetch.last_run_query, ftrs_lst)

In [None]:
# Taking a look at the ranking feature selection stored on the ranker
# (displayed as the cell output).
ranker.rankftrs

In [None]:
# Taking a look at the dataframe stored on the ranker
# (displayed as the cell output).
ranker.rundf

# Ranking

In [None]:
# Rank for the target run chosen in the user-input cell; the returned value
# is displayed as the cell output.
ranker.refrank(target)

---

In [None]:
# Finding the rank of the actual RR (reference run) used
# NOTE(review): `targets_df` and `rankings` are not defined in any cell
# visible in this notebook -- this cell relies on state created elsewhere.
# Look up the targets_df row labelled by the second element of the first
# index entry of `rankings`.
target_data = targets_df.loc[rankings.index.values[0][1]]
RR_used = target_data["RR_used"]
# MultiIndex selection: slice(None) keeps every value of the first index
# level, RR_used selects on the second level, ":" keeps all columns
# (assumes `rankings` has a two-level MultiIndex -- confirm).
rankings.loc[(slice(None), RR_used), :]

In [None]:
# Reload the local helper modules after editing them on disk.
# NOTE(review): `omsu` and `tests` are never imported in the visible part of
# this notebook (only `refrunrank as rrr` and `oms` are), so on a fresh kernel
# these two calls raise NameError -- confirm the intended modules.
importlib.reload(rrr)
importlib.reload(omsu)
importlib.reload(tests)

In [None]:
# Display targets_ftrs.
# NOTE(review): not defined in any cell visible in this notebook.
targets_ftrs

In [None]:
# Display candidates_ftrs.
# NOTE(review): not defined in any cell visible in this notebook.
candidates_ftrs

In [None]:
# Settings forwarded to tests.test_rank as comp_number / n_components.
# NOTE(review): their exact meaning is defined in tests.test_rank, which is
# not visible here.
comp_number = 60
n_components = 3

# Run the ranking test once with these settings, with plotting enabled and
# statistics printing disabled; return_RRranks=True asks for the result table
# that the following cells inspect.
RRranks = tests.test_rank(
    targets_ftrs,
    candidates_ftrs,
    comp_number=comp_number,
    n_components=n_components,
    print_stats=False,
    plot=True,
    dpi=100,
    return_RRranks=True
)

In [None]:
# Display the table returned by tests.test_rank.
RRranks

In [None]:
# Order the table by run number (in place), then add the difference between
# each run number and the reference run listed next to it.
RRranks.sort_values(by="runs", inplace=True)
RRranks["runRR_delta"] = RRranks["runs"].sub(RRranks["RRs"])
RRranks

In [None]:
# Distinct values found in the "RRs" column.
RRranks["RRs"].unique()

In [None]:
# Making histograms of difference in run number and actual RRs used
# Two stacked axes: top = run number minus RR number, bottom = RR numbers.
fig, axs = plt.subplots(2)
axs[0].hist(RRranks["runRR_delta"], bins=50)
# Last expression of the cell, so the notebook also echoes hist()'s return value.
axs[1].hist(RRranks["RRs"], bins=50)


In [None]:
# Rows whose "ranks" value is 60 or higher, i.e. the RRs that scored badly.
RRranks_GE60 = RRranks.loc[RRranks["ranks"].ge(60)]
RRranks_GE60

In [None]:
# Rows that share the largest "ranks" value in the table.
RRranks_max = RRranks.loc[RRranks["ranks"].eq(RRranks["ranks"].max())]
RRranks_max

In [None]:
# Show, as a one-row table, the targets_wRR entry for the run in the first
# row of RRranks_max.
# NOTE(review): `targets_wRR` is not defined in any cell visible in this notebook.
pd.DataFrame(targets_wRR.loc[RRranks_max.iloc[0]["runs"]]).T

In [None]:
# Show, as a one-row table, the candidates_df entry for the RR in the first
# row of RRranks_max.
# NOTE(review): `candidates_df` is not defined in any cell visible in this notebook.
pd.DataFrame(candidates_df.loc[RRranks_max.iloc[0]["RRs"]]).T

In [None]:
# Testing on a wide range of possible value combinations
# comp_number values: 20, 40, ..., 120 (the stop value 130 is excluded).
comp_number_range = np.arange(20, 130, 20)
# n_components values: 1 up to len(ftrs_lst) - 5 inclusive.
n_components_range = np.arange(1, len(ftrs_lst) - 4)

In [None]:
# Run the ranking test for every (comp_number, n_components) combination.
# `product` is not imported anywhere else in this notebook, so it is imported
# here to keep the cell runnable on a fresh kernel.
from itertools import product

for comparisons, components in product(comp_number_range, n_components_range):
    # Call through the `tests` module, the same way the single-configuration
    # cell does (tests.test_rank); a bare `test_rank` is never defined here.
    # RRranks is overwritten on every pass, so only the last combination's
    # table remains after the loop; the printed stats and plots are the
    # per-combination output.
    RRranks = tests.test_rank(
        targets_wRR,
        candidates_df,
        comp_number=comparisons,
        n_components=components,
        print_stats=True,
        plot=True,
        dpi=100,
        return_RRranks=True
    )