# Preparing Data for the AE

### Import and function definition

In [None]:
import sys, os
# Put the local ./oms-api-client directory (resolved to an absolute path) on the import path.
# NOTE(review): presumably this is where the `omsapi` package imported later lives -- confirm.
sys.path.append(os.path.abspath('./oms-api-client'))

In [None]:
import pandas as pd
import numpy as np
import timeit, math, copy, yaml, ast
from omsapi import OMSAPI
import matplotlib.pyplot as plt
from scipy.stats import linregress
from scipy.optimize import curve_fit

from utilities import *
#%load_ext memory_profiler

In [None]:
try:
    import runregistry
except:
    !pip install runregistry
    #IMPORTANT: change this path based on the output of
    #!pip show runregistry
    sys.path.append('/eos/home-i04/f/fsimone/.local/lib/python3.9/site-packages')
    import runregistry

### Read data and add lumi info

In [None]:
# Label used to tag the output of this notebook.
out_label = "240724"
# Substring that selects the monitoring-element directories and files.
me = "hRHGlobalm3"
# Directory that is scanned for monitoring-element sub-directories.
path = "./"
dirs = os.listdir(path)
# os.path.join keeps the result correct even when `path` has no trailing separator.
me_dirs = [
    os.path.join(path, entry)
    for entry in dirs
    if me in entry and os.path.isdir(os.path.join(path, entry))
]
print(me_dirs)

In [None]:
# Gather, from every selected directory, the files whose name contains `me`.
files_all = []
# The loop variable is deliberately not called `dir`: that would shadow the
# builtin for the rest of the notebook session.
for me_dir in me_dirs:
    files_all.extend(
        os.path.join(me_dir, name) for name in os.listdir(me_dir) if me in name
    )
# Keep only the files that exist and are at least 601 bytes long.
# NOTE(review): the 601-byte threshold presumably skips empty/stub files -- confirm.
filtered_files = [
    candidate
    for candidate in files_all
    if os.path.exists(candidate) and os.path.getsize(candidate) >= 601
]

In [None]:
%%time
#monitoring_elements =pl.read_parquet(filtered_files).filter((pl.col('dataset').str.contains("StreamExpress"))).to_pandas()
monitoring_elements = pd.read_parquet(filtered_files)
monitoring_elements = monitoring_elements[monitoring_elements['dataset'].str.contains("StreamExpress")]

In [None]:
# Show the monitoring-element table (notebook display of the last expression).
monitoring_elements

In [None]:
# Distinct values of the `dataset` column, printed for inspection.
dataset = monitoring_elements["dataset"].unique()
print(dataset)

In [None]:
# Distinct run numbers in ascending order. np.unique already returns its
# result sorted, so no additional np.sort is needed.
run_list = np.unique(monitoring_elements["run_number"].unique())
print(run_list)

In [None]:
# Fetch one hard-coded run from runregistry and show its "class" field.
# NOTE(review): looks like a quick access/credentials check -- confirm; the run number is hard-coded.
run = runregistry.get_run(run_number=381298)
run["class"]

In [None]:
#if you get errors here, go through the readme and look for the instructions on how to get runregistry credentials
runreg_df = pd.DataFrame(columns=["run_number", "cscGOOD", "cscSTANDBY", "cscBAD", "cscEMPTY"])
bad_runs = []
for r in run_list:
    run = runregistry.get_run(run_number=int(r))
    try:
        dict = {"run_number": int(r), "class": run["class"], "cscGOOD": 0, "cscSTANDBY":0, "cscBAD":0, "cscEMPTY":0}
        if 'csc-csc' in run["lumisections"]:
            #print("Run :", r)
            data_dict = run["lumisections"]["csc-csc"]
            for key in data_dict.keys():
                if key == "GOOD":
                    dict["cscGOOD"] = data_dict["GOOD"]
                if key == "STANDBY":
                    dict["cscSTANDBY"] = data_dict["STANDBY"]
                if key == "BAD":
                    dict["cscBAD"] = data_dict["BAD"]
                if key == "EMPTY":
                    dict["cscEMPTY"] = data_dict["EMPTY"]
            del data_dict
        runreg_df = pd.concat([runreg_df, pd.DataFrame([dict])], ignore_index=True)
    except:
        dict = {"run_number": int(r), "class": "BAD", "cscGOOD": 0, "cscSTANDBY":0, "cscBAD":0, "cscEMPTY":0}
    if ("Collisions" not in dict["class"]) or dict["cscSTANDBY"]!=0 or dict["cscBAD"]!=0 or dict["cscGOOD"]==0:
        print("Run :", r, "--> BAD")
        bad_runs.append(int(r))
    else:
        print("Run :", r, "--> GOOD")
    print(dict)
    del dict
    del run

In [None]:
# List the run numbers that were flagged as bad.
print(bad_runs)

In [None]:
# Drop every row whose run_number is listed in bad_runs.
monitoring_elements = monitoring_elements[~monitoring_elements['run_number'].isin(bad_runs)]

In [None]:
# Recompute the distinct run numbers in ascending order. np.unique already
# returns its result sorted, so no additional np.sort is needed.
run_list = np.unique(monitoring_elements["run_number"].unique())
print(run_list)

In [None]:
# Read the notebook configuration from config.yaml into `info`.
with open("config.yaml", 'r') as f:
    try:
        info = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        print(f"Cannot read the file: {exc}")
        # Re-raise: continuing would leave `info` undefined (or stale from a
        # previous run) and fail later with a less helpful error.
        raise

In [None]:
# Create the OMS API client and authenticate via OIDC with the client ID and secret stored in `info`.
# NOTE(review): cert_verify=False presumably turns off TLS certificate verification -- confirm this is intended.
omsapi = OMSAPI("https://cmsoms.cern.ch/agg/api", "v1", cert_verify=False)
omsapi.auth_oidc(info["APIClient"]["client_ID"], info["APIClient"]["Client_Secret"])

In [None]:
# Query OMS for the lumisection records of every run; one DataFrame per run.
lumi_dfs = []
for r in run_list:
    print("Run :", r)
    ls_query = omsapi.query("lumisections")
    ls_query.filter("run_number", r)
    # NOTE(review): the very large per_page is presumably meant to return all
    # lumisections of a run in a single page -- confirm.
    ls_query.sort("lumisection_number", asc=False).paginate(page=1, per_page=100000)
    response = ls_query.data().json()["data"]
    # Each response entry carries its fields under "attributes".
    lumi_dfs.append(pd.DataFrame([entry["attributes"] for entry in response]))
    # Drop the per-run temporaries before the next request.
    del response
    del ls_query

In [None]:
# Replace missing values in selected lumisection columns with fixed defaults.
fill_defaults = {
    'castor_ready': False,
    'gem_ready': False,
    'zdc_ready': False,
    'prescale_index': -1,
    'prescale_name': "",
}
for lumi_df in lumi_dfs:
    for column, default in fill_defaults.items():
        lumi_df[column] = lumi_df[column].fillna(default)

In [None]:
# Stack the per-run lumisection frames into a single table.
# Each frame keeps its own index, so index labels repeat in the result.
lumi_info = pd.concat(lumi_dfs)

In [None]:
# Rename lumisection_number to ls_number and add mean_lumi, the average of
# init_lumi and end_lumi.
lumi_info = lumi_info.rename(columns={'lumisection_number': 'ls_number'}).assign(
    mean_lumi=lambda frame: (frame["init_lumi"] + frame["end_lumi"]) / 2
)

In [None]:
# Print the column names of the lumisection table.
print(lumi_info.columns)

In [None]:
# Attach the lumisection columns to each row, matching on (run_number, ls_number).
# how='left' keeps every monitoring_elements row; rows without a match get NaN in the added columns.
monitoring_elements = pd.merge(monitoring_elements, lumi_info, on=['run_number', 'ls_number'], how='left')

In [None]:
#%memit

In [None]:
# Keep only the rows for which every required flag equals True.
# The explicit `== True` comparison is kept on purpose: a missing value
# compares as False, so such rows are dropped.
required_flags = ["beams_stable", "cscm_ready", "cms_active", "beam_present", "physics_flag"]
keep_row = monitoring_elements[required_flags[0]] == True
for flag_name in required_flags[1:]:
    keep_row = keep_row & (monitoring_elements[flag_name] == True)
monitoring_elements = monitoring_elements[keep_row]


In [None]:
# Order the rows by run and lumisection and renumber them from 0.
# drop=True discards the old index directly, so no helper "index" column is
# created and a pre-existing column with that name cannot be removed by mistake.
monitoring_elements = monitoring_elements.sort_values(by=['run_number', 'ls_number']).reset_index(drop=True)

In [None]:
# Show the resulting table (notebook display of the last expression).
monitoring_elements

In [None]:
# Write the table to "<me>_<out_label>_s0.parquet" without the index.
output_file = f"{me}_{out_label}_s0.parquet"
monitoring_elements.to_parquet(output_file, index=False)