In [None]:
import os
from pprint import pprint

import requests
import swagger_client
from swagger_client.rest import ApiException

# Configure and create an API client authenticated with an API token.
#
# NOTE(review): the token literals below are committed in plain text. They are
# kept only as fallbacks so the notebook keeps working unchanged; prefer
# exporting MLP_API_TOKEN / MLP_DEVELOP_API_TOKEN in the environment, then
# rotate the tokens and remove the literals from this file.
API_TOKEN = os.environ.get("MLP_API_TOKEN", "000b74d58339656029f052a32d6f9e11fae8321f")
MLP_URL = "https://ml4dqm-playground.web.cern.ch"

# The misspelled name DEVLOP_API_TOKEN is kept because other cells reference it.
DEVLOP_API_TOKEN = os.environ.get("MLP_DEVELOP_API_TOKEN", "871450223ff7d809acdf6ffe8d11ef4a18a724c6")
MLP_DEVELOP_URL = "https://ml4dqm-playground-develop.web.cern.ch"

# The generated client is pointed at the development instance.
configuration = swagger_client.Configuration()
configuration.host = MLP_DEVELOP_URL
client = swagger_client.ApiClient(configuration)
# Every request made through the client carries these two headers.
client.set_default_header(header_name="Content-Type", header_value="application/json")
client.set_default_header(header_name="Authorization", header_value=f"Token {DEVLOP_API_TOKEN}")

api_instance = swagger_client.ApiApi(client)

In [None]:
import matplotlib.pyplot as plt
import awkward as ak
import numpy as np
import pandas as pd

In [None]:
# List the attribute names of the API client object to see what it exposes.
dir(api_instance)

In [None]:
# Query the lumisection 1D histograms endpoint, filtered by run number.
run_num = 315267
lh1d = api_instance.list_lumisection_histogram1_ds(lumisection__run__run_number=run_num)
# NOTE(review): presumably this is one page of results, not all of them - confirm.
print(f"Got {len(lh1d.results)} results!\n")
pprint(lh1d)



In [None]:
# Convert each API result to a plain dict, then build one DataFrame row per result.
lh1d_records = [entry.to_dict() for entry in lh1d.results]
lh1d_df = pd.DataFrame.from_dict(lh1d_records)

# Preview the first rows.
lh1d_df.head()

In [None]:
# Build an Awkward Array from the per-result dicts.
ar = ak.Array(
    [entry.to_dict() for entry in lh1d.results]
)

In [None]:
# Show the field names available on the records of `ar`.
ar.fields

In [None]:
# Display the Awkward Array itself.
ar

In [None]:
# Print the `data` field of each record, one record per line.
for hist_data in ar.data:
    print(hist_data)

In [None]:
# Convert the `title` field to a plain Python list.
# (Fixed typo: `tolistst` is not an Array method and raised AttributeError.)
ar.title.tolist()

In [None]:
# Logarithmically spaced bin edges from 10**0 to 10**7.
logbins = np.logspace(0, 7)

plt.figure(dpi=100)
# Flatten the `data` of all records into one sequence and histogram the values.
flat_values = ak.ravel(ar.data)
plt.hist(flat_values, bins=logbins, log=False, histtype='step')
# The bins are log-spaced, so show the x axis on a log scale as well.
plt.xscale('log')

In [None]:
plt.figure(dpi=130)
histograms = zip(ar.data, ar.x_bin, ar.x_min, ar.x_max, ar.title)
for i, nbins, minval, maxval, title in histograms:
    # Only draw records whose title contains "tib" (case-insensitive).
    if 'tib' not in title.lower():
        continue
    # x positions: `nbins` evenly spaced points between the x-axis limits.
    x_values = np.linspace(minval, maxval, nbins)
    plt.plot(x_values, i, lw=1, label=title)
plt.legend(fontsize=7)
# plt.yscale('log')
plt.show()

# Managing DQM Histogram Data Files

DQM data files are `.csv` files (`.root` support for nanoDQM files will be added in the future) which contain:
- Run Histograms or
- Lumisection 1D Histograms or
- Lumisection 2D Histograms

They are represented in DQM Playground's database with the `HistogramDataFile` model. 

By default, these files are expected to reside in `/eos/project/c/cmsml4dc`. This directory is not being monitored for changes, and new files can only be discovered using a [custom Django Management command](https://cmstrackerdpg.github.io/cms-tkdpg-software-knowledge-transfer/mlplayground/apps/histogram_file_manager/management/). 

The contents of a Histogram Data File are unknown until a user starts parsing the file by specifying three options:
- The `granularity` of the data (`run` or `lum`, i.e. lumisection),
- The `data_dimensionality` of the data (`1` for 1D or `2` for 2D) and
- The `file_format` of the file (only `csv` is currently available).

Depending on the combination of the three options above, DQM Playground will try to extract the appropriate histograms from the Histogram Data File, namely:

- `granularity=run`, `data_dimensionality=1` and `file_format=csv` will produce `RunHistogram`s,
- `granularity=lum`, `data_dimensionality=1` and `file_format=csv` will produce `LumisectionHistogram1D`s,
- `granularity=lum`, `data_dimensionality=2` and `file_format=csv` will produce `LumisectionHistogram2D`s.

## Listing all the available Histogram Data Files

To list all the Histogram Data Files that DQM Playground has discovered:

In [None]:
# List the attribute names of the API client object to find the file-related methods.
dir(api_instance)

In [None]:
# No filters supplied: request page 1 of the Histogram Data Files listing.
available_hdf = api_instance.list_histogram_data_files(page=1)
# Keep in mind that results are returned in pages of 50
print(f"Got {len(available_hdf.results)} Histogram Data Files")

## Filtering histogram data files by filename
For this example, we will try to get the status of the `ZeroBias_2017B_DataFrame_2D_17.csv` DQM file.

In [None]:
# Filter the listing by a fragment of the file path.
available_hdf = api_instance.list_histogram_data_files(
    filepath__contains="ZeroBias_2017B_DataFrame_2D_17",
)
print(available_hdf.results)

If parsing has already been started for this specific Histogram Data File, we will see the `data_dimensionality` and `granularity` values already set. If not, they will be unknown. 

Once parsing of the file has started, the total number of `run_histograms`, `lumisection_histograms_1d` or `lumisection_histograms_2d` that were extracted from this file will be available under the respective values in the response. 

Its parsing percentage can also be seen under the `percentage_processed` value.

## Starting parsing a Histogram Data File
To start parsing a Histogram Data File, we need:
- Its unique `id`,
- The `granularity` of the data contained (`run` or `lum`, i.e. lumisection),
- The `data_dimensionality` of the data (`1` for 1D or `2` for 2D) and
- The `file_format` of the file (`csv`).

In [None]:
# Display the base URL that the manual requests in this notebook are sent to.
MLP_DEVELOP_URL

In [None]:
# Look up the file to parse by a fragment of its path and take the first match.
hdf_name_fragment = "ZeroBias_2017B_DataFrame_2D_18"
available_hdf = api_instance.list_histogram_data_files(filepath__contains=hdf_name_fragment)
if not available_hdf.results:
    # Fail with an explicit message instead of a bare "list index out of range".
    raise IndexError(f"No Histogram Data File found with a filepath containing {hdf_name_fragment!r}")
file_id = available_hdf.results[0].id

# TODO: Buggy generated code by swagger creates multiple definitions for start_parsing_histogram_data_file
# body = {'granularity':'lum', 'data_dimensionality':2, 'file_format':'csv'}
# success = api_instance.start_parsing_histogram_data_file(id=file_id, body=body)

# Manually do the request for now
r = requests.post(
    f"{MLP_DEVELOP_URL}/api/histogram_data_files/{file_id}/start_parsing/",
    headers={"Content-Type": "application/json", "Authorization": f"Token {DEVLOP_API_TOKEN}"},
    # Parsing options: lumisection granularity, 2D histograms, CSV input.
    json={'granularity':'lum', 'data_dimensionality':2, 'file_format':'csv'}
)

In [None]:
# List the attribute names available on the response object `r`.
dir(r)

In [None]:
# Display the response object.
r

In [None]:
# `r.ok` reflects only the HTTP status of the request (True for status codes below 400).
print(f"Parsing for file {file_id} started: {r.ok}")

In [None]:
# Display the most recent Histogram Data Files listing response.
available_hdf

In [None]:
# Probe pages 129..139 of the listing and stop at the first page with no successor.
for i in range(129, 140):
    temp = api_instance.list_histogram_data_files(page=i)
    print(i)
    # `next` being None is taken to mean this was the last page.
    if temp.next is None:
        print('No more pages')
        break

There are 137 pages with $\approx$ 50 files each

In [None]:
# List Histogram Data Files, filtered on granularity `lum`.
histo_files = api_instance.list_histogram_data_files(
    granularity="lum",
)

In [None]:
# Display the filtered listing response.
histo_files

In [None]:
# Collect the `filepath` of every file in the listing as a plain list.
file_records = [hdf.to_dict() for hdf in histo_files.results]
pd.DataFrame.from_dict(file_records).filepath.tolist()

## Discovering new DQMIO files [Admins only]
If new files are added to the directories where DQMIO files reside, they won't be automatically discovered by DQM Playground unless a new discovery process is forced. To do so:

In [None]:
# Manually do the request for now
discover_url = f"{MLP_DEVELOP_URL}/api/histogram_data_files/discover/"
auth_headers = {"Content-Type": "application/json", "Authorization": f"Token {DEVLOP_API_TOKEN}"}
# A GET on the discover endpoint, authenticated with the token header.
r = requests.get(discover_url, headers=auth_headers)

In [None]:
# `r.ok` reflects only the HTTP status of the request (True for status codes below 400).
print(f"Discovery started: {r.ok}")

In [None]:
# Display the raw body of the response as bytes.
r.content