# Gather HCAL histograms

In [1]:
import os

import numpy as np
import pandas as pd

import cmsdials
from cmsdials import Dials
from cmsdials.auth.bearer import Credentials
from cmsdials.filters import (
    FileIndexFilters,
    LumisectionHistogram1DFilters,
    LumisectionHistogram2DFilters,
    LumisectionFilters,
    RunFilters,
    MEFilters
)

In [2]:
# Load the stored DIALS credentials and open a client on the "hcal" workspace.
# NOTE(review): per the original author's note, leaving `workspace` out falls
# back to a default DQM workspace (Tracker, HCAL, ECAL, ...) -- confirm against
# the cmsdials documentation.
creds = Credentials.from_creds_file()
dials = Dials(creds, workspace="hcal")

In [3]:
# Filter values reused for both the monitoring-element query and the run query.
# NOTE(review): the trailing "/*" in the dataset pattern looks like a glob; read
# as a regex it means "zero or more slashes" -- confirm this is intended.
kwargs = {
    "status": "FINISHED",
    "dataset__regex": "ZeroBias/Run2024[A-Z]-PromptReco/*",
    "dim": 2,
    "page_size": 500,
}

In [4]:
# List every monitoring element matching the shared filters; the bare name on
# the last line displays the result.
me_filters = MEFilters(**kwargs)
allMEs = dials.mes.list_all(me_filters)
allMEs

[MonitoringElement(me_id=18, me='Hcal/DigiTask/Occupancy/depth/depth1', count=1266468, dim=2),
 MonitoringElement(me_id=19, me='Hcal/DigiTask/Occupancy/depth/depth2', count=1266468, dim=2),
 MonitoringElement(me_id=20, me='Hcal/DigiTask/Occupancy/depth/depth3', count=1266468, dim=2),
 MonitoringElement(me_id=21, me='Hcal/DigiTask/Occupancy/depth/depth4', count=1266468, dim=2),
 MonitoringElement(me_id=22, me='Hcal/DigiTask/Occupancy/depth/depth5', count=1266468, dim=2),
 MonitoringElement(me_id=23, me='Hcal/DigiTask/Occupancy/depth/depth6', count=1266468, dim=2),
 MonitoringElement(me_id=24, me='Hcal/DigiTask/Occupancy/depth/depth7', count=1266468, dim=2),
 MonitoringElement(me_id=25, me='Hcal/DigiTask/Occupancy/depth/depthHO', count=1266468, dim=2),
 MonitoringElement(me_id=102, me='Hcal/DigiTask/OccupancyCut/depth/depth1', count=979807, dim=2),
 MonitoringElement(me_id=103, me='Hcal/DigiTask/OccupancyCut/depth/depth2', count=979807, dim=2),
 MonitoringElement(me_id=104, me='Hcal/Digi

In [6]:
# Fetch every run matching the shared filters and convert the result to a DataFrame.
RunsDF = dials.run.list_all(RunFilters(**kwargs), enable_progress=True).to_pandas()

# Keep only runs whose ls_count exceeds 1000 (presumably the number of
# lumisections per run -- confirm). reset_index() is called without drop, so the
# previous row labels are kept as an "index" column.
ls_mask = RunsDF["ls_count"].gt(1000)
selected_runs = RunsDF[ls_mask].reset_index()

# The full run table is not needed once the selection exists.
del RunsDF

In [16]:
# Summarise every 7th selected run (positional slicing; the frame has a default
# RangeIndex after reset_index(), so this picks the same rows as label slicing).
selected_runs.iloc[::7].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 133
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   index       20 non-null     int64 
 1   dataset_id  20 non-null     int64 
 2   dataset     20 non-null     object
 3   run_number  20 non-null     int64 
 4   ls_count    20 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 932.0+ bytes


In [7]:
# Pull 2D lumisection histograms for a single run to inspect the returned schema.
# NOTE(review): `.list` (unlike `.list_all`) presumably returns only one page -- confirm.
test_filters = LumisectionHistogram2DFilters(run_number=383687)
testDF = dials.h2d.list(test_filters).to_pandas()
testDF

Unnamed: 0,dataset,me,dataset_id,file_id,run_number,ls_number,me_id,x_min,x_max,x_bin,y_min,y_max,y_bin,entries,data
0,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth1,14986300,17108482237,383687,1,18,0.0,84.0,84.0,0.5,72.5,72.0,2806166,"[[0.0, 0.0, 2380.0, 2380.0, 2380.0, 2380.0, 23..."
1,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth2,14986300,17108482237,383687,1,19,0.0,84.0,84.0,0.5,72.5,72.0,2733498,"[[0.0, 0.0, 2380.0, 2380.0, 2380.0, 2380.0, 23..."
2,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth3,14986300,17108482237,383687,1,20,0.0,84.0,84.0,0.5,72.5,72.0,3025909,"[[0.0, 0.0, 2380.0, 2380.0, 2380.0, 2380.0, 23..."
3,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth4,14986300,17108482237,383687,1,21,0.0,84.0,84.0,0.5,72.5,72.0,3073077,"[[0.0, 0.0, 2380.0, 2380.0, 2380.0, 2380.0, 23..."
4,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth5,14986300,17108482237,383687,1,22,0.0,84.0,84.0,0.5,72.5,72.0,157524,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
5,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth6,14986300,17108482237,383687,1,23,0.0,84.0,84.0,0.5,72.5,72.0,151055,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
6,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depth7,14986300,17108482237,383687,1,24,0.0,84.0,84.0,0.5,72.5,72.0,5475,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
7,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/Occupancy/depth/depthHO,14986300,17108482237,383687,1,25,0.0,84.0,84.0,0.5,72.5,72.0,816255,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth1,14986300,17108482237,383687,1,102,0.0,84.0,84.0,0.5,72.5,72.0,753288,"[[0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 2.0, 12.0, 4.0..."
9,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth2,14986300,17108482237,383687,1,103,0.0,84.0,84.0,0.5,72.5,72.0,680696,"[[0.0, 0.0, 4.0, 3.0, 4.0, 5.0, 2.0, 2.0, 5.0,..."


# Download all the data and write one Parquet file per run

In [14]:
# Display the table of runs that the download loop below iterates over.
selected_runs

Unnamed: 0,index,dataset_id,dataset,run_number,ls_count
0,89,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,379154,1075
1,135,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379456,1208
2,158,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379660,1563
3,160,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379729,1534
4,161,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379765,1728
...,...,...,...,...,...
132,3426,15099576,/ZeroBias/Run2024I-PromptReco-v2/DQMIO,386924,1818
133,3596,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387574,1084
134,3615,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387607,2509
135,3623,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387640,2596


In [10]:
# Preview which of the selected runs already have an output file in the
# current working directory.
#
# Only files named like the ones the download cell writes
# ("run-<run number with "_" thousands separators>.parquet") count as finished;
# a bare `"run" in file` test would also pick up unrelated files.
finished = [
    file
    for file in os.listdir()
    if file.startswith("run-") and file.endswith(".parquet")
]

for run in selected_runs.run_number:
    run_tag = f"{run:_}"
    # any() reports each run at most once, even when several files match it.
    if any(run_tag in fin for fin in finished):
        print(f"skipping {run_tag}")

skipping 378_239
skipping 379_530
skipping 380_092
skipping 380_481
skipping 380_744
skipping 381_208


In [12]:
# Download the 2D lumisection histograms of every selected run that is not on
# disk yet and store them as one Parquet file per run.
for run in selected_runs.run_number:
    run_tag = f"{run:_}"
    out_path = f"run-{run_tag}.parquet"
    tmp_path = f"{out_path}.partial"

    # A run counts as done when its output file exists right now (so re-running
    # this cell does not rely on a stale `finished` list) or when it was listed
    # in `finished`. Left-over ".partial" files are never treated as finished.
    already_done = os.path.exists(out_path) or any(
        run_tag in fin for fin in finished if not fin.endswith(".partial")
    )
    if already_done:
        # Printed once per run, even if several files match.
        print(f"skipping {run_tag}")
        continue

    print(f"Starting download of {run_tag}")
    irun_df = dials.h2d.list_all(
        LumisectionHistogram2DFilters(run_number=run, page_size=500),
        enable_progress=True,
    ).to_pandas()

    # Write to a temporary name first and rename afterwards, so an interrupted
    # write never leaves a truncated "run-*.parquet" that later gets skipped.
    try:
        irun_df.to_parquet(tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # Only still present if the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print(f"Done with {run = :,}.")
    # Release the frame before fetching the next run.
    del irun_df

skipping 378_239
skipping 379_530
skipping 380_092
skipping 380_481
skipping 380_744
skipping 381_208


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 381,698.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 382,465.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 382,770.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 383,254.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 383,687.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 384,052.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 384,331.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 384,963.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 385,286.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 385,712.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 386,143.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 386,673.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 386,951.


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Done with run = 387,721.
