# Gather HCAL histos

In [1]:
import pandas as pd
import numpy as np

# import cmsdials
from cmsdials import Dials
from cmsdials.auth.bearer import Credentials
from cmsdials.filters import (
    FileIndexFilters,
    LumisectionHistogram1DFilters,
    LumisectionHistogram2DFilters,
    LumisectionFilters,
    RunFilters,
    MEFilters
)

In [2]:
creds = Credentials.from_creds_file()

dials = Dials(creds,workspace="hcal") # this will load into a DQM workspace by default (i.e. Tracker,HCAL,ECAL,etc.)

[2025-05-22 13:49:09,313] INFO: Access token and refresh token are expired, triggering device authentication flow...
[2025-05-22 13:49:10,122] INFO: This device will expire in 600 seconds.
[2025-05-22 13:49:10,123] INFO: Go to the following url and authenticate: https://auth.cern.ch/auth/realms/cern/device?user_code=RAHF-SYWQ
[2025-05-22 13:49:10,123] INFO: Checking authorization status every 5 seconds...
[2025-05-22 13:49:15,787] INFO: Device not authorized yet.
[2025-05-22 13:49:21,400] INFO: Device not authorized yet.
[2025-05-22 13:49:27,014] INFO: Device not authorized yet.
[2025-05-22 13:49:33,227] INFO: Device not authorized yet.
[2025-05-22 13:49:38,887] INFO: Device not authorized yet.
[2025-05-22 13:49:44,637] INFO: Device authorized, authentication finished successfully!


In [3]:
kwargs= dict(
    status="FINISHED",
    dataset__regex="ZeroBias/Run2024[A-Z]-PromptReco/*",
    dim = 2,
    page_size = 500,
            )

In [4]:
allMEs= dials.mes.list_all(MEFilters(**kwargs))
allMEs

[MonitoringElement(me_id=18, me='Hcal/DigiTask/Occupancy/depth/depth1', count=1310900, dim=2),
 MonitoringElement(me_id=19, me='Hcal/DigiTask/Occupancy/depth/depth2', count=1310900, dim=2),
 MonitoringElement(me_id=20, me='Hcal/DigiTask/Occupancy/depth/depth3', count=1310900, dim=2),
 MonitoringElement(me_id=21, me='Hcal/DigiTask/Occupancy/depth/depth4', count=1310900, dim=2),
 MonitoringElement(me_id=22, me='Hcal/DigiTask/Occupancy/depth/depth5', count=1310900, dim=2),
 MonitoringElement(me_id=23, me='Hcal/DigiTask/Occupancy/depth/depth6', count=1310900, dim=2),
 MonitoringElement(me_id=24, me='Hcal/DigiTask/Occupancy/depth/depth7', count=1310900, dim=2),
 MonitoringElement(me_id=25, me='Hcal/DigiTask/Occupancy/depth/depthHO', count=1310900, dim=2),
 MonitoringElement(me_id=102, me='Hcal/DigiTask/OccupancyCut/depth/depth1', count=1024239, dim=2),
 MonitoringElement(me_id=103, me='Hcal/DigiTask/OccupancyCut/depth/depth2', count=1024239, dim=2),
 MonitoringElement(me_id=104, me='Hcal/Di

In [22]:
RunsDF = dials.run.list_all(
                    RunFilters(**kwargs),
                    enable_progress = True
).to_pandas()

ls_mask = RunsDF.loc[:,"ls_count"] > 1000
long_runs = RunsDF.loc[ls_mask].reset_index()
del RunsDF

Progress:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
# selected_runs.loc[::7].info()
selected_runs = long_runs.loc[::7]

## Looking for the rootfiles that contain these runs

In [28]:
# selected_runs.drop(columns=["index"]).to_csv("selected_runs.csv",index=False)
selected_runs = pd.read_csv("selected_runs.csv")
selected_runs

Unnamed: 0,dataset_id,dataset,run_number,ls_count
0,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,379154,1075
1,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,380005,1101
2,14959583,/ZeroBias/Run2024D-PromptReco-v1/DQMIO,380470,1642
3,14959583,/ZeroBias/Run2024D-PromptReco-v1/DQMIO,380644,1010
4,14965177,/ZeroBias/Run2024E-PromptReco-v1/DQMIO,381190,1093
5,14972620,/ZeroBias/Run2024E-PromptReco-v2/DQMIO,381480,1287
6,14972620,/ZeroBias/Run2024E-PromptReco-v2/DQMIO,381793,1055
7,14986300,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,382343,2238
8,14986300,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,382769,1565
9,14986300,/ZeroBias/Run2024F-PromptReco-v1/DQMIO,383254,1255


In [29]:
testDF = dials.file_index.list_all(FileIndexFilters(**kwargs,dataset_id=14944573),
                                   enable_progress = True
                       ).to_pandas()
testDF

Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
0,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14005951477,309723126,2024-04-09 07:45:28+00:00,2024-04-09 07:45:28+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
1,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14005951517,331788679,2024-04-09 07:45:28+00:00,2024-04-09 07:45:28+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
2,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14005951557,342074540,2024-04-09 07:45:28+00:00,2024-04-09 07:45:28+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
3,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14005951597,431616039,2024-04-09 07:45:28+00:00,2024-04-09 07:45:28+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
4,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14005951637,424164092,2024-04-09 07:45:28+00:00,2024-04-09 07:45:28+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
...,...,...,...,...,...,...,...,...,...
151,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14184090277,42200426,2024-04-16 22:43:35+00:00,2024-04-16 22:43:35+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
152,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14184090317,46041386,2024-04-16 22:43:35+00:00,2024-04-16 22:43:35+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
153,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14184975157,110725631,2024-04-17 03:23:56+00:00,2024-04-17 03:23:56+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14184975197,64900890,2024-04-17 03:23:56+00:00,2024-04-17 03:23:56+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,


In [40]:
testDF.file_size.sum() / 1024e6

33.0394589140625

In [34]:
testDF.file_size.transform( lambda x: f"{x/1e6:.2f} MB")

0      309.72 MB
1      331.79 MB
2      342.07 MB
3      431.62 MB
4      424.16 MB
         ...    
151     42.20 MB
152     46.04 MB
153    110.73 MB
154     64.90 MB
155     31.39 MB
Name: file_size, Length: 156, dtype: object

In [33]:
print(f"{testDF.file_size[4]:,} bytes")
print(f"{testDF.file_size[4]/1e6:.2f} MB")

424,164,092 bytes
424.16 MB


# Now downloading all the data and making the files

In [8]:
import os

finished = [file for file in os.listdir() if "run" in file]
for run in selected_runs.loc[::7].run_number:
    for fin in finished:
        if f"{run:_}" in fin:
            print(f"skipping {run:_}")
        
    
# os.path.exists("run-378_239.parquet")

skipping 383_254


In [1]:
finished

NameError: name 'finished' is not defined

In [9]:
!mkdir -p Ls_ge_1k

In [10]:
base_path = os.path.join(os.getcwd(),"Ls_ge_1k")
print(base_path)

/home/gfidalgo/Documents/Github/DIALS-explore/Ls_ge_1k


In [None]:
for run in selected_runs.loc[::7].run_number:
    skip=False
    
    for fin in finished:
        if f"{run:_}" in fin:
            print(f"skipping {run:_}")
            skip=True
    
    if not skip:
        print(f"Starting download of {run:_}")
        irun_df = dials.h2d.list_all(
                                    LumisectionHistogram2DFilters(run_number=run,page_size=500),
                                     enable_progress=True
        ).to_pandas()
        
        irun_df.to_parquet(f"{base_path}/run-{run:_}.parquet")
        print(f"Done with {run = :,}.") 
        del irun_df

Starting download of 379_154


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7efff6046510>>
Traceback (most recent call last):
  File "/home/gfidalgo/miniforge3/envs/DIALS/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


In [20]:
print(f"{base_path} has been populated with {len(selected_runs.loc[::7].run_number)} runs.")

/home/gfidalgo/Documents/Github/DIALS-explore/Ls_ge_1k has been populated with 20 runs.
