# Finding and Downloading Root files for specific runs

In [5]:
import pandas as pd
import numpy as np

# import cmsdials
from cmsdials import Dials
from cmsdials.auth.bearer import Credentials
from cmsdials.filters import (
    FileIndexFilters,
    LumisectionHistogram1DFilters,
    LumisectionHistogram2DFilters,
    LumisectionFilters,
    RunFilters,
    MEFilters
)

In [6]:
creds = Credentials.from_creds_file()

dials = Dials(creds,workspace="hcal") # this will load into a DQM workspace by default (i.e. Tracker,HCAL,ECAL,etc.)

[2025-06-04 22:44:45,663] INFO: Credentials file not found, triggering device authentication flow...
[2025-06-04 22:44:45,734] INFO: This device will expire in 600 seconds.
[2025-06-04 22:44:45,735] INFO: Go to the following url and authenticate: https://auth.cern.ch/auth/realms/cern/device?user_code=RBYZ-HIUA
[2025-06-04 22:44:45,736] INFO: Checking authorization status every 5 seconds...
[2025-06-04 22:44:52,864] INFO: Device not authorized yet.
[2025-06-04 22:44:58,003] INFO: Device authorized, authentication finished successfully!


In [7]:
kwargs= dict(
    status="FINISHED",
    dataset__regex="ZeroBias/Run2024[A-Z]-PromptReco/*",
    dim = 2,
    page_size = 500
            )

In [8]:
runs = dials.run.list_all(RunFilters(**kwargs),enable_progress=True).to_pandas()
lscount_mask = runs.ls_count > 1000
runs[lscount_mask]

Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,dataset_id,dataset,run_number,ls_count
89,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,379154,1075
135,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379456,1208
158,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379660,1563
160,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379729,1534
161,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379765,1728
...,...,...,...,...
3426,15099576,/ZeroBias/Run2024I-PromptReco-v2/DQMIO,386924,1818
3596,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387574,1084
3615,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387607,2509
3623,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387640,2596


In [10]:
runs.to_csv("../files/TEMPall2024runs.csv",index=False)

In [9]:
runs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3665 entries, 0 to 3664
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   dataset_id  3665 non-null   int64 
 1   dataset     3665 non-null   object
 2   run_number  3665 non-null   int64 
 3   ls_count    3665 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 114.7+ KB


In [17]:
# range of run numbers in 2024
runs.run_number.iloc[0],runs.run_number.iloc[-1] 

(378144, 387721)

In [18]:
runs[lscount_mask].nunique()

dataset_id     11
dataset        11
run_number    137
ls_count      132
dtype: int64

In [19]:
run_numbers = runs[lscount_mask].run_number

In [23]:
run_numbers.iloc[0],run_numbers.iloc[-1]

(379154, 387696)

In [11]:
files = dials.file_index.list_all(FileIndexFilters(**kwargs),enable_progress=True).to_pandas()

files

Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
0,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13770979997,25105071,2024-03-21 10:52:28+00:00,2024-03-21 10:52:28+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
1,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13822664277,136518352,2024-03-22 05:20:04+00:00,2024-03-22 05:20:04+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
2,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13854945877,298774004,2024-03-23 21:05:31+00:00,2024-03-23 21:05:31+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
3,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13854945917,142308376,2024-03-23 21:05:31+00:00,2024-03-23 21:05:31+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
4,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13856188037,310099159,2024-03-24 17:01:02+00:00,2024-03-24 17:01:02+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
...,...,...,...,...,...,...,...,...,...
6238,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18048826877,44108849,2024-11-06 04:43:09+00:00,2024-11-06 04:43:09+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6239,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027077,21461718,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6240,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027117,66443397,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6241,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027157,1261200398,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,


In [13]:
files.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6243 entries, 0 to 6242
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   dataset_id              6243 non-null   int64              
 1   dataset                 6243 non-null   object             
 2   file_id                 6243 non-null   int64              
 3   file_size               6243 non-null   int64              
 4   creation_date           6243 non-null   datetime64[ns, UTC]
 5   last_modification_date  6243 non-null   datetime64[ns, UTC]
 6   logical_file_name       6243 non-null   object             
 7   status                  6243 non-null   object             
 8   err_trace               0 non-null      object             
dtypes: datetime64[ns, UTC](2), int64(3), object(4)
memory usage: 439.1+ KB


In [14]:
files.to_csv("all2024.csv",index=False)

In [3]:
! ls -alhtr ../files

total 2.7M
drwxr-xr-x. 2 gfidalgo 1399 4.0K May  9 11:44 parquet_files500
-rw-r--r--. 1 gfidalgo 1399 854K May 26 05:00 all2024files.csv
-rw-r--r--. 1 gfidalgo 1399 398K May 26 19:34 long_runs.csv
drwxr-xr-x. 2 gfidalgo 1399 4.0K May 26 19:34 .
-rw-r--r--. 1 gfidalgo 1399 1.5M Jun  4 17:17 all2024.csv
drwxr-xr-x. 2 gfidalgo 1399 4.0K Jun  4 17:28 root_files
drwxr-xr-x. 2 gfidalgo 1399 4.0K Jun  4 17:47 ..


# Getting all the files first

In [9]:
all_files = files.logical_file_name
all_files

0       /store/data/Run2024A/ZeroBias/DQMIO/PromptReco...
1       /store/data/Run2024A/ZeroBias/DQMIO/PromptReco...
2       /store/data/Run2024A/ZeroBias/DQMIO/PromptReco...
3       /store/data/Run2024A/ZeroBias/DQMIO/PromptReco...
4       /store/data/Run2024A/ZeroBias/DQMIO/PromptReco...
                              ...                        
6238    /store/data/Run2024J/ZeroBias/DQMIO/PromptReco...
6239    /store/data/Run2024J/ZeroBias/DQMIO/PromptReco...
6240    /store/data/Run2024J/ZeroBias/DQMIO/PromptReco...
6241    /store/data/Run2024J/ZeroBias/DQMIO/PromptReco...
6242    /store/data/Run2024J/ZeroBias/DQMIO/PromptReco...
Name: logical_file_name, Length: 6243, dtype: object

In [None]:
redir = "root://eoscms.cern.ch//eos/cms"

all_files.apply(lambda x: redir+x).to_csv("all2024files.csv",index=False)

# Now getting the list of files for a long run

In [36]:
h2d_df = dials.h2d.list_all(LumisectionHistogram2DFilters(**kwargs,run_number=run_numbers.iloc[0],me__regex="Hcal/DigiTask/OccupancyCut/*"),
                            enable_progress=True).to_pandas()
# h2d_df = dials.h2d.list(LumisectionHistogram2DFilters(**kwargs,run_number=run_numbers.iloc[0],me__regex="Hcal/DigiTask/OccupancyCut/*"),
#                        ).to_pandas()
h2d_df

Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,dataset,me,dataset_id,file_id,run_number,ls_number,me_id,x_min,x_max,x_bin,y_min,y_max,y_bin,entries,data
0,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth1,14944573,14097406797,379154,1,102,0.0,84.0,84.0,0.5,72.5,72.0,1531215,"[[0.0, 0.0, 609.0, 603.0, 592.0, 664.0, 560.0,..."
1,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth2,14944573,14097406797,379154,1,103,0.0,84.0,84.0,0.5,72.5,72.0,1303508,"[[0.0, 0.0, 461.0, 459.0, 400.0, 357.0, 416.0,..."
2,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth3,14944573,14097406797,379154,1,104,0.0,84.0,84.0,0.5,72.5,72.0,1224289,"[[0.0, 0.0, 736.0, 594.0, 661.0, 622.0, 668.0,..."
3,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth4,14944573,14097406797,379154,1,105,0.0,84.0,84.0,0.5,72.5,72.0,905500,"[[0.0, 0.0, 468.0, 514.0, 383.0, 380.0, 367.0,..."
4,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth5,14944573,14097406797,379154,1,106,0.0,84.0,84.0,0.5,72.5,72.0,226950,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8595,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth4,14944573,14097406797,379154,1076,105,0.0,84.0,84.0,0.5,72.5,72.0,78653,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,..."
8596,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth5,14944573,14097406797,379154,1076,106,0.0,84.0,84.0,0.5,72.5,72.0,19934,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8597,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth6,14944573,14097406797,379154,1076,107,0.0,84.0,84.0,0.5,72.5,72.0,17797,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8598,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,Hcal/DigiTask/OccupancyCut/depth/depth7,14944573,14097406797,379154,1076,108,0.0,84.0,84.0,0.5,72.5,72.0,4391,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


# Putting it all together.

 We can use the info given by the h2d endpoint to give us the file.
 The run endpoint will give us the run numbers

In [40]:
runs[lscount_mask]

Unnamed: 0,dataset_id,dataset,run_number,ls_count
89,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,379154,1075
135,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379456,1208
158,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379660,1563
160,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379729,1534
161,14949731,/ZeroBias/Run2024C-PromptReco-v1/DQMIO,379765,1728
...,...,...,...,...
3426,15099576,/ZeroBias/Run2024I-PromptReco-v2/DQMIO,386924,1818
3596,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387574,1084
3615,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387607,2509
3623,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,387640,2596


In [41]:
files

Unnamed: 0,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
0,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13770979997,25105071,2024-03-21 10:52:28+00:00,2024-03-21 10:52:28+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
1,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13822664277,136518352,2024-03-22 05:20:04+00:00,2024-03-22 05:20:04+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
2,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13854945877,298774004,2024-03-23 21:05:31+00:00,2024-03-23 21:05:31+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
3,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13854945917,142308376,2024-03-23 21:05:31+00:00,2024-03-23 21:05:31+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
4,14927230,/ZeroBias/Run2024A-PromptReco-v1/DQMIO,13856188037,310099159,2024-03-24 17:01:02+00:00,2024-03-24 17:01:02+00:00,/store/data/Run2024A/ZeroBias/DQMIO/PromptReco...,FINISHED,
...,...,...,...,...,...,...,...,...,...
6238,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18048826877,44108849,2024-11-06 04:43:09+00:00,2024-11-06 04:43:09+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6239,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027077,21461718,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6240,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027117,66443397,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,
6241,15139648,/ZeroBias/Run2024J-PromptReco-v1/DQMIO,18055027157,1261200398,2024-11-06 12:27:39+00:00,2024-11-06 12:27:39+00:00,/store/data/Run2024J/ZeroBias/DQMIO/PromptReco...,FINISHED,


In [42]:
files.file_id.unique()

array([13770979997, 13822664277, 13854945877, ..., 18055027117,
       18055027157, 18055027197])

In [105]:
files_list = []

for i in h2d_df.file_id.unique():
    if i in files.file_id.unique():
        files_list.append(files[files.loc[:,"file_id"] == i])

In [106]:
temp = pd.concat(files_list)
temp.insert(0,"run_number",value = run_numbers.iloc[0])

temp

Unnamed: 0,run_number,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
130,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406797,335001931,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
131,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406837,390919623,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
132,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406877,427523932,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
133,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406917,450827655,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
135,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406997,401507494,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
134,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406957,457493520,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
137,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407077,357994890,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
139,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407157,341161117,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
148,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14150155837,329016448,2024-04-12 09:36:07+00:00,2024-04-12 09:36:07+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,
150,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14152284597,90207658,2024-04-12 11:41:58+00:00,2024-04-12 11:41:58+00:00,/store/data/Run2024B/ZeroBias/DQMIO/PromptReco...,FINISHED,


In [107]:
temp.loc[:,"logical_file_name"] = temp.logical_file_name.apply(lambda x: redir+x)

temp

Unnamed: 0,run_number,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
130,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406797,335001931,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
131,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406837,390919623,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
132,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406877,427523932,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
133,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406917,450827655,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
135,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406997,401507494,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
134,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406957,457493520,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
137,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407077,357994890,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
139,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407157,341161117,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
148,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14150155837,329016448,2024-04-12 09:36:07+00:00,2024-04-12 09:36:07+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
150,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14152284597,90207658,2024-04-12 11:41:58+00:00,2024-04-12 11:41:58+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,


In [95]:
# !rmdir root_files/*

In [85]:
# for run_number in run_numbers:
#     os.makedirs(f"root_files/{run_number}")

# Finally getting the list of files for the long runs 
each run has many files attached to it so they are orgainized in their own folder under `root_files`

In [109]:
# df_list = []
for run_number in run_numbers:
    print(f"Fetching files for {run_number=}")
    h2d_df = dials.h2d.list_all(LumisectionHistogram2DFilters(**kwargs,run_number=run_number,me__regex="Hcal/DigiTask/OccupancyCut/*"),
                                enable_progress=True).to_pandas()
    # df_list.append(h2d_df)
    files_list = []
    
    for i in h2d_df.file_id.unique():
        if i in files.file_id.unique():
            files_list.append(files[files.loc[:,"file_id"] == i])
    temp = pd.concat(files_list)
    temp.insert(0,"run_number",value = run_number)
    temp.loc[:,"logical_file_name"] = temp.logical_file_name.apply(lambda x: redir+x)
    temp.to_csv(f"root_files/{run_number}.csv",index=False)
    print(f"Finished {run_number=}")
    del temp
    del h2d_df




Fetching files for run_number=379154


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379154
Fetching files for run_number=379456


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379456
Fetching files for run_number=379660


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379660
Fetching files for run_number=379729


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379729
Fetching files for run_number=379765


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379765
Fetching files for run_number=379866


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379866
Fetching files for run_number=379956


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=379956
Fetching files for run_number=380005


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380005
Fetching files for run_number=380074


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380074
Fetching files for run_number=380115


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380115
Fetching files for run_number=380310


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380310
Fetching files for run_number=380360


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380360
Fetching files for run_number=380385


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380385
Fetching files for run_number=380446


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380446
Fetching files for run_number=380470


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380470
Fetching files for run_number=380513


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380513
Fetching files for run_number=380531


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380531
Fetching files for run_number=380567


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380567
Fetching files for run_number=380601


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380603
Fetching files for run_number=380614


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380614
Fetching files for run_number=380644


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380644
Fetching files for run_number=380705


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380705
Fetching files for run_number=380847


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380847
Fetching files for run_number=380848


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380848
Fetching files for run_number=380895


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=380895
Fetching files for run_number=381115


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381115
Fetching files for run_number=381164


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381164
Fetching files for run_number=381190


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381190
Fetching files for run_number=381191


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381191
Fetching files for run_number=381208


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381208
Fetching files for run_number=381380


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381380
Fetching files for run_number=381384


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381384
Fetching files for run_number=381417


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381417
Fetching files for run_number=381443


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381443
Fetching files for run_number=381480


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381480
Fetching files for run_number=381484


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381484
Fetching files for run_number=381516


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381516
Fetching files for run_number=381544


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381544
Fetching files for run_number=381594


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381594
Fetching files for run_number=381698


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381698
Fetching files for run_number=381778


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381778
Fetching files for run_number=381793


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381793
Fetching files for run_number=381900


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381900
Fetching files for run_number=381968


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=381968
Fetching files for run_number=382120


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382120
Fetching files for run_number=382258


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382258
Fetching files for run_number=382300


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382300
Fetching files for run_number=382330


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382330
Fetching files for run_number=382343


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382343
Fetching files for run_number=382435


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382435
Fetching files for run_number=382511


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382511
Fetching files for run_number=382580


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382580
Fetching files for run_number=382594


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382594
Fetching files for run_number=382654


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382654
Fetching files for run_number=382684


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382684
Fetching files for run_number=382769


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382769
Fetching files for run_number=382913


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382913
Fetching files for run_number=382921


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=382921
Fetching files for run_number=383067


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383067
Fetching files for run_number=383155


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383155
Fetching files for run_number=383162


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383162
Fetching files for run_number=383174


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383174
Fetching files for run_number=383254


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383254
Fetching files for run_number=383323


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383323
Fetching files for run_number=383368


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383368
Fetching files for run_number=383449


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383449
Fetching files for run_number=383468


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383468
Fetching files for run_number=383487


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383487
Fetching files for run_number=383512


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383512
Fetching files for run_number=383615


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383615
Fetching files for run_number=383631


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383631
Fetching files for run_number=383712


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383712
Fetching files for run_number=383756


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383756
Fetching files for run_number=383767


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383767
Fetching files for run_number=383814


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383814
Fetching files for run_number=383854


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383854
Fetching files for run_number=383903


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383903
Fetching files for run_number=383996


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=383996
Fetching files for run_number=384052


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384052
Fetching files for run_number=384069


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384069
Fetching files for run_number=384128


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384128
Fetching files for run_number=384188


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384188
Fetching files for run_number=384202


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384202
Fetching files for run_number=384239


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384239
Fetching files for run_number=384291


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384291
Fetching files for run_number=384383


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384383
Fetching files for run_number=384413


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384413
Fetching files for run_number=384468


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384468
Fetching files for run_number=384492


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384492
Fetching files for run_number=384565


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384565
Fetching files for run_number=384614


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384614
Fetching files for run_number=384644


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384644
Fetching files for run_number=384935


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384935
Fetching files for run_number=384963


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384963
Fetching files for run_number=384981


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=384981
Fetching files for run_number=385054


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385054
Fetching files for run_number=385094


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385094
Fetching files for run_number=385127


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385127
Fetching files for run_number=385142


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385142
Fetching files for run_number=385152


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385152
Fetching files for run_number=385168


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385168
Fetching files for run_number=385194


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385194
Fetching files for run_number=385281


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385281
Fetching files for run_number=385286


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385286
Fetching files for run_number=385324


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385324
Fetching files for run_number=385390


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385390
Fetching files for run_number=385422


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385422
Fetching files for run_number=385443


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385443
Fetching files for run_number=385515


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385515
Fetching files for run_number=385604


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385604
Fetching files for run_number=385620


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385620
Fetching files for run_number=385728


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385728
Fetching files for run_number=385738


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385738
Fetching files for run_number=385764


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385764
Fetching files for run_number=385842


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385842
Fetching files for run_number=385889


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385889
Fetching files for run_number=385934


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385934
Fetching files for run_number=385986


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=385986
Fetching files for run_number=386025


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386025
Fetching files for run_number=386071


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386071
Fetching files for run_number=386218


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386218
Fetching files for run_number=386509


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386509
Fetching files for run_number=386554


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386554
Fetching files for run_number=386604


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386604
Fetching files for run_number=386640


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386640
Fetching files for run_number=386694


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Finished run_number=386694
Fetching files for run_number=386704


Progress:   0%|          | 0/1 [00:00<?, ?it/s]

ImpossibleToRefreshTokenError: 

# Now lets concatenate all the files into 1 Dataframe

In [2]:
from glob import glob

In [3]:
df_list = [pd.read_csv(i) for i in glob("root_files/*.csv") ]

In [4]:
df = pd.concat(df_list)

df = df.rename(columns={"Unnamed: 0": "old_index"})

In [5]:
run_mask = df.loc[:,"run_number"] == 379154
df[run_mask]

Unnamed: 0,old_index,run_number,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
0,130,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406797,335001931,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
1,131,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406837,390919623,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
2,132,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406877,427523932,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
3,133,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406917,450827655,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
4,135,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406997,401507494,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
5,134,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097406957,457493520,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
6,137,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407077,357994890,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
7,139,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14097407157,341161117,2024-04-11 13:42:21+00:00,2024-04-11 13:42:21+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
8,148,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14150155837,329016448,2024-04-12 09:36:07+00:00,2024-04-12 09:36:07+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,
9,150,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14152284597,90207658,2024-04-12 11:41:58+00:00,2024-04-12 11:41:58+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,


In [6]:
file_id_mask = df.loc[:,"file_id"] == 14152284597
df[file_id_mask]

Unnamed: 0,old_index,run_number,dataset_id,dataset,file_id,file_size,creation_date,last_modification_date,logical_file_name,status,err_trace
9,150,379154,14944573,/ZeroBias/Run2024B-PromptReco-v1/DQMIO,14152284597,90207658,2024-04-12 11:41:58+00:00,2024-04-12 11:41:58+00:00,root://eoscms.cern.ch//eos/cms/store/data/Run2...,FINISHED,


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1423 entries, 0 to 12
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   old_index               1423 non-null   int64  
 1   run_number              1423 non-null   int64  
 2   dataset_id              1423 non-null   int64  
 3   dataset                 1423 non-null   object 
 4   file_id                 1423 non-null   int64  
 5   file_size               1423 non-null   int64  
 6   creation_date           1423 non-null   object 
 7   last_modification_date  1423 non-null   object 
 8   logical_file_name       1423 non-null   object 
 9   status                  1423 non-null   object 
 10  err_trace               0 non-null      float64
dtypes: float64(1), int64(5), object(5)
memory usage: 133.4+ KB


In [8]:
df.nunique()

old_index                 1423
run_number                 127
dataset_id                  10
dataset                     10
file_id                   1423
file_size                 1423
creation_date              237
last_modification_date     237
logical_file_name         1423
status                       1
err_trace                    0
dtype: int64

In [99]:
# df.to_csv("long_runs.csv")