In [1]:
# 1) Install dependencies (run once)
!pip install --quiet uproot awkward requests

# 2) Imports
import uproot
import awkward as ak
import pandas as pd
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# 3) Your datasets dict
datasets = {
    # short sample label -> list of DAS dataset paths belonging to that sample
    "JetHT_Run2022C": ["/JetHT/Run2022C-22Sep2023-v1/NANOAOD"],
    "JetMET_Run2022C": ["/JetMET/Run2022C-22Sep2023-v1/NANOAOD"],
    "JetMET_Run2022D": ["/JetMET/Run2022D-22Sep2023-v1/NANOAOD"],
}

# 4) Helper: query DAS for the list of ROOT files in a NanoAOD dataset
def get_root_files(das_name, timeout=30):
    """Return a list of xrootd URLs for all files in the given DAS dataset.

    Parameters
    ----------
    das_name : str
        DAS dataset path, e.g. ``/JetHT/Run2022C-22Sep2023-v1/NANOAOD``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    list of str
        One ``root://cms-xrd-global.cern.ch/<path>`` URL for every line of the
        response that looks like a ROOT file path; an empty list if there is
        no such line.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    url = "https://cmsweb.cern.ch/das/api/v1.0/file"
    params = {
        "dataset": das_name,
        "instance": "prod/global",
        "format": "plain",  # ask for the file list as plain text
    }
    # NOTE(review): verify=False disables TLS certificate verification; prefer
    # pointing `verify` at a CA bundle that covers the server. Also confirm
    # that this endpoint is reachable without grid credentials -- TODO confirm.
    resp = requests.get(url, params=params, verify=False, timeout=timeout)

    # Fail loudly on HTTP errors instead of parsing an error body as file names.
    resp.raise_for_status()

    # Keep only lines that look like ROOT file paths; blank lines or stray
    # markup would otherwise be turned into bogus xrootd URLs.
    candidates = (line.strip() for line in resp.text.splitlines())
    return [
        f"root://cms-xrd-global.cern.ch/{path}"
        for path in candidates
        if path.startswith("/") and path.endswith(".root")
    ]

# 5) Debug: check you actually got files
key = 'JetHT_Run2022C'
das_name = datasets[key][0]
file_list = get_root_files(das_name)
print(f"Found {len(file_list)} files; first few:\n", file_list[:5])

# 6) Open the 'Events' tree and read a few branches (if file_list non-empty)
if not file_list:
    raise RuntimeError("No ROOT files found for "+das_name)

# uproot.concatenate takes the branch names as its second argument
# (`expressions`) and expects the tree name as part of the file spec; the
# {file: tree} mapping avoids any ambiguity with the colons in xrootd URLs.
# NOTE: this reads the requested branches from every file in file_list, which
# can be slow for a full dataset -- slice file_list for a quick preview.
events = uproot.concatenate(
    {path: "Events" for path in file_list},
    ["Jet_pt", "Jet_eta", "MET_pt"],
    library="ak",
)

# Awkward Array 2 renamed ak.to_pandas to ak.to_dataframe; fall back to the
# old name so the cell also runs on installations that only have to_pandas.
to_dataframe = getattr(ak, "to_dataframe", None) or ak.to_pandas
df = to_dataframe(events[:1000])
print(df.head())


Found 0 files; first few:
 []


RuntimeError: No ROOT files found for /JetHT/Run2022C-22Sep2023-v1/NANOAOD

In [None]:
# Get data file names from stock nano
import os
import json

def add_prefix(files, prefix='root://cmsxrootd.fnal.gov/'):
    """Return a copy of ``files`` with ``prefix`` prepended to every file path.

    Parameters
    ----------
    files : dict
        Maps a sample name to a list of file paths.
    prefix : str, optional
        String prepended to each path. Defaults to the xrootd redirector
        ``root://cmsxrootd.fnal.gov/``.

    Returns
    -------
    dict
        New dict with the same keys and prefixed path lists. The input dict
        and its lists are not modified.

    Notes
    -----
    Prints each sample name as it is processed and, at the end, the keys of
    the resulting dict.
    """
    new_files = {}

    for name, paths in files.items():
        print(name)
        new_files[name] = [prefix + path for path in paths]

    print(new_files.keys())
    return new_files

# 2022 JetHT
filesets = {}

filesets['JetHT_Run2022C'] = !dasgoclient --query="file dataset=/JetHT/Run2022C-22Sep2023-v1/NANOAOD"

this_file = 'infiles/2022_JetHTData.json'
with open(this_file, 'w') as json_file:
    json.dump(add_prefix(filesets), json_file)
        
for i,f in filesets.items():
    print(i, len(f))