In [1]:
!rm -rf IFIC-SummerSchool-2025
!git clone https://github.com/M0V1/IFIC-SummerSchool-2025.git
import sys
import os.path
!pip install atlasopenmagic
from atlasopenmagic import install_from_environment
install_from_environment()

Cloning into 'IFIC-SummerSchool-2025'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 49 (delta 15), reused 4 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (49/49), 74.77 MiB | 17.43 MiB/s, done.
Resolving deltas: 100% (15/15), done.
Collecting atlasopenmagic
  Downloading atlasopenmagic-1.0.1-py3-none-any.whl.metadata (7.2 kB)
Downloading atlasopenmagic-1.0.1-py3-none-any.whl (15 kB)
Installing collected packages: atlasopenmagic
Successfully installed atlasopenmagic-1.0.1
Installing packages: ['aiohttp>=3.9.5', 'atlasopenmagic>=1.0.1', 'awkward>=2.6.7', 'awkward-pandas>=2023.8.0', 'coffea~=0.7.0', 'hist>=2.8.0', 'ipykernel>=6.29.5', 'jupyter>=1.0.0', 'lmfit>=1.3.2', 'matplotlib>=3.9.1', 'metakernel>=0.30.2', 'notebook<7', 'numpy>=1.26.4', 'pandas>=2.2.2', 'papermill>=2.6.0', 'pip>=24.2', 'scikit-learn>=1.5.1', 'uproot>=5.3.10', 'uproot3>=3.14.4', 'fsspec-xroot

In [1]:
import numpy as np # for numerical calculations such as histogramming
import matplotlib.pyplot as plt # for plotting
import matplotlib_inline # to edit the inline plot format
#matplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'svg') # to make plots in pdf (vector) format
from matplotlib.ticker import AutoMinorLocator # for minor ticks
import uproot # for reading .root files
import awkward as ak # to represent nested data in columnar format
import vector # for 4-momentum calculations
import time # for printing time stamps
import requests # for file gathering, if needed

# Unit conversion factors: GeV is the base unit, so 1 MeV = 0.001 GeV
GeV = 1.0
MeV = 1e-3

In [2]:
import atlasopenmagic as atom
# Print the releases known to atlasopenmagic, with a short description of each
atom.available_releases()
# Make the 2025 education/outreach beta release the active one; the metadata
# fetched later in the notebook is for this release
atom.set_release('2025e-13tev-beta')

Available releases:
2016e-8tev        2016 Open Data for education release of 8 TeV proton-proton collisions (https://opendata.cern.ch/record/3860).
2020e-13tev       2020 Open Data for education release of 13 TeV proton-proton collisions (https://cern.ch/2r7xt).
2024r-pp          2024 Open Data for research release for proton-proton collisions (https://opendata.cern.record/80020).
2024r-hi          2024 Open Data for research release for heavy-ion collisions (https://opendata.cern.ch/record/80035).
2025e-13tev-beta  2025 Open Data for education and outreach beta release for 13 TeV proton-proton collisions (https://opendata.cern.ch/record/93910).
2025r-evgen       2025 Open Data for research release for event generation (https://opendata.cern.ch/record/160000).
Active release set to: 2025e-13tev-beta. Metadata cache cleared.


In [3]:
lumi = 36.6 # fb^-1, data size of the full release
fraction = 1.0 # reduce this if you want the code to run quicker

In [4]:
# Select the skim to use for the analysis; this name is passed to the
# atlasopenmagic dataset builders when the data and MC samples are built
skim = "2bjets"

In [5]:
# Monte Carlo samples to load, keyed by (LaTeX-formatted) sample name.
# 'dids' is presumably the list of dataset IDs for the sample -- confirm
# against the atlasopenmagic documentation; 'color' is a hex colour string.
mc_defs = {
    r'Background $t\bar{t}$': {
        'dids': [410470],
        'color': "#6b59d3",  # purple
    },
    r'Signal ($m_H$ = 125 GeV)': {
        'dids': [345949],
        'color': "#00cdff",  # light blue
    },
}

# Build the MC and data sample descriptions for the chosen skim, requesting https access
mc_samples = atom.build_mc_dataset(mc_defs, skim=skim, protocol='https')
data_samples = atom.build_data_dataset(skim, protocol='https')

# Combine both into a single mapping: data entries come first, and an MC
# entry replaces a data entry if the two ever share a key
samples = dict(data_samples)
samples.update(mc_samples)

Fetching and caching all metadata for release: 2025e-13tev-beta...
Successfully cached 374 datasets.


In [6]:
# Names of the loaded samples: the data entry plus one entry per MC definition
samples.keys()

dict_keys(['Data', 'Background $t\\bar{t}$', 'Signal ($m_H$ = 125 GeV)'])

In [7]:
# Take the first entry of the 'list' of data files, which is the data15_periodD file
value = samples['Data']['list'][0]
print(f"{value = }")

# The entry is already a complete https URL (see the printed value), so it is
# used as-is: no local path or ".root" suffix has to be added
data15_periodD = value

value = 'https://opendata.cern.ch/eos/opendata/atlas/rucio/user/egramsta/data15_periodD.2bjets70.root'


In [8]:
# Open the remote file; the ":analysis" suffix selects the tree named "analysis" inside it
tree = uproot.open(data15_periodD + ":analysis")

# Number of entries (events) stored in the tree
print(tree.num_entries)

# We can view the names of all branches stored in the tree using the .keys() method.
print(tree.keys())

# Preview the contents of the tree with the .arrays() method.
# Only the first few entries are read: calling .arrays() with no arguments
# would fetch every branch of every entry over the network just to print a
# truncated summary. Drop entry_stop if you really need the whole tree.
print(tree.arrays(entry_stop=5))

424678
['num_events', 'sum_of_weights', 'sum_of_weights_squared', 'category', 'sig_lep', 'sig_muo', 'sig_ele', 'sig_pho', 'sig_jet', 'sig_bjet70', 'n_lep', 'n_ele', 'n_muo', 'n_jet', 'n_pho', 'n_bjet70', 'TriggerMatch_DILEPTON', 'ScaleFactor_MLTRIGGER', 'ScaleFactor_PILEUP', 'ScaleFactor_FTAG', 'mcWeight', 'xsec', 'filteff', 'kfac', 'channelNumber', 'eventNumber', 'runNumber', 'trigML', 'trigP', 'trigDT', 'trigT', 'trigE', 'trigDM', 'trigDE', 'trigM', 'trigMET', 'ScaleFactor_BTAG', 'ScaleFactor_JVT', 'jet_n', 'jet_pt', 'jet_eta', 'jet_phi', 'jet_e', 'jet_btag_quantile', 'jet_jvt', 'largeRJet_n', 'largeRJet_pt', 'largeRJet_eta', 'largeRJet_phi', 'largeRJet_e', 'largeRJet_m', 'largeRJet_D2', 'jet_pt_jer1', 'jet_pt_jer2', 'ScaleFactor_ELE', 'ScaleFactor_MUON', 'ScaleFactor_LepTRIGGER', 'ScaleFactor_MuTRIGGER', 'ScaleFactor_ElTRIGGER', 'lep_n', 'lep_type', 'lep_pt', 'lep_eta', 'lep_phi', 'lep_e', 'lep_charge', 'lep_ptvarcone30', 'lep_topoetcone20', 'lep_z0', 'lep_d0', 'lep_d0sig', 'lep_isT

In [10]:
# Read the "jet_e" branch of the tree, requesting the result as an Awkward Array (library="ak")
# NOTE(review): the saved output of this cell is a TypeError mentioning
# 'ClientResponseError' -- presumably a failed remote (https) read rather than
# a problem with this call; re-run to confirm before sharing the notebook.
tree["jet_e"].arrays(library="ak")

TypeError: 'ClientResponseError' object is not subscriptable