## ROOT Implementation

In [1]:
import ROOT
import os

# Distributed RDataFrame constructor taken from the experimental OSCAR backend.
RDataFrame = ROOT.RDF.Experimental.Distributed.OSCAR.RDataFrame

# Client settings for the OSCAR backend. Every endpoint and credential is read
# from the process environment so that no secret is stored in the notebook;
# a missing variable raises KeyError when this cell runs.
oscarclient = {
    "minio_endpoint": os.environ['minio_endpoint'],
    "minio_access":   os.environ['minio_access'],
    "minio_secret":   os.environ['minio_secret'],
    "bucket_name": 'root-oscar',
    # Optional arguments
    "benchmarking" : True,
    # os.environ values are already str, so no f-string wrapper is needed.
    "oscar_endpoint": os.environ['oscar_endpoint'],
    "oscar_access":   os.environ['oscar_access'],
    "oscar_secret":   os.environ['oscar_secret'],
}

Welcome to JupyROOT 6.27/01


In [2]:
def dimuon_analysis(df, output_path="dimuon_spectrum.pdf"):
    """Book the dimuon invariant-mass histogram on ``df``, draw it and save the plot.

    Args:
        df: Dataframe object exposing ``Filter``, ``Define`` and ``Histo1D``.
            The expressions used here read the columns ``nMuon``,
            ``Muon_charge``, ``Muon_pt``, ``Muon_eta``, ``Muon_phi`` and
            ``Muon_mass``.
        output_path: File the canvas is saved to. Defaults to
            ``"dimuon_spectrum.pdf"``, the name that used to be hard-coded.

    Returns:
        None. As side effects the function prints the elapsed wall-clock time
        measured around the first access to the histogram and writes the
        canvas to ``output_path``.
    """
    # For simplicity, select only events with exactly two muons and require opposite charge
    df_2mu = df.Filter("nMuon == 2", "Events with exactly two muons")
    df_os = df_2mu.Filter("Muon_charge[0] != Muon_charge[1]", "Muons with opposite charge")

    # Compute invariant mass of the dimuon system
    df_mass = df_os.Define("Dimuon_mass", "InvariantMass(Muon_pt, Muon_eta, Muon_phi, Muon_mass)")

    # Make histogram of dimuon mass spectrum. Note how we can set titles and axis labels in one go.
    # Model tuple: name, "title;x label;y label", 30000 bins over [0.25, 300].
    h = df_mass.Histo1D(("Dimuon_mass", "Dimuon mass;m_{#mu#mu} (GeV);N_{Events}", 30000, 0.25, 300), "Dimuon_mass")

    # Produce plot
    ROOT.gStyle.SetOptStat(0)
    ROOT.gStyle.SetTextFont(42)
    c = ROOT.TCanvas("c", "", 800, 700)
    c.SetLogx()
    c.SetLogy()

    # The stopwatch brackets the first access to the booked histogram.
    # NOTE(review): presumably this first access is what runs the (distributed)
    # computation, so the printed time is the processing time - confirm.
    watch = ROOT.TStopwatch()
    h.SetTitle("")
    print(f"Time elapsed {watch.RealTime()}")
    h.GetXaxis().SetTitleSize(0.04)
    h.GetYaxis().SetTitleSize(0.04)
    h.Draw()

    # Annotations drawn at the default text size, positioned in NDC coordinates.
    label = ROOT.TLatex()
    label.SetNDC(True)
    annotations = (
        (0.175, 0.740, "#eta"),
        (0.205, 0.775, "#rho,#omega"),
        (0.270, 0.740, "#phi"),
        (0.400, 0.800, "J/#psi"),
        (0.415, 0.670, "#psi'"),
        (0.485, 0.700, "Y(1,2,3S)"),
        (0.755, 0.680, "Z"),
    )
    for x_ndc, y_ndc, text in annotations:
        label.DrawLatex(x_ndc, y_ndc, text)

    # Header labels use their own text sizes.
    label.SetTextSize(0.040)
    label.DrawLatex(0.100, 0.920, "#bf{CMS Open Data}")
    label.SetTextSize(0.030)
    label.DrawLatex(0.630, 0.920, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}")

    c.SaveAs(output_path)

In [3]:
# Create dataframe from NanoAOD files
filenames = ["root://eospublic.cern.ch//eos/opendata/cms/derived-data/AOD2NanoAODOutreachTool/Run2012BC_DoubleMuParked_Muons.root"]
# Disabled: uncomment to repeat the input file four times in the list.
#filenames = filenames * 4
treename = "Events"
# Build the dataframe over the "Events" tree with the OSCAR client settings,
# requesting 8 partitions.
df = RDataFrame(treename, filenames, oscarclient=oscarclient, npartitions=8)

root-oscar-678285da-83fc-4743-9011-38728295fb9c-benchmark
Bucket does not exist. Trying to create it.
Creating bucket...
Bucket created!
Creating services...
Done creating services!
Creating service mapper for root-oscar-678285da-83fc-4743-9011-38728295fb9c-benchmark
root-oscar-678285da-83fc-4743-9011-38728295fb9c-benchmark
Creating service reducer for root-oscar-678285da-83fc-4743-9011-38728295fb9c-benchmark
root-oscar-678285da-83fc-4743-9011-38728295fb9c-benchmark


In [4]:
# Run the dimuon analysis on the dataframe; prints the elapsed time and saves the plot.
dimuon_analysis(df)

File name: 0_0
File name: 0_1
File name: 0_3
File name: 0_7
Waiting for final result 0_7, sleeping 10 seconds.
Deleting objects.
Bucket Deleted.
Deleting services
m-678285da-83fc-4743-9011-38728295fb9c
<Response [204]>
r-678285da-83fc-4743-9011-38728295fb9c
<Response [204]>
Time elapsed 127.58213305473328


Info in <TCanvas::Print>: pdf file dimuon_spectrum.pdf has been created


In [14]:
# Plot generation
import csv
import sys
import pandas as pd
import plotly.express as px

# Load the usage CSV for one run; the file is '|'-delimited.
# NOTE(review): the file name embeds a run-specific id (678285da-...); a new run
# presumably produces a different id, so this path would need updating - confirm.
# NOTE(review): this rebinds `df`, which earlier cells use for the ROOT dataframe;
# re-running dimuon_analysis(df) after this cell would receive the pandas frame.
df = pd.read_csv('678285da-83fc-4743-9011-38728295fb9c_usage.csv' , delimiter='|')
# Preview the first rows.
df.head()

Unnamed: 0,function,id,time,cpu_percent,mem_percent
0,mapper,0_0,0.0,0.0,5e-05
1,mapper,0_0,0.5,87.8,0.37165
2,mapper,0_0,1.0,93.8,0.474831
3,mapper,0_0,1.5,85.8,0.513743
4,mapper,0_0,2.0,67.8,0.518799


In [22]:
# Id of the mapper whose usage is plotted. It is defined once so that the row
# selection and the figure titles always refer to the same mapper (the titles
# previously said "mapper 0" while the selection used id '5_5').
mapper_id = '5_5'

# Keep only the samples recorded for mapper functions.
mappers = df[df['function'] == 'mapper']
# Samples of the selected mapper. The name `map0` is kept so that any other
# cell referring to it keeps working.
map0 = mappers[mappers['id'] == mapper_id]

# CPU usage over time for the selected mapper.
cpu_fig = px.line(map0,
                 x='time',
                 y='cpu_percent',
                 title=f'CPU usage of mapper {mapper_id}')

# Memory usage over time for the selected mapper.
mem_fig = px.line(map0,
                 x='time',
                 y='mem_percent',
                 title=f'Memory usage of mapper {mapper_id}')

In [23]:
# Render the CPU usage figure.
cpu_fig.show()

In [17]:
# Render the memory usage figure.
mem_fig.show()

## Testing with a dataset in local MinIO

In [None]:
# Fails with a presigned URL. Probably because of how TWebFile::GetHead works -> forbidden 403,
# which triggers another 403 here -> TWebFile::GetFromWeb10
#minio_data = 'https://158.42.106.12:30300/root-common/dimuon_data.root'
minio_data = 'https://test-cern-data.s3.amazonaws.com/dimuon_data.root'

In [None]:
# Single-entry input list using the URL stored in `minio_data`.
filenames = [minio_data]
treename = "Events"
# Build the dataframe over the "Events" tree with the OSCAR client settings,
# requesting 2 partitions.
# NOTE(review): this rebinds `filenames` and `df` used by earlier cells.
df = RDataFrame(treename, filenames, oscarclient=oscarclient, npartitions=2)

In [None]:
# Run the dimuon analysis on the dataframe; prints the elapsed time and saves the plot.
dimuon_analysis(df)

In [None]:
# Display the current input file list.
filenames