# Example: read yoda files using `yoda2numpy`
> Created Jan 31, 2024 by HBP and Ali Al Kadhim

In [1]:
import os, sys, re
import yoda
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
from tqdm import tqdm
from yoda2numpy import Yoda2Numpy

Welcome to JupyROOT 6.30/02


In [2]:
# List the YODA inputs used below: every file under data/, and the files whose
# names end in `_0.yoda` under newseeds/ and simulation/.
!ls rivet_histograms/data/*.yoda
!ls rivet_histograms/newseeds/*_0.yoda
!ls rivet_histograms/simulation/*_0.yoda

rivet_histograms/data/ALEPH_1996_S3486095.yoda
rivet_histograms/newseeds/ALEPH_1996_S3486095_card_newseed_0.yoda
rivet_histograms/simulation/ALEPH_1996_S3486095_card_0.yoda


In [3]:
def get_hist(hist_name='d01-x01-y01',
             hist_type='dat',
             hist_tree='ALEPH_1996_S3486095',
             index=0,
             verbose=False):
    """Read one histogram from a YODA file under ./rivet_histograms.

    Parameters
    ----------
    hist_name : str
        Histogram identifier inside the tree, e.g. 'd01-x01-y01'.
    hist_type : str
        Only the first three characters are inspected:
        'dat' -> data/<hist_tree>.yoda, looked up under /REF/<hist_tree>
        'sim' -> simulation/<hist_tree>_card_<index>.yoda, under /<hist_tree>
        'new' -> newseeds/<hist_tree>_card_newseed_<index>.yoda, under /<hist_tree>
    hist_tree : str
        Analysis name used to build both the file name and the tree path.
    index : int
        File index; used only for the 'sim' and 'new' kinds.
    verbose : bool
        If true, print the histogram name, file name and type before reading.

    Returns
    -------
    tuple or None
        For 'dat': (xMins, xVals, yVals, yErrs).
        Otherwise: (xMins, xMaxs, yVals, yErrs).
        None when the requested histogram is not present in the file.

    Raises
    ------
    ValueError
        If hist_type does not start with 'dat', 'sim' or 'new'.
    """
    kind = hist_type[:3]

    if kind == 'dat':
        yoda_filename = f'{hist_tree:s}.yoda'
        yoda_dir = 'data'
        tree_name = f'/REF/{hist_tree:s}'
    elif kind == 'sim':
        yoda_filename = f'{hist_tree:s}_card_{index:d}.yoda'
        yoda_dir = 'simulation'
        tree_name = f'/{hist_tree:s}'
    elif kind == 'new':
        yoda_filename = f'{hist_tree:s}_card_newseed_{index:d}.yoda'
        yoda_dir = 'newseeds'
        tree_name = f'/{hist_tree:s}'
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # further down when yoda_dir / yoda_filename are first used.
        raise ValueError(
            f"unknown hist_type {hist_type!r}; "
            "expected a string starting with 'dat', 'sim' or 'new'")

    if verbose:
        print(f'hist name: {hist_name:s}\tyoda file: {yoda_filename:s}\t=> {hist_type:s}')

    hist_path = os.path.join(os.getcwd(),
                             'rivet_histograms',
                             yoda_dir,
                             yoda_filename)
    yoda_file = yoda.read(hist_path)

    full_hist_name = '%s/%s' % (tree_name, hist_name)
    # Printed on every call, independently of `verbose`.
    print(full_hist_name)

    if full_hist_name not in yoda_file:
        return None

    yoda_hist = yoda_file[full_hist_name]
    hist_lows = yoda_hist.xMins()
    hist_xvals = yoda_hist.xVals()
    hist_highs = yoda_hist.xMaxs()
    hist_values = yoda_hist.yVals()
    hist_errors = yoda_hist.yErrs()

    # A multi-dimensional error array is collapsed to one value per bin by
    # summing along axis 1 and halving (the mean of a lower/upper pair,
    # assuming two columns -- TODO confirm against the YODA object type).
    if len(hist_errors.shape) > 1:
        hist_errors = hist_errors.sum(axis=1)/2

    if kind == 'dat':
        # Data histograms return the bin centres in the second slot.
        return hist_lows, hist_xvals, hist_values, hist_errors
    # Simulated histograms return the upper bin edges in the second slot.
    return hist_lows, hist_highs, hist_values, hist_errors

In [4]:
def print_hist1(h):
    """Print a single histogram as a table, one row per bin.

    `h` is a 4-sequence (lows, highs, values, errors) of equally long
    iterables. A blank line and a header row are printed first; bins are
    numbered from 1 and every number is shown with four decimals.
    """
    bin_lows, bin_highs, contents, uncertainties = h

    header = '%3s %10s %10s %10s %10s' % ('bin', 'low', 'high', 'values', 'errors')
    print()
    print(header)

    rows = zip(bin_lows, bin_highs, contents, uncertainties)
    for number, (lo, hi, val, err) in enumerate(rows, start=1):
        print(f'{number:3d} {lo:10.4f} {hi:10.4f} {val:10.4f} {err:10.4f}')
        
def print_hist2(hdat, hsim):
    """Print a data histogram next to a simulated one, one row per bin.

    Parameters
    ----------
    hdat : sequence or None
        (ignored, bin centres, data values, data errors).
    hsim : sequence or None
        (lower edges, upper edges, predicted values, predicted errors).

    A blank line is always printed. If either histogram is missing (None or
    empty, e.g. because the lookup that produced it found nothing) no table
    is printed. Otherwise the table also shows two derived columns,
    'MC(count)' = (pred/err)**2 and 'MC(scale)' = pred/err**2.
    """
    print()
    if not hdat or not hsim:
        return

    _, xvals, dat, errs = hdat
    lows, highs, sim, sim_errs = hsim

    # Coerce to arrays so that plain lists work with the arithmetic below.
    sim = np.asarray(sim)
    sim_errs = np.asarray(sim_errs)

    # The small offset keeps the divisions finite when an error is exactly 0.
    s_errs = sim_errs + 1e-6
    N = (sim/s_errs)**2
    K = sim/s_errs**2

    print('%3s %8s %8s %8s %8s %8s %10s %10s %12s %12s' %
          ('bin', 'low', 'central', 'high',
           'data', 'errs',
           'pred', 'errs',
           'MC(count)', 'MC(scale)'))

    rows = zip(lows, xvals, highs, dat, errs, sim, sim_errs, N, K)
    for i, (low, xval, high, y, ye, t, te, n, k) in enumerate(rows):
        print(f'{i+1:3d} {low:8.4f} {xval:8.4f} {high:8.4f} {y:8.4f} '
              f'{ye:8.4f} {t:10.4f} {te:10.4f} '
              f'{n:12.4f} {k:12.4f}')

### Get data histograms as `numpy` arrays and as `pandas` dataframes

In [5]:
# Histogram paths used to index the per-file results returned by `todf`.
# The data histogram is looked up under the /REF prefix; the new-seed and
# simulation histograms share the same path.
keydat = '/REF/ALEPH_1996_S3486095/d01-x01-y01'
keynew = '/ALEPH_1996_S3486095/d01-x01-y01'
keysim = '/ALEPH_1996_S3486095/d01-x01-y01'

# One reader instance, reused by the cells below.
yoda2numpy = Yoda2Numpy()

# Read the data file, convert it with `todf`, and pick the histogram of
# interest by its path.
hdata  = yoda2numpy('dat')
dfdata = yoda2numpy.todf(hdata)
dfdat  = dfdata[keydat]
dfdat

Unnamed: 0,xval,xerr-,xerr+,yval,yerr-,yerr+
0,0.0025,0.0025,0.0025,12.36,0.407922,0.407922
1,0.0075,0.0025,0.0025,23.33,0.254951,0.254951
2,0.0125,0.0025,0.0025,20.23,0.156205,0.156205
3,0.0175,0.0025,0.0025,16.69,0.120416,0.120416
4,0.0225,0.0025,0.0025,13.41,0.1,0.1
5,0.0275,0.0025,0.0025,10.79,0.098995,0.098995
6,0.0325,0.0025,0.0025,8.87,0.094048,0.094048
7,0.0375,0.0025,0.0025,7.408,0.089196,0.089196
8,0.045,0.005,0.005,5.922,0.06934,0.06934
9,0.055,0.005,0.005,4.508,0.052631,0.052631


### Get new-seed histograms (`newseeds/` files) as `numpy` arrays and as `pandas` dataframes

In [6]:
# Number of files to read; also reused by the simulation cell below.
M = 1000
# Pass 1: read file index 0..M-1 of the 'new' kind.
hnews = []
for ii in tqdm(range(M)):
    hnews.append( yoda2numpy('new', index=ii) )

# Pass 2: convert each result with `todf`.
dfnews = []
for ii in tqdm(range(M)):
    dfnews.append( yoda2numpy.todf(hnews[ii]) )

# Show the histogram of interest from the first file as a sanity check.
dfnew = dfnews[0][keynew]
dfnew

100%|██████████████████████████████████████| 1000/1000 [00:08<00:00, 118.10it/s]
100%|██████████████████████████████████████| 1000/1000 [00:04<00:00, 217.31it/s]


Unnamed: 0,xlow,xhigh,sumw,sumw2,sumwx,sumwx2,numEntries
0,0.0,0.005,14.760296,0.059042,0.049408,0.036583,3690.0
1,0.005,0.01,24.12048,0.096484,0.179866,0.278239,6030.0
2,0.01,0.015,20.72842,0.082915,0.257413,0.647904,5182.0
3,0.015,0.02,16.47633,0.065907,0.286605,1.00399,4119.0
4,0.02,0.025,13.052262,0.05221,0.292858,1.319645,3263.0
5,0.025,0.03,10.49221,0.04197,0.288017,1.58568,2623.0
6,0.03,0.035,8.888178,0.035553,0.28841,1.875443,2222.0
7,0.035,0.04,7.284146,0.029137,0.273011,2.049584,1821.0
8,0.04,0.05,5.778116,0.011556,0.257819,1.155251,2889.0
9,0.05,0.06,4.528091,0.009056,0.24808,1.362982,2264.0


### Get simulation histograms as `numpy` arrays and as `pandas` dataframes

In [7]:
# Pass 1: read file index 0..M-1 of the 'sim' kind (M is set in an earlier cell).
hsims = []
for ii in tqdm(range(M)):
    hsims.append( yoda2numpy('sim', index=ii) )

# Pass 2: convert each result with `todf`.
dfsims = []
for ii in tqdm(range(M)):
    dfsims.append( yoda2numpy.todf(hsims[ii]) )

# Show the histogram of interest from the first file as a sanity check.
dfsim = dfsims[0][keysim]
dfsim

100%|██████████████████████████████████████| 1000/1000 [00:08<00:00, 116.91it/s]
100%|██████████████████████████████████████| 1000/1000 [00:04<00:00, 217.23it/s]


Unnamed: 0,xlow,xhigh,sumw,sumw2,sumwx,sumwx2,numEntries
0,0.0,0.005,9.207162,4.709544,0.029255,0.021593,18.0
1,0.005,0.01,28.6445,14.65192,0.210193,0.317386,56.0
2,0.01,0.015,18.925832,9.680732,0.234288,0.586316,37.0
3,0.015,0.02,15.856778,8.110884,0.284102,1.02368,31.0
4,0.02,0.025,10.23018,5.232828,0.235225,1.085886,20.0
5,0.025,0.03,18.414322,9.419092,0.504793,2.773457,36.0
6,0.03,0.035,10.741688,5.494468,0.355455,2.357036,21.0
7,0.035,0.04,7.161126,3.66298,0.27184,2.065958,14.0
8,0.04,0.05,4.092072,1.046566,0.187649,0.864406,16.0
9,0.05,0.06,5.882353,1.504438,0.319918,1.743721,23.0


### Print histogram contents extracted using `yoda2numpy`

In [12]:
# Build the 4-tuples that print_hist2 expects from the dataframe columns.
# The first slot of hdat is a placeholder: print_hist2 discards it.
hdat = None, dfdat.xval, dfdat.yval, dfdat['yerr+']
# The prediction error is taken as sqrt(sumw2).
hnew = dfnew.xlow, dfnew.xhigh, dfnew.sumw, np.sqrt(dfnew.sumw2)

print_hist2(hdat, hnew)


bin      low  central     high     data     errs       pred       errs    MC(count)    MC(scale)
  1   0.0000   0.0025   0.0050  12.3600   0.4079    14.7603     0.2430    3689.9701     249.9930
  2   0.0050   0.0075   0.0100  23.3300   0.2550    24.1205     0.3106    6029.9612     249.9934
  3   0.0100   0.0125   0.0150  20.2300   0.1562    20.7284     0.2880    5181.9665     249.9933
  4   0.0150   0.0175   0.0200  16.6900   0.1204    16.4763     0.2567    4118.9679     249.9930
  5   0.0200   0.0225   0.0250  13.4100   0.1000    13.0523     0.2285    3262.9724     249.9929
  6   0.0250   0.0275   0.0300  10.7900   0.0990    10.4922     0.2049    2622.9744     249.9926
  7   0.0300   0.0325   0.0350   8.8700   0.0940     8.8882     0.1886    2221.9764     249.9923
  8   0.0350   0.0375   0.0400   7.4080   0.0892     7.2841     0.1707    1820.9789     249.9921
  9   0.0400   0.0450   0.0500   5.9220   0.0693     5.7781     0.1075    2888.9473     499.9808
 10   0.0500   0.0550   0.060

### Print histogram contents using `yoda`

In [14]:
# Read the same two histograms directly through get_hist, so the printed
# table can be compared with the one produced via yoda2numpy.
HDAT = get_hist(hist_name='d01-x01-y01', hist_type='dat')
HNEW = get_hist(hist_name='d01-x01-y01', hist_type='new', index=0)

print_hist2(HDAT, HNEW)

/REF/ALEPH_1996_S3486095/d01-x01-y01
/ALEPH_1996_S3486095/d01-x01-y01

bin      low  central     high     data     errs       pred       errs    MC(count)    MC(scale)
  1   0.0000   0.0025   0.0050  12.3600   0.4079    14.7603     0.2430    3689.9701     249.9930
  2   0.0050   0.0075   0.0100  23.3300   0.2550    24.1205     0.3106    6029.9612     249.9934
  3   0.0100   0.0125   0.0150  20.2300   0.1562    20.7284     0.2880    5181.9665     249.9933
  4   0.0150   0.0175   0.0200  16.6900   0.1204    16.4763     0.2567    4118.9679     249.9930
  5   0.0200   0.0225   0.0250  13.4100   0.1000    13.0523     0.2285    3262.9724     249.9929
  6   0.0250   0.0275   0.0300  10.7900   0.0990    10.4922     0.2049    2622.9744     249.9926
  7   0.0300   0.0325   0.0350   8.8700   0.0940     8.8882     0.1886    2221.9764     249.9923
  8   0.0350   0.0375   0.0400   7.4080   0.0892     7.2841     0.1707    1820.9789     249.9921
  9   0.0400   0.0450   0.0500   5.9220   0.0693     5.7