In [1]:
import ruamel.yaml
from pylatexenc import latexwalker

In [2]:
import pandas as pd

In [3]:
yaml = ruamel.yaml.YAML()

In [4]:
def float_cast(x):
    def leading_zeros(v) -> int:
        lead0 = 0
        idx = 0
        while idx < len(v) and v[idx] in '0.':
            if v[idx] == '0':
                lead0 += 1
            idx += 1
        return lead0

    # underscore = None
    m_sign: Any = False
    value_so = x
    value_s = value_so.replace('_', "").lower()
    sign = +1
    if value_s[0] == '-':
        sign = -1
    if value_s[0] in '+-':
        m_sign = value_s[0]
        value_s = value_s[1:]

    if 'e' in value_s:
        try:
            mantissa, exponent = value_so.split('e')
            exp = 'e'
        except ValueError:
            mantissa, exponent = value_so.split('E')
            exp = 'E'
        lead0 = leading_zeros(mantissa)
        width = len(mantissa)
        prec = mantissa.find('.')
        if m_sign:
            width -= 1
        e_width = len(exponent)
        e_sign = exponent[0] in '+-'
        # nprint('sf', width, prec, m_sign, exp, e_width, e_sign)
        return ruamel.yaml.scalarfloat.ScalarFloat(
                sign * float(value_s),
                width=width,
                prec=prec,
                m_sign=m_sign,
                m_lead0=lead0,
                exp=exp,
                e_width=e_width,
                e_sign=e_sign
        )
    width = len(value_so)
    # you can't use index, !!float 42 would be a float without a dot
    prec = value_so.find('.')
    lead0 = leading_zeros(value_so)
    return ruamel.yaml.scalarfloat.ScalarFloat(
            sign * float(value_s),
            width=width,
            prec=prec,
            m_sign=m_sign,
            m_lead0=lead0
    )


In [5]:
df = pd.read_csv("table1.csv")

In [6]:
df.columns

Index(['$x_{2}$ range', '$\expval{x_{2}}$', '$\expval{x_{1}}$',
       '$\expval{p_{T}}$\n(\unit{\GeV/c})', '$\expval{M}$\n(\unit{\GeV/c^2})',
       '$\sigma_{pd}/2\sigma_{pp}$', '$\bar{d}/\bar{u}$', '$\bar{d}-\bar{u}$'],
      dtype='object')

In [7]:
tex=latexwalker.LatexWalker(df[df.columns[6]][0])
tex.get_latex_nodes()[0][0].nodelist[3].nodelist[0].chars.split("-")

['', '0.053', '0.060']

In [8]:
points = {'independent_variables':[{'header':{'name': "X[2]"},
                                    'values':[]}],
          'dependent_variables':[{'header':{'name':"X[1]"},
                                'values':[]},
                                 {'header':{'name':"PT","units":"GEV"},
                                'values':[]},
                                 {'header':{'name':"M","units":"GEV"},
                                'values':[]},
                                 {'header':{'name':"DSIG[DEU]/2DSIG[p]"},
                                'values':[]},
                                 {'header':{'name':"RATIO[DQBAR/UQBAR]"},
                                'values':[]},
                                 {'header':{'name':"D[DQBAR-UQBAR]"},
                                'values':[]}
                                ]
                                }

In [9]:
for l in df.iloc:
    x2_range = l[df.columns[0]]
    tex=latexwalker.LatexWalker(l[df.columns[0]])
    x2_lo = tex.get_latex_nodes()[0][0].nodelist[0].chars
    x2_hi = tex.get_latex_nodes()[0][2].nodelist[0].chars
    tex=latexwalker.LatexWalker(l[df.columns[1]])
    x2_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars

    points['independent_variables'][0]['values'].append({"low":float_cast(x2_lo),
                                                         "high":float_cast(x2_hi), 
                                                         "value":float_cast(x2_mean)
                                                        })
    tex=latexwalker.LatexWalker(l[df.columns[2]])
    x1_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points['dependent_variables'][0]['values'].append({"value":float_cast(x1_mean)})

    tex=latexwalker.LatexWalker(l[df.columns[3]])
    pT_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points['dependent_variables'][1]['values'].append({"value":float_cast(pT_mean)})

    tex=latexwalker.LatexWalker(l[df.columns[4]])
    m_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points['dependent_variables'][2]['values'].append({"value":float_cast(m_mean)})

    tex=latexwalker.LatexWalker(l[df.columns[5]])
    csr = tex.get_latex_nodes()[0][0].nodelist[0].chars
    csr_stat = tex.get_latex_nodes()[0][0].nodelist[2].chars
    csr_sys = tex.get_latex_nodes()[0][0].nodelist[4].chars
    points['dependent_variables'][3]['values'].append({"value":float_cast(csr),
                                                       "errors":[
                                                           {"symerror":float_cast(csr_stat), "label":"stat"},
                                                           {"symerror":float_cast(csr_sys), "label":"sys"}
                                                       ]})
    tex=latexwalker.LatexWalker(l[df.columns[6]])
    v = tex.get_latex_nodes()[0][0].nodelist[0].chars.strip("^")
    up = tex.get_latex_nodes()[0][0].nodelist[1].nodelist[0].chars.split("+")
    down = tex.get_latex_nodes()[0][0].nodelist[3].nodelist[0].chars.split("-")
    points['dependent_variables'][4]['values'].append({"value":v,
                                                       "errors":[
                                                           {"asymerror":{'plus':float_cast("+"+up[1]), "minus":float_cast("-"+down[1])}, "label":"stat"},
                                                           {"asymerror":{'plus':float_cast("+"+up[2]), "minus":float_cast("-"+down[2])}, "label":"sys"}
                                                       ]})
    tex=latexwalker.LatexWalker(l[df.columns[7]])
    v = tex.get_latex_nodes()[0][0].nodelist[0].chars.strip("^")
    up = tex.get_latex_nodes()[0][0].nodelist[1].nodelist[0].chars.split("+")
    down = tex.get_latex_nodes()[0][0].nodelist[3].nodelist[0].chars.split("-")
    points['dependent_variables'][5]['values'].append({"value":v,
                                                       "errors":[
                                                           {"asymerror":{'plus':float_cast("+"+up[1]), "minus":float_cast("-"+down[1])}, "label":"stat"},
                                                           {"asymerror":{'plus':float_cast("+"+up[2]), "minus":float_cast("-"+down[2])}, "label":"sys"}
                                                       ]})
    
     
    

In [10]:
with open ("output/table1.yaml",'w') as f1:
    yaml.dump(points,f1)

In [11]:
dt2 = pd.read_csv("table3.csv")
dt2

Unnamed: 0,\diagbox{$x_2$}{$x_1$},$0.30$--$0.35$,$0.35$--$0.40$,$0.40$--$0.45$,$0.45$--$0.50$,$0.50$--$0.55$,$0.55$--$0.60$,$0.60$--$0.65$,$0.65$--$0.70$,$0.70$--$0.75$,$0.75$--$0.80$
0,$0.130$--$0.160$,,,,,,$1.19\%$\n$0.590$\n$0.157$\n$4.54$,$2.47\%$\n$0.628$\n$0.153$\n$4.60$,$3.22\%$\n$0.676$\n$0.148$\n$4.67$,$3.81\%$\n$0.723$\n$0.144$\n$4.77$,$4.35\%$\n$0.772$\n$0.143$\n$4.91$
1,$0.160$--$0.195$,,,,$1.03\%$\n$0.489$\n$0.191$\n$4.55$,$1.97\%$\n$0.529$\n$0.184$\n$4.63$,$2.66\%$\n$0.575$\n$0.178$\n$4.73$,$3.64\%$\n$0.623$\n$0.176$\n$4.89$,$4.08\%$\n$0.673$\n$0.176$\n$5.08$,$4.87\%$\n$0.722$\n$0.176$\n$5.27$,$5.40\%$\n$0.771$\n$0.175$\n$5.44$
2,$0.195$--$0.240$,,$0.04\%$\n$0.393$\n$0.235$\n$4.54$,$0.66\%$\n$0.433$\n$0.226$\n$4.64$,$1.51\%$\n$0.476$\n$0.218$\n$4.77$,$2.50\%$\n$0.524$\n$0.215$\n$4.97$,$3.34\%$\n$0.574$\n$0.215$\n$5.20$,$4.32\%$\n$0.623$\n$0.215$\n$5.42$,$5.08\%$\n$0.673$\n$0.215$\n$5.64$,$5.65\%$\n$0.723$\n$0.215$\n$5.85$,$6.06\%$\n$0.771$\n$0.214$\n$6.02$
3,$0.240$--$0.290$,$0.03\%$\n$0.343$\n$0.279$\n$4.63$,$0.26\%$\n$0.383$\n$0.267$\n$4.76$,$0.95\%$\n$0.427$\n$0.264$\n$4.97$,$2.02\%$\n$0.474$\n$0.263$\n$5.23$,$3.00\%$\n$0.524$\n$0.261$\n$5.49$,$4.03\%$\n$0.574$\n$0.262$\n$5.76$,$4.92\%$\n$0.623$\n$0.262$\n$6.00$,$5.56\%$\n$0.673$\n$0.262$\n$6.23$,$6.11\%$\n$0.722$\n$0.262$\n$6.46$,$6.32\%$\n$0.771$\n$0.262$\n$6.68$
4,$0.290$--$0.350$,$0.04\%$\n$0.338$\n$0.323$\n$4.92$,$0.45\%$\n$0.379$\n$0.318$\n$5.16$,$1.38\%$\n$0.425$\n$0.316$\n$5.44$,$2.46\%$\n$0.474$\n$0.315$\n$5.75$,$3.40\%$\n$0.524$\n$0.316$\n$6.04$,$4.18\%$\n$0.573$\n$0.315$\n$6.32$,$5.02\%$\n$0.624$\n$0.314$\n$6.59$,$5.50\%$\n$0.673$\n$0.314$\n$6.84$,$6.19\%$\n$0.722$\n$0.313$\n$7.09$,$6.10\%$\n$0.772$\n$0.315$\n$7.36$
5,$0.350$--$0.450$,$0.08\%$\n$0.337$\n$0.385$\n$5.38$,$0.64\%$\n$0.377$\n$0.387$\n$5.69$,$1.48\%$\n$0.425$\n$0.386$\n$6.03$,$2.32\%$\n$0.475$\n$0.385$\n$6.38$,$3.09\%$\n$0.523$\n$0.385$\n$6.69$,$3.80\%$\n$0.573$\n$0.384$\n$6.99$,$4.37\%$\n$0.624$\n$0.383$\n$7.29$,$4.68\%$\n$0.671$\n$0.379$\n$7.53$,$4.93\%$\n$0.721$\n$0.380$\n$7.81$,$4.85\%$\n$0.772$\n$0.378$\n$8.06$


In [12]:
acceptance = {
    'independent_variables':[{'header':{'name': "X[1]"}, 'values':[]},
                             {'header':{'name': "X[2]"}, 'values':[]}
                            ],
    'dependent_variables': [{'header':{'name': "ACC", "units":"PERCENT"}, 'values':[]},
                            #{'header':{'name': "X[1]"}, 'values':[]},
                            #{'header':{'name': "X[2]"}, 'values':[]},
                            {'header':{'name': "M","units":"GEV"}, 'values':[]}
                           ]
}

In [13]:
for i in range(1,10):
    for j in range(0,6):
        if not pd.isna(dt2[dt2.columns[i]][j]):
            acceptance['independent_variables'][0]['values'].append(
                {"low":float_cast(dt2[dt2.columns[0]][j].split("--")[0][1:-1]),
                 "high":float_cast(dt2[dt2.columns[0]][j].split("--")[1][1:-1]),
                 "value":float_cast(dt2[dt2.columns[i]][j].split()[1][1:-1])
                }
            )
            acceptance['independent_variables'][1]['values'].append(
                {"low":float_cast(dt2.columns[i].split("--")[0][1:-1]),
                 "high":float_cast(dt2.columns[i].split("--")[1][1:-1]),
                 "value":float_cast(dt2[dt2.columns[i]][j].split()[2][1:-1])
                }
            )
            
            acceptance['dependent_variables'][0]['values'].append({"value":float_cast(dt2[dt2.columns[i]][j].split()[0][1:-3])})
            acceptance['dependent_variables'][1]['values'].append({"value":float_cast(dt2[dt2.columns[i]][j].split()[3][1:-1])})

In [14]:
with open("output/table3.yaml", "w") as fa:
    yaml.dump(acceptance,fa)

In [15]:
df3 = pd.read_csv("table4.csv")

In [16]:
points_x1 = {'independent_variables':[{'header':{'name': "X[1]"},
                                    'values':[]}],
             'dependent_variables':[
                 {'header':{'name':"PT","units":"GEV"},
                  'values':[]},
                 {'header':{'name':"M","units":"GEV"},
                  'values':[]},
                 {'header':{'name':"DSIG[DEU]/2DSIG[p]"},
                  'values':[]}
             ]
            }
points_xF = {'independent_variables':[{'header':{'name': "XL"},
                                    'values':[]}],
             'dependent_variables':[
                 {'header':{'name':"PT","units":"GEV"},
                  'values':[]},
                 {'header':{'name':"M","units":"GEV"},
                  'values':[]},
                 {'header':{'name':"DSIG[DEU]/2DSIG[p]"},
                  'values':[]}
             ]
            }

In [17]:
for l in df3.iloc:
    x1_range = l[df3.columns[0]]
    tex=latexwalker.LatexWalker(l[df3.columns[0]])
    x1_lo = tex.get_latex_nodes()[0][0].nodelist[0].chars
    x1_hi = tex.get_latex_nodes()[0][2].nodelist[0].chars
    tex=latexwalker.LatexWalker(l[df3.columns[1]])
    x1_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars

    points_x1['independent_variables'][0]['values'].append({"low":float_cast(x1_lo),
                                                         "high":float_cast(x1_hi), 
                                                         "value":float_cast(x1_mean)
                                                        })
    tex=latexwalker.LatexWalker(l[df3.columns[2]])
    pT_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points_x1['dependent_variables'][0]['values'].append({"value":float_cast(pT_mean)})
    
    tex=latexwalker.LatexWalker(l[df3.columns[3]])
    m_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points_x1['dependent_variables'][1]['values'].append({"value":float_cast(m_mean)})
    
    tex=latexwalker.LatexWalker(l[df3.columns[4]])
    csr = tex.get_latex_nodes()[0][0].nodelist[0].chars
    csr_stat = tex.get_latex_nodes()[0][0].nodelist[2].chars
    csr_sys = tex.get_latex_nodes()[0][0].nodelist[4].chars
    points_x1['dependent_variables'][2]['values'].append({"value":float_cast(csr),
                                                       "errors":[
                                                           {"symerror":float_cast(csr_stat), "label":"stat"},
                                                           {"symerror":float_cast(csr_sys), "label":"sys"}
                                                       ]})


    xF_range = l[df3.columns[5]]
    tex=latexwalker.LatexWalker(l[df3.columns[5]])
    xF_lo = tex.get_latex_nodes()[0][0].nodelist[0].chars
    xF_hi = tex.get_latex_nodes()[0][2].nodelist[0].chars
    tex=latexwalker.LatexWalker(l[df3.columns[6]])
    xF_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars

    points_xF['independent_variables'][0]['values'].append({"low":float_cast(xF_lo),
                                                         "high":float_cast(xF_hi), 
                                                         "value":float_cast(xF_mean)
                                                        })
    tex=latexwalker.LatexWalker(l[df3.columns[6]])
    pT_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points_xF['dependent_variables'][0]['values'].append({"value":float_cast(pT_mean)})
    
    tex=latexwalker.LatexWalker(l[df3.columns[7]])
    m_mean = tex.get_latex_nodes()[0][0].nodelist[0].chars
    points_xF['dependent_variables'][1]['values'].append({"value":float_cast(m_mean)})
    
    tex=latexwalker.LatexWalker(l[df3.columns[9]])
    csr = tex.get_latex_nodes()[0][0].nodelist[0].chars
    csr_stat = tex.get_latex_nodes()[0][0].nodelist[2].chars
    csr_sys = tex.get_latex_nodes()[0][0].nodelist[4].chars
    points_xF['dependent_variables'][2]['values'].append({"value":float_cast(csr),
                                                       "errors":[
                                                           {"symerror":float_cast(csr_stat), "label":"stat"},
                                                           {"symerror":float_cast(csr_sys), "label":"sys"}
                                                       ]})

In [18]:
with open("output/table4_x1.yaml", "w") as f2:
    yaml.dump(points_x1,f2)
with open("output/table4_xF.yaml", "w") as f3:
    yaml.dump(points_xF,f3)

In [19]:
import tarfile
import pathlib

In [20]:
tar_new = tarfile.open("e906_new.tar.gz","w:gz")

In [21]:
p =pathlib.Path("output")
for i in p.iterdir():
   if i.suffix==".yaml":
    tar_new.add(i, i.name)

In [22]:
tar_new.list()

-rw-r--r-- cleung/765       3820 2025-12-09 15:49:29 table1.yaml 
-rw-r--r-- cleung/765       5884 2025-12-09 15:49:29 table3.yaml 
-rw-r--r-- cleung/765       1237 2025-11-24 08:56:52 submission.yaml 
-rw-r--r-- cleung/765       1532 2025-12-09 15:49:29 table4_x1.yaml 
-rw-r--r-- cleung/765       1538 2025-12-09 15:49:29 table4_xF.yaml 


In [23]:
tar_new.close()