# Data sorted into pandas dataframe and extractions

In [9]:
import os, sys, yaml, re
from IPython.display import display, Markdown, Latex #Can write latex too!!!!
import numpy as np

In [101]:
class DecayChain(yaml.YAMLObject):
    
    yaml_tag = u'!DecayChain'
    
    def __init__(self, idd="", be="", pi="", ie="", it="", ea=[], eas=[], ta=[]):
        self.ID = idd
        self.BeamEnergy = be
        self.Pixel = pi
        self.ImplantEnergy = ie
        self.ImplantTime = it
        self.EAlpha, self.EAlphaSigma, self.TAlpha = ea, eas, ta

    def __repr__(self):
        return yaml.dump(self)
    
    def __str__(self):
        return yaml.dump(self)
    
    def MDTable(self):
        #Data to insert
        rows = []
        rows.append(['**ID**', '$E_{lab}$ (MeV)', '$E_{rec}$ (MeV)']) #'$E_1$ (MeV)', '$E_2$ (MeV)']
        rows.append(['', '', '**pixel**']) #'**$\Delta t_2$ (s)**']
        rows.append([self.ID, self.BeamEnergy, self.ImplantEnergy])
        rows.append(['', '', self.Pixel])
        for i in range(len(self.EAlpha)):
            rows[0].append('$E_'+str(i)+'$ (MeV)')
            rows[1].append('$\Delta t_'+str(i)+'$ (s)')
            rows[2].append(self.EAlpha[i])
            rows[3].append(self.TAlpha[i])
        rows[2] = [str(i) for i in rows[2]]
        rows[3] = [str(i) for i in rows[3]]
        
        #Formatting strings where data is to be put in
        s_format = """| """
        s_hline = """|"""
        s_empty = """| """

        for i in range(len(rows[0])):
            s_format += """{"""+str(i)+"""} | """
            s_hline += """:---:| """
            s_empty += """ | """
            if i == len(rows[0])-1:
                s_format += """\n"""
                s_hline += """\n"""
                s_empty += """\n"""

        s_md = """"""
        s_md += s_empty+s_hline
        for r in rows: 
            s_md += s_format.format(*r)
        
        #print("string:\n", s_md)
        #print(Markdown(s_md))
        display(Markdown(s_md))
        return s_md

class SetDecayChains:
    
    def __init__(self, path='', ids=[]):
        self.max_chain = 0
        s_files = " ".join(os.listdir(path))
        files = []
        for s in ids:
            files += (sorted(re.findall(string=s_files, pattern="Chain"+s+"\d+.yml")))
        print("Reading the following files:", files)
        self.Chains = []
        for f in files:
            f_in = open(path+f, 'r')
            self.Chains.append(yaml.load(f_in))
            if len(self.Chains[-1].TAlpha) > self.max_chain:
                self.max_chain = len(self.Chains[-1].TAlpha)
            f_in.close()
   

In [103]:
s_path = "E115_Chains/"
s_id = ["13", "14", "11", "12", "16", "17"]

setDC = SetDecayChains(s_path, s_id)
#for c in setDC.Chains:
#    c.MDTable()
print("Max length=", setDC.max_chain)

Reading the following files: ['Chain1301.yml', 'Chain1302.yml', 'Chain1303.yml', 'Chain1304.yml', 'Chain1305.yml', 'Chain1306.yml', 'Chain1307.yml', 'Chain1308.yml', 'Chain1309.yml', 'Chain1310.yml', 'Chain1311.yml', 'Chain1312.yml', 'Chain1313.yml', 'Chain1314.yml', 'Chain1315.yml', 'Chain1316.yml', 'Chain1317.yml', 'Chain1318.yml', 'Chain1319.yml', 'Chain1320.yml', 'Chain1321.yml', 'Chain1322.yml', 'Chain1401.yml', 'Chain1402.yml', 'Chain1403.yml', 'Chain1404.yml', 'Chain1405.yml', 'Chain1406.yml', 'Chain1407.yml', 'Chain1101.yml', 'Chain1102.yml', 'Chain1103.yml', 'Chain1104.yml', 'Chain1201.yml', 'Chain1202.yml', 'Chain1601.yml', 'Chain1602.yml', 'Chain1603.yml', 'Chain1604.yml', 'Chain1605.yml', 'Chain1606.yml', 'Chain1607.yml', 'Chain1608.yml', 'Chain1609.yml', 'Chain1610.yml', 'Chain1611.yml', 'Chain1612.yml', 'Chain1613.yml', 'Chain1614.yml', 'Chain1615.yml', 'Chain1616.yml', 'Chain1617.yml', 'Chain1618.yml', 'Chain1619.yml', 'Chain1620.yml', 'Chain1621.yml', 'Chain1622.yml', '

In [3]:
import pandas as pd

## One chain in a data frame

* Indices = chain identification
* Series = values of variables, Energy, Lifetime ...
* Data Frame = everything together

In [7]:
c1 = setDC.Chains[0]
c1

!DecayChain
BeamEnergy: 245.0
EAlpha: [10.51, 242.0]
EAlphaSigma: [10.0, 0.0]
ID: '1401'
ImplantEnergy: 12.3
ImplantTime: 0.0
Pixel: 268
TAlpha: [0.227, 0.378]

In [83]:
inds = [ [c1.ID, '12'], ["Dubna", '13'], ["E115", '14'] ]
col_names = ["ID", "Lab", "Exp."]
inds = list(zip(*inds))
print(inds)
index = pd.MultiIndex.from_tuples(inds, names=col_names)
ser_it = {"Implantation time": c1.ImplantTime}
index

[('1401', 'Dubna', 'E115'), ('12', '13', '14')]


MultiIndex(levels=[['12', '1401'], ['13', 'Dubna'], ['14', 'E115']],
           labels=[[1, 0], [1, 0], [1, 0]],
           names=['ID', 'Lab', 'Exp.'])

In [75]:
pd.Series(data=ser_it, index=index)

ID    Lab    Exp.
1401  Dubna  E115   NaN
12    13     14     NaN
dtype: float64

In [76]:
pd.DataFrame(data=ser_it, index=index)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Implantation time
ID,Lab,Exp.,Unnamed: 3_level_1
1401,Dubna,E115,0.0
12,13,14,0.0


## Generic data frame implementation

Filling arrays

In [110]:
ind_names = ["ID", "Lab", "Chain type"]
#inds = [ [c1.ID], ["Dubna"], ["E115"] ]
index = np.empty((len(setDC.Chains), len(ind_names)), dtype="<U15")
col_names = ["Beam Energy (MeV)", "Implantation Energy (MeV)", "Implantation time (s)", "Pixel"]
singles = np.empty((len(setDC.Chains), len(col_names)))
rest = np.full((len(setDC.Chains), 3*setDC.max_chain), np.nan)
E_names = ["E115", "E113", "Rg", "Mt", "Bh", "Db", "Lr"]
rest_str = ["Energy {} (MeV)", "$\sigma_E$ {} (keV)", "Life time {} (s)"]
rest_names = []
for i in range(3):
    for j in range(setDC.max_chain):
        rest_names.append(rest_str[i].format(E_names[j]))
print("Rest_names=", rest_names)

for i, c in enumerate(setDC.Chains):
    idi = c.ID[0:2]
    if idi == '10' or idi == '11':
        lab = "Dubna"
        if idi == "10":
            chain_type = 'E115 long'
        else:
            chain_type = 'E115 short'
    if idi == '13' or idi == '14':
        lab = "GSI"
        if idi == "13":
            chain_type = 'E115 long'
        else:
            chain_type = 'E115 short'
    if idi == '16' or idi == '17':
        lab = "Berkeley"
        if idi == "16":
            chain_type = 'E115 long'
        else:
            chain_type = 'E115 short'
    index[i] = np.asarray([c.ID, lab, chain_type])
    singles[i] = np.asarray([c.BeamEnergy, c.ImplantEnergy, c.ImplantTime, c.Pixel])
    rest[i, 0:len(c.EAlpha)] = c.EAlpha
    rest[i, setDC.max_chain:setDC.max_chain+len(c.EAlpha)] = c.EAlphaSigma
    rest[i, 2*setDC.max_chain:2*setDC.max_chain+len(c.EAlpha)] = c.TAlpha
        
index = list(zip(*index.T))
print(index)
index = pd.MultiIndex.from_tuples(index, names=ind_names)
index

Rest_names= ['Energy E115 (MeV)', 'Energy E113 (MeV)', 'Energy Rg (MeV)', 'Energy Mt (MeV)', 'Energy Bh (MeV)', 'Energy Db (MeV)', 'Energy Lr (MeV)', '$\\sigma_E$ E115 (keV)', '$\\sigma_E$ E113 (keV)', '$\\sigma_E$ Rg (keV)', '$\\sigma_E$ Mt (keV)', '$\\sigma_E$ Bh (keV)', '$\\sigma_E$ Db (keV)', '$\\sigma_E$ Lr (keV)', 'Life time E115 (s)', 'Life time E113 (s)', 'Life time Rg (s)', 'Life time Mt (s)', 'Life time Bh (s)', 'Life time Db (s)', 'Life time Lr (s)']
[('1301', 'GSI', 'E115 long'), ('1302', 'GSI', 'E115 long'), ('1303', 'GSI', 'E115 long'), ('1304', 'GSI', 'E115 long'), ('1305', 'GSI', 'E115 long'), ('1306', 'GSI', 'E115 long'), ('1307', 'GSI', 'E115 long'), ('1308', 'GSI', 'E115 long'), ('1309', 'GSI', 'E115 long'), ('1310', 'GSI', 'E115 long'), ('1311', 'GSI', 'E115 long'), ('1312', 'GSI', 'E115 long'), ('1313', 'GSI', 'E115 long'), ('1314', 'GSI', 'E115 long'), ('1315', 'GSI', 'E115 long'), ('1316', 'GSI', 'E115 long'), ('1317', 'GSI', 'E115 long'), ('1318', 'GSI', 'E115 l

MultiIndex(levels=[['1101', '1102', '1103', '1104', '1201', '1202', '1301', '1302', '1303', '1304', '1305', '1306', '1307', '1308', '1309', '1310', '1311', '1312', '1313', '1314', '1315', '1316', '1317', '1318', '1319', '1320', '1321', '1322', '1401', '1402', '1403', '1404', '1405', '1406', '1407', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1617', '1618', '1619', '1620', '1621', '1622', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1633', '1634', '1635', '1636', '1637', '1638', '1639', '1640', '1641', '1642', '1643', '1701', '1702', '1703'], ['Berkeley', 'Dubna', 'GSI'], ['E115 long', 'E115 short']],
           labels=[[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 0, 1, 2, 3, 4, 5, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6

In [123]:
print(col_names + rest_names)
data = np.hstack((singles, rest))

['Beam Energy (MeV)', 'Implantation Energy (MeV)', 'Implantation time (s)', 'Pixel', 'Energy E115 (MeV)', 'Energy E113 (MeV)', 'Energy Rg (MeV)', 'Energy Mt (MeV)', 'Energy Bh (MeV)', 'Energy Db (MeV)', 'Energy Lr (MeV)', '$\\sigma_E$ E115 (keV)', '$\\sigma_E$ E113 (keV)', '$\\sigma_E$ Rg (keV)', '$\\sigma_E$ Mt (keV)', '$\\sigma_E$ Bh (keV)', '$\\sigma_E$ Db (keV)', '$\\sigma_E$ Lr (keV)', 'Life time E115 (s)', 'Life time E113 (s)', 'Life time Rg (s)', 'Life time Mt (s)', 'Life time Bh (s)', 'Life time Db (s)', 'Life time Lr (s)']


In [124]:
data = dict(zip(col_names+rest_names, data.T))
data

{'$\\sigma_E$ Bh (keV)': array([  10.    ,   10.    ,   10.    ,   40.    ,   10.    ,   10.    ,
          50.    ,    0.    ,    0.    ,   50.    ,   10.    ,   10.    ,
          10.    ,   40.    ,   10.    ,   10.    ,    0.    ,   40.    ,
          40.    ,   50.    ,    0.    ,   10.    ,       nan,       nan,
              nan,       nan,       nan,       nan,       nan,       nan,
              nan,       nan,       nan,   68.7898,    0.    ,   20.    ,
          50.    ,   20.    ,   20.    ,    0.    ,   20.    ,   20.    ,
         100.    ,   50.    ,   20.    ,   20.    ,   20.    ,    0.    ,
          20.    ,   20.    ,  200.    ,   20.    ,   20.    ,   20.    ,
          20.    ,   20.    ,   50.    ,   20.    ,    0.    ,    0.    ,
           0.    ,    0.    ,   20.    ,   20.    ,   20.    ,   50.    ,
          50.    ,   20.    ,   50.    ,   50.    ,   50.    ,   20.    ,
          50.    ,   20.    ,   50.    ,   20.    ,   50.    ,    0.    ,
              

In [126]:
pd.DataFrame(data=data, index=index, columns=col_names+rest_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Beam Energy (MeV),Implantation Energy (MeV),Implantation time (s),Pixel,Energy E115 (MeV),Energy E113 (MeV),Energy Rg (MeV),Energy Mt (MeV),Energy Bh (MeV),Energy Db (MeV),...,$\sigma_E$ Bh (keV),$\sigma_E$ Db (keV),$\sigma_E$ Lr (keV),Life time E115 (s),Life time E113 (s),Life time Rg (s),Life time Mt (s),Life time Bh (s),Life time Db (s),Life time Lr (s)
ID,Lab,Chain type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1301,GSI,E115 long,242.1,13.40,0.0,465.0,10.290,9.240,9.77,0.825,9.06,197.0,...,10.0,0.0,,0.3660,0.9350,11.1000,0.5150,15.300,17.40000,
1302,GSI,E115 long,242.1,13.90,0.0,748.0,10.480,10.000,0.00,0.000,9.07,232.0,...,10.0,0.0,,0.1770,1.1800,0.0000,0.0000,8.230,25.50000,
1303,GSI,E115 long,242.1,16.30,0.0,557.0,0.000,9.990,9.77,9.600,9.04,233.0,...,10.0,0.0,,0.0000,0.2400,18.0000,0.6000,2.700,39.70000,
1304,GSI,E115 long,242.1,16.10,0.0,716.0,10.370,9.990,9.50,9.650,9.05,236.0,...,40.0,0.0,,0.2120,1.5000,26.4000,8.9500,17.800,26.60000,
1305,GSI,E115 long,242.1,16.40,0.0,331.0,10.440,10.000,9.76,9.650,8.55,230.0,...,10.0,0.0,,0.3630,0.2420,11.3000,0.5390,29.800,48.90000,
1306,GSI,E115 long,242.1,15.40,0.0,621.0,3.000,9.850,0.00,0.000,9.06,189.0,...,10.0,0.0,,0.6530,1.0100,0.0000,0.0000,52.600,79.70000,
1307,GSI,E115 long,242.1,14.10,0.0,368.0,10.480,9.940,1.38,9.600,9.06,182.0,...,50.0,0.0,,0.0663,3.3000,8.2100,0.0550,8.950,1.98000,
1308,GSI,E115 long,242.1,12.50,0.0,200.0,0.609,9.960,9.75,0.000,0.00,212.0,...,0.0,0.0,,0.5450,2.4000,19.1000,0.0000,0.000,15.50000,
1309,GSI,E115 long,245.0,15.90,0.0,38.0,10.380,9.960,0.00,0.000,0.00,153.0,...,0.0,0.0,,0.1520,2.0000,0.0000,0.0000,0.000,0.90500,
1310,GSI,E115 long,245.0,14.50,0.0,1007.0,1.660,9.840,1.28,9.680,8.97,189.0,...,50.0,0.0,,0.0702,3.0800,1.5600,0.3710,24.500,95.60000,
