# Generalised Schmidt values of experimental data

In [1]:
import os, sys, yaml, re
from IPython.display import display, Markdown, Latex #Can write latex too!!!!

In [2]:
class DecayChain(yaml.YAMLObject):
    """One decay chain, (de)serialisable by PyYAML under the ``!DecayChain`` tag.

    Attributes:
        ID: chain identifier.
        BeamEnergy: shown in the table under the E_lab (MeV) header.
        Pixel: shown in the table under the "pixel" header.
        ImplantEnergy: shown in the table under the E_rec (MeV) header.
        ImplantTime: stored only; not used by ``MDTable``.
        EAlpha: per-step energies, shown under the E_i (MeV) headers.
        EAlphaSigma: stored only; not used by ``MDTable``.
        TAlpha: per-step time differences, shown under the Delta t_i (s) headers.
    """

    yaml_tag = u'!DecayChain'

    def __init__(self, idd="", be="", pi="", ie="", it="", ea=None, eas=None, ta=None):
        """Store the chain data.

        ``ea``, ``eas`` and ``ta`` default to a fresh empty list per instance
        (a literal ``[]`` default would be shared between all instances).
        """
        self.ID = idd
        self.BeamEnergy = be
        self.Pixel = pi
        self.ImplantEnergy = ie
        self.ImplantTime = it
        self.EAlpha = [] if ea is None else ea
        self.EAlphaSigma = [] if eas is None else eas
        self.TAlpha = [] if ta is None else ta

    def __repr__(self):
        """Return the YAML dump of this object."""
        return yaml.dump(self)

    def __str__(self):
        """Return the YAML dump of this object."""
        return yaml.dump(self)

    def MDTable(self):
        """Display the chain as a Markdown table and return the Markdown source.

        The table has an empty header line, an alignment line, two label rows
        and two value rows; every step in ``EAlpha`` adds one column.

        Returns:
            str: the Markdown text that was displayed.

        Raises:
            IndexError: if ``TAlpha`` has fewer entries than ``EAlpha``.
        """
        labels_top = ['**ID**', '$E_{lab}$ (MeV)', '$E_{rec}$ (MeV)']
        labels_bottom = ['', '', '**pixel**']
        values_top = [self.ID, self.BeamEnergy, self.ImplantEnergy]
        values_bottom = ['', '', self.Pixel]
        for i in range(len(self.EAlpha)):
            labels_top.append('$E_' + str(i) + '$ (MeV)')
            # Raw string: the backslash must reach the Markdown/LaTeX renderer unchanged.
            labels_bottom.append(r'$\Delta t_' + str(i) + '$ (s)')
            values_top.append(self.EAlpha[i])
            values_bottom.append(self.TAlpha[i])

        rows = [
            labels_top,
            labels_bottom,
            [str(v) for v in values_top],
            [str(v) for v in values_bottom],
        ]

        n_cols = len(labels_top)
        # Markdown tables need a header line; it is left empty so that the
        # labels can occupy two ordinary (centred) rows below the rule.
        s_md = "| " + " | " * n_cols + "\n"
        s_md += "|" + ":---:| " * n_cols + "\n"
        for r in rows:
            s_md += "| " + " | ".join(r) + " | \n"

        display(Markdown(s_md))
        return s_md

class SetDecayChains:
    """Set of decay chains loaded from ``Chain<id><digits>.yml`` files in one directory.

    Attributes:
        Chains: list of the objects returned by ``yaml.load``, one per file,
            grouped in the order of ``ids`` and sorted by file name within
            each group.
    """

    def __init__(self, path='', ids=()):
        """Find and load the chain files.

        Args:
            path: directory containing the YAML files; an empty string means
                the current working directory.
            ids: iterable of ID prefixes (strings). Each is inserted as-is
                into the pattern ``Chain<id>\\d+\\.yml``.
        """
        available = os.listdir(path or '.')
        files = []
        for s in ids:
            # Match complete file names only, so that e.g. editor backups
            # ("Chain1401.yml~") are neither loaded nor counted twice.
            pattern = re.compile("Chain" + s + r"\d+\.yml")
            files += sorted(name for name in available if pattern.fullmatch(name))
        print("Reading the following files:", files)
        self.Chains = []
        for f in files:
            with open(os.path.join(path, f), 'r') as f_in:
                # An explicit Loader is mandatory in PyYAML >= 6.
                # NOTE(security): yaml.Loader can construct arbitrary Python
                # objects - only use it on trusted, locally produced files.
                self.Chains.append(yaml.load(f_in, Loader=yaml.Loader))
   

## Reading in the short chains and printing the tables

In [3]:
# Directory with the chain YAML files and the ID prefixes to load; both are passed to SetDecayChains.
s_path = "E115_Chains/"
s_id = ["14", "11", "17"]

setDC = SetDecayChains(s_path, s_id)
# Render one Markdown table per loaded chain (the returned Markdown source is discarded).
for c in setDC.Chains:
    c.MDTable()

Reading the following files: ['Chain1401.yml', 'Chain1402.yml', 'Chain1403.yml', 'Chain1404.yml', 'Chain1405.yml', 'Chain1406.yml', 'Chain1407.yml', 'Chain1101.yml', 'Chain1102.yml', 'Chain1103.yml', 'Chain1104.yml', 'Chain1701.yml', 'Chain1702.yml', 'Chain1703.yml']


|  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | 
| 1401 | 245.0 | 12.3 | 10.51 | 242.0 | 
|  |  | 268 | 0.227 | 0.378 | 


|  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | 
| 1402 | 242.1 | 16.2 | 1.45 | 211.0 | 
|  |  | 425 | 0.0645 | 0.366 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1403 | 242.1 | 13.9 | 10.54 | 9.95 | 196.0 | 
|  |  | 681 | 0.261 | 1.15 | 0.343 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1404 | 242.1 | 14.5 | 10.34 | 9.89 | 218.0 | 
|  |  | 344 | 1.46 | 0.0262 | 0.432 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1405 | 242.1 | 13.8 | 10.49 | 9.97 | 135.0 | 
|  |  | 554 | 0.345 | 0.369 | 14.4 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1406 | 245.0 | 14.5 | 10.53 | 9.89 | 230.0 | 
|  |  | 205 | 0.21 | 1.05 | 8.27 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1407 | 245.0 | 11.9 | 0.541 | 3.12 | 230.0 | 
|  |  | 128 | 0.815 | 2.33 | 2.89 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1101 | 240.5 | 11.38 | 10.377 | 9.886 | 215.7 | 
|  |  | 3 | 0.2562 | 1.4027 | 1.9775 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1102 | 241.0 | 15.18 | 10.54 | 9.916 | 214.9 | 
|  |  | 6 | 0.0661 | 1.55 | 2.3638 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1103 | 241.0 | 9.04 | 10.373 | 9.579 | 141.1 | 
|  |  | 2 | 2.3507 | 22.5822 | 60.1855 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1104 | 241.0 | 13.35 | 10.292 | 10.178 | 182.2 | 
|  |  | 11 | 0.0536 | 0.4671 | 0.0908 | 


|  |  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | $E_2$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | $\Delta t_2$ (s) | 
| 1701 | 270.0 | 11.65 | 10.49 | 9.82 | 107.0 | 
|  |  | 0 | 0.214 | 1.54 | 7.57 | 


|  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | 
| 1702 | 270.0 | 11.18 | 10.49 | 187.0 | 
|  |  | 1 | 0.0591 | 0.824 | 


|  |  |  |  |  | 
|:---:| :---:| :---:| :---:| :---:| 
| **ID** | $E_{lab}$ (MeV) | $E_{rec}$ (MeV) | $E_0$ (MeV) | $E_1$ (MeV) | 
|  |  | **pixel** | $\Delta t_0$ (s) | $\Delta t_1$ (s) | 
| 1703 | 270.0 | 13.72 | 10.22 | 128.0 | 
|  |  | 0 | 0.0455 | 0.0142 | 


### Gather the lifetimes

In [42]:
import numpy as np

# The longest chain fixes how many step columns are needed; 0 if there are no chains.
max_steps = max((len(chain.TAlpha) for chain in setDC.Chains), default=0)
print("Max steps=", max_steps)

Max steps= 3


In [43]:
# One row per chain, one column per step; steps a chain does not have remain nan.
times = np.full((len(setDC.Chains), max_steps), np.nan)
for row, chain in enumerate(setDC.Chains):
    steps = len(chain.TAlpha)
    times[row, :steps] = chain.TAlpha[:max_steps]
times

array([[  2.27000000e-01,   3.78000000e-01,              nan],
       [  6.45000000e-02,   3.66000000e-01,              nan],
       [  2.61000000e-01,   1.15000000e+00,   3.43000000e-01],
       [  1.46000000e+00,   2.62000000e-02,   4.32000000e-01],
       [  3.45000000e-01,   3.69000000e-01,   1.44000000e+01],
       [  2.10000000e-01,   1.05000000e+00,   8.27000000e+00],
       [  8.15000000e-01,   2.33000000e+00,   2.89000000e+00],
       [  2.56200000e-01,   1.40270000e+00,   1.97750000e+00],
       [  6.61000000e-02,   1.55000000e+00,   2.36380000e+00],
       [  2.35070000e+00,   2.25822000e+01,   6.01855000e+01],
       [  5.36000000e-02,   4.67100000e-01,   9.08000000e-02],
       [  2.14000000e-01,   1.54000000e+00,   7.57000000e+00],
       [  5.91000000e-02,   8.24000000e-01,              nan],
       [  4.55000000e-02,   1.42000000e-02,              nan]])

In [44]:
# Natural logarithm of every lifetime; the nan placeholders of missing steps stay nan.
thetas = np.log(times)
thetas

array([[-1.48280526, -0.97286108,         nan],
       [-2.74109006, -1.00512195,         nan],
       [-1.34323487,  0.13976194, -1.07002483],
       [ 0.37843644, -3.64199587, -0.83932969],
       [-1.06421086, -0.99695863,  2.66722821],
       [-1.56064775,  0.04879016,  2.11263451],
       [-0.20456717,  0.84586827,  1.0612565 ],
       [-1.36179689,  0.33839895,  0.68183342],
       [-2.71658653,  0.43825493,  0.86027049],
       [ 0.85471316,  3.11716199,  4.09743146],
       [-2.92620621, -0.76121191, -2.39909599],
       [-1.54177926,  0.43178242,  2.02419307],
       [-2.82852435, -0.19358475,         nan],
       [-3.09004295, -4.25451331,         nan]])

In [45]:
# Mean of theta per step (column), taken over the chains and ignoring nan entries.
theta_mean = np.nanmean(thetas, axis=0)
theta_mean

array([-1.54488161, -0.46187349,  0.91963971])

In [46]:
# Cross-check: the plain mean of the first column should reproduce theta_mean[0].
np.mean(thetas[:,0])

-1.5448816126100511

In a sense, the following should be the standard Schmidt test values. The reference values are taken from Table 1 in <https://link.springer.com/article/10.1007/s100500070129>.
For 14 chains (steps 1 & 2):
* Expected value: 1.19
* Limits (5% quantiles) = [0.73, 1.77]

For 10 chains (step 3):
* Expected value: 1.16
* Limits (5% quantiles) = [0.65, 1.82]

In [47]:
# Standard deviation of theta per step (column), ignoring nan; numpy's default ddof=0 is used.
np.nanstd(thetas, axis=0)

array([ 1.20445941,  1.75037006,  1.84235115])

This is consistent with Ulrika's calculations!

## Starting generalised Schmidt method

In [59]:
# Element-wise squared deviation of each theta from its column mean (no averaging yet); nan stays nan.
theta_var = np.square(thetas - theta_mean)
theta_var

array([[  3.85347337e-03,   2.61108321e-01,              nan],
       [  1.43091464e+00,   2.95118885e-01,              nan],
       [  4.06614081e-02,   3.61965193e-01,   3.95876501e+00],
       [  3.69915232e+00,   1.01131783e+01,   3.09397337e+00],
       [  2.31044371e-01,   2.86316113e-01,   3.05406554e+00],
       [  2.48571033e-04,   2.60777367e-01,   1.42323658e+00],
       [  1.79644282e+00,   1.71018850e+00,   2.00553146e-02],
       [  3.35200158e-02,   6.40435978e-01,   5.65518333e-02],
       [  1.37289242e+00,   8.10231173e-01,   3.52470437e-03],
       [  5.75805505e+00,   1.28094949e+01,   1.00983604e+01],
       [  1.90805765e+00,   8.96034910e-02,   1.10140067e+01],
       [  9.62456714e-06,   7.98620878e-01,   1.22003811e+00],
       [  1.64773869e+00,   7.19788481e-02,              nan],
       [  2.38752357e+00,   1.43841168e+01,              nan]])

In [49]:
# Spot check of a single element of theta_var: row 0, column 1.
(thetas[0][1] - theta_mean[1])**2

0.26110832133921458

Without taking into account shorter chains

In [51]:
# Geometric mean of the squared deviations along each row (one row per chain).
from scipy.stats.mstats import gmean
theta_gmean = gmean(theta_var, axis=1)
# nanmean skips any nan entries of theta_gmean before the square root is taken.
np.sqrt(np.nanmean(theta_gmean))

1.2989631843809233

In [54]:
# Boolean mask: True where theta_var holds a value, False where it is nan.
~np.isnan(theta_var)

array([[ True,  True, False],
       [ True,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True, False],
       [ True,  True, False]], dtype=bool)

In [55]:
# Indexing with a boolean mask returns a flattened 1-D array, so axis=1 no longer
# exists and gmean raises the IndexError shown in the output.
gmean(theta_var[~np.isnan(theta_var)], axis=1)

IndexError: tuple index out of range

Here the shorter chains, i.e. those containing `nan`, are included: the geometric mean of each chain is taken over its measured steps only.

In [57]:
# Per-chain geometric mean of the squared deviations, using only the steps
# that precede the first nan in each row.
theta_gmean = []
for sq_devs in theta_var:
    n_valid = 0
    product = 1
    while n_valid < len(sq_devs) and not np.isnan(sq_devs[n_valid]):
        product *= sq_devs[n_valid]
        n_valid += 1
    theta_gmean.append(product**(1./n_valid))
theta_gmean

[0.031720245304774332,
 0.64983838987885523,
 0.38767665445829846,
 4.8734384309017438,
 0.58677712894870893,
 0.045185508130979053,
 0.3949683413943787,
 0.10667816623489172,
 0.15768463742985117,
 9.0646887124215567,
 1.234868140528899,
 0.021087823320976206,
 0.34438689407536921,
 5.8602404359054745]

The generalised Schmidt value is the square root of the mean of the values in the last output (the per-chain geometric means).

In [33]:
# Square root of the arithmetic mean of the per-chain geometric means.
np.sqrt(np.mean(theta_gmean))

1.302723507254123

This value corresponds to Ulrika's value in the text (and table) of [D. Rudolph et al., EPJ Web of Conferences](https://www.epj-conferences.org/articles/epjconf/pdf/2016/12/epjconf_nn2016_01001.pdf). From simulations and the full set of short chains: 
* Expected value: 0.86
* Limits (5% quantile): [0.62, 1.15]

## Without nan, instead sorting into different arrays

This should work better with numpy operations and, furthermore, with sampling.

In [11]:
# Count the non-nan entries in each row of times, i.e. the number of recorded steps per chain.
N_j = np.count_nonzero(~np.isnan(times), axis=1)
N_j

array([2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2])

In [39]:
inds = []   # original row indices of the chains, grouped by chain length (longest first)
t_new = []  # lifetimes of each group, trimmed to that group's number of steps
for n_steps in range(max_steps, 0, -1):
    members = np.flatnonzero(N_j == n_steps)
    if members.size > 0:
        inds.append(members)
        t_new.append(times[members][:, :n_steps])
print(inds)
print(t_new)

[array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), array([ 0,  1, 12, 13])]
[array([[  2.61000000e-01,   1.15000000e+00,   3.43000000e-01],
       [  1.46000000e+00,   2.62000000e-02,   4.32000000e-01],
       [  3.45000000e-01,   3.69000000e-01,   1.44000000e+01],
       [  2.10000000e-01,   1.05000000e+00,   8.27000000e+00],
       [  8.15000000e-01,   2.33000000e+00,   2.89000000e+00],
       [  2.56200000e-01,   1.40270000e+00,   1.97750000e+00],
       [  6.61000000e-02,   1.55000000e+00,   2.36380000e+00],
       [  2.35070000e+00,   2.25822000e+01,   6.01855000e+01],
       [  5.36000000e-02,   4.67100000e-01,   9.08000000e-02],
       [  2.14000000e-01,   1.54000000e+00,   7.57000000e+00]]), array([[ 0.227 ,  0.378 ],
       [ 0.0645,  0.366 ],
       [ 0.0591,  0.824 ],
       [ 0.0455,  0.0142]])]


With the mean calculations this is not straightforward and is rather confusing for the reader. It is better to keep the original structure, use the existing functions, and insert the nans into the simulated arrays.