# Simulation of lifetimes for Schmidt distributions

In [1]:
import numpy as np
import os, sys, yaml, re
from IPython.display import display, Markdown, Latex #Can write latex too!!!!

In [2]:
class DecayChain(yaml.YAMLObject):
    """Record describing one decay chain, tagged ``!DecayChain`` for PyYAML.

    The constructor stores its arguments on the instance as follows:
    ``idd`` -> ``ID``, ``be`` -> ``BeamEnergy``, ``pi`` -> ``Pixel``,
    ``ie`` -> ``ImplantEnergy``, ``it`` -> ``ImplantTime``,
    ``ea`` -> ``EAlpha``, ``eas`` -> ``EAlphaSigma``, ``ta`` -> ``TAlpha``.
    """

    yaml_tag = u'!DecayChain'

    def __init__(self, idd="", be="", pi="", ie="", it="", ea=None, eas=None, ta=None):
        """Store the chain fields; omitted list arguments become new empty lists.

        ``None`` is used as the default for ``ea``, ``eas`` and ``ta`` so that
        every instance gets its own list instead of sharing one mutable
        default object between all instances.
        """
        self.ID = idd
        self.BeamEnergy = be
        self.Pixel = pi
        self.ImplantEnergy = ie
        self.ImplantTime = it
        self.EAlpha = [] if ea is None else ea
        self.EAlphaSigma = [] if eas is None else eas
        self.TAlpha = [] if ta is None else ta

class SetDecayChains:
    """Set of decay chains loaded from YAML files found in one directory.

    Attributes:
        Chains: list with one loaded YAML object per matching file, grouped
            in the order of ``ids`` and sorted by file name within each id.
    """

    def __init__(self, path='', ids=()):
        """Load every file in ``path`` named ``Chain<id><digits>.yml``.

        Args:
            path: directory to scan (passed to ``os.listdir``).
            ids: iterable of strings. Each one is inserted as-is into the
                regular expression used to select file names, so regex
                metacharacters in an id keep their special meaning.
        """
        names = os.listdir(path)
        files = []
        for s in ids:
            # Match whole file names only, with a literal "." before "yml".
            pattern = re.compile(r"Chain" + s + r"\d+\.yml")
            files += sorted(name for name in names if pattern.fullmatch(name))
        print("Reading the following files:", files)
        self.Chains = []
        for f in files:
            # `with` closes the file even if parsing raises.
            with open(os.path.join(path, f), 'r') as f_in:
                # SECURITY NOTE: yaml.Loader can construct arbitrary Python
                # objects, so only read chain files from a trusted source.
                # The Loader is passed explicitly because recent PyYAML
                # releases no longer accept yaml.load() without one.
                self.Chains.append(yaml.load(f_in, Loader=yaml.Loader))

In [3]:
# Directory holding the chain files and the id fragments used to select them.
s_path = "data/"
s_id = ["14", "11", "17"]

setDC = SetDecayChains(s_path, s_id)

# Length of the longest chain, measured by its number of TAlpha entries.
chain_lengths = [len(chain.TAlpha) for chain in setDC.Chains]
max_steps = max(chain_lengths, default=0)
print("Max steps=", max_steps)

# One row per chain; rows of shorter chains stay NaN-padded on the right.
times = np.full((len(setDC.Chains), max_steps), np.nan)
for row, n_steps in enumerate(chain_lengths):
    times[row, :n_steps] = setDC.Chains[row].TAlpha
times

Reading the following files: ['Chain1401.yml', 'Chain1402.yml', 'Chain1403.yml', 'Chain1404.yml', 'Chain1405.yml', 'Chain1406.yml', 'Chain1407.yml', 'Chain1101.yml', 'Chain1102.yml', 'Chain1103.yml', 'Chain1104.yml', 'Chain1701.yml', 'Chain1702.yml', 'Chain1703.yml']
Max steps= 3


array([[  2.27000000e-01,   3.78000000e-01,              nan],
       [  6.45000000e-02,   3.66000000e-01,              nan],
       [  2.61000000e-01,   1.15000000e+00,   3.43000000e-01],
       [  1.46000000e+00,   2.62000000e-02,   4.32000000e-01],
       [  3.45000000e-01,   3.69000000e-01,   1.44000000e+01],
       [  2.10000000e-01,   1.05000000e+00,   8.27000000e+00],
       [  8.15000000e-01,   2.33000000e+00,   2.89000000e+00],
       [  2.56200000e-01,   1.40270000e+00,   1.97750000e+00],
       [  6.61000000e-02,   1.55000000e+00,   2.36380000e+00],
       [  2.35070000e+00,   2.25822000e+01,   6.01855000e+01],
       [  5.36000000e-02,   4.67100000e-01,   9.08000000e-02],
       [  2.14000000e-01,   1.54000000e+00,   7.57000000e+00],
       [  5.91000000e-02,   8.24000000e-01,              nan],
       [  4.55000000e-02,   1.42000000e-02,              nan]])

## Sorting out chains of different lengths into arrays

We need to simulate lifetime values for every chain in the set, and repeat this a sufficiently large number of times. To simplify, it may help to split the set into two subsets according to chain length.

In [4]:
# Number of non-NaN entries in each row of `times`.
N_j = np.sum(~np.isnan(times), axis=1, dtype=np.intp)
N_j

array([2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2])

In [5]:
# Row indices of the chains that have the maximum number of steps.
ind1 = np.flatnonzero(N_j == max_steps)
ind1

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [6]:
# Row indices of the chains that are one step shorter than the longest.
ind2 = np.flatnonzero(N_j == max_steps - 1)
ind2

array([ 0,  1, 12, 13])

In [7]:
# Rows of `times` selected by `ind1`.
times1 = np.take(times, ind1, axis=0)
times1

array([[  2.61000000e-01,   1.15000000e+00,   3.43000000e-01],
       [  1.46000000e+00,   2.62000000e-02,   4.32000000e-01],
       [  3.45000000e-01,   3.69000000e-01,   1.44000000e+01],
       [  2.10000000e-01,   1.05000000e+00,   8.27000000e+00],
       [  8.15000000e-01,   2.33000000e+00,   2.89000000e+00],
       [  2.56200000e-01,   1.40270000e+00,   1.97750000e+00],
       [  6.61000000e-02,   1.55000000e+00,   2.36380000e+00],
       [  2.35070000e+00,   2.25822000e+01,   6.01855000e+01],
       [  5.36000000e-02,   4.67100000e-01,   9.08000000e-02],
       [  2.14000000e-01,   1.54000000e+00,   7.57000000e+00]])

In [8]:
# Rows of `times` selected by `ind2`, with the last column dropped.
times2 = times[ind2, :-1]
times2

array([[ 0.227 ,  0.378 ],
       [ 0.0645,  0.366 ],
       [ 0.0591,  0.824 ],
       [ 0.0455,  0.0142]])

These additional steps may only confuse the reader. For the geometric mean in the presence of NaNs, a dedicated routine can be implemented instead (for example in Cython).

## Sample from exponential distribution

In [9]:
# Draw `nbr_sets` arrays with the same shape as `times`, every entry sampled
# from an exponential distribution with scale 1.
nbr_sets = 10
shape = (nbr_sets,) + np.shape(times)
sim = np.random.exponential(scale=1, size=shape)
sim.shape

(10, 14, 3)

### Masking to insert the nans

In [10]:
# Boolean map of the NaN positions in `times`, wrapped in a masked array.
# NOTE(review): the NaN map is stored as the array *data* and no mask is set;
# if the intent was to mask the NaN entries of `times`, this should be confirmed.
m2d = np.ma.MaskedArray(np.isnan(times))
m2d

masked_array(data =
 [[False False  True]
 [False False  True]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False False]
 [False False  True]
 [False False  True]],
             mask =
 False,
       fill_value = True)

In [11]:
# Stack `nbr_sets` copies of `m2d` along a new third (last) axis.
mask3d = np.ma.dstack([m2d for k in range(nbr_sets)])
mask3d.shape

(14, 3, 10)

### A much easier approach

Measured data set (including the short chains) duplicated `nbr_sets` times

In [12]:
def g_nan_mean(data):
    """Row-wise geometric mean of the entries that precede the first NaN.

    Each row is read from left to right and reading stops at the first NaN,
    so values located after a NaN are ignored.

    Args:
        data: 2-D array indexable as ``data[i, j]``.

    Returns:
        1-D float array with one value per row. A row whose first entry is
        NaN has no usable values and yields NaN (instead of raising
        ZeroDivisionError).
    """
    n_rows = np.shape(data)[0]
    ret = np.empty(n_rows)
    for i in range(n_rows):
        product = 1
        steps = 0
        for j in range(np.shape(data)[1]):
            value = data[i, j]
            if np.isnan(value):
                break
            product *= value
            steps += 1
        # steps == 0 means the row starts with NaN: there is nothing to average.
        ret[i] = product ** (1. / steps) if steps else np.nan
    return ret

In [13]:
# Fill every one of the `nbr_sets` slots of `sim` with a copy of `times`.
nbr_sets = 2
shape = (nbr_sets,) + np.shape(times)
sim = np.empty(shape)
sim[...] = times  # broadcasting writes `times` into each sim[k]
sim[0, :, 0], sim

(array([ 0.227 ,  0.0645,  0.261 ,  1.46  ,  0.345 ,  0.21  ,  0.815 ,
         0.2562,  0.0661,  2.3507,  0.0536,  0.214 ,  0.0591,  0.0455]),
 array([[[  2.27000000e-01,   3.78000000e-01,              nan],
         [  6.45000000e-02,   3.66000000e-01,              nan],
         [  2.61000000e-01,   1.15000000e+00,   3.43000000e-01],
         [  1.46000000e+00,   2.62000000e-02,   4.32000000e-01],
         [  3.45000000e-01,   3.69000000e-01,   1.44000000e+01],
         [  2.10000000e-01,   1.05000000e+00,   8.27000000e+00],
         [  8.15000000e-01,   2.33000000e+00,   2.89000000e+00],
         [  2.56200000e-01,   1.40270000e+00,   1.97750000e+00],
         [  6.61000000e-02,   1.55000000e+00,   2.36380000e+00],
         [  2.35070000e+00,   2.25822000e+01,   6.01855000e+01],
         [  5.36000000e-02,   4.67100000e-01,   9.08000000e-02],
         [  2.14000000e-01,   1.54000000e+00,   7.57000000e+00],
         [  5.91000000e-02,   8.24000000e-01,              nan],
         [ 

Simulated chains

In [14]:
# Simulate `nbr_sets` data sets shaped like `times`, with exponentially
# distributed entries (scale 1).
nbr_sets = 30000
shape = (nbr_sets, *np.shape(times))

# Fixed seed so that Restart & Run All reproduces the same simulated sets.
SIM_SEED = 12345
rng = np.random.RandomState(SIM_SEED)
sim = rng.exponential(scale=1, size=shape)

# Put NaN wherever `times` has NaN, in all sets at once.
sim[:, np.isnan(times)] = np.nan
sim

array([[[ 1.47295106,  0.13826594,         nan],
        [ 0.38282398,  1.06258346,         nan],
        [ 2.53596589,  1.740428  ,  1.91222996],
        ..., 
        [ 1.35843455,  1.1423758 ,  0.27904258],
        [ 0.7360603 ,  0.10834985,         nan],
        [ 0.8911183 ,  0.47844164,         nan]],

       [[ 0.02481661,  0.7246067 ,         nan],
        [ 1.06547893,  1.23697359,         nan],
        [ 0.34521624,  2.33060285,  0.47324651],
        ..., 
        [ 0.13274914,  0.20452968,  0.17858752],
        [ 4.33654876,  1.05441854,         nan],
        [ 0.92337613,  4.15171197,         nan]],

       [[ 0.60823939,  1.10775691,         nan],
        [ 0.63352722,  0.04008763,         nan],
        [ 0.82440384,  0.02573784,  0.89047258],
        ..., 
        [ 1.32088057,  0.24126684,  1.13691169],
        [ 0.39146004,  0.05700766,         nan],
        [ 0.01565506,  0.85139508,         nan]],

       ..., 
       [[ 0.01353299,  0.58340357,         nan],
        

In [15]:
# theta: natural logarithm of every simulated value.
theta = np.log(sim)
theta_var = np.zeros(theta.shape)
chain_spread = np.zeros(theta.shape[:-1])

for k, theta_k in enumerate(theta):
    # Squared deviation from the NaN-ignoring column mean of this set.
    column_means = np.nanmean(theta_k, axis=0)
    theta_var[k] = np.square(theta_k - column_means)
    # One value per row: geometric mean of that row's squared deviations.
    chain_spread[k] = g_nan_mean(theta_var[k])

print(chain_spread[0])
# One value per simulated set: square root of the mean over its rows.
gen_Schmidt = np.sqrt(np.mean(chain_spread, axis=1))
print(gen_Schmidt)

[  3.93036514e-01   7.33020671e-01   9.76850955e-01   1.52887052e-01
   6.70331168e-03   6.74732804e-01   8.78257340e+00   3.41375667e-01
   4.79490058e-01   2.56202980e-01   1.35685774e+00   2.87815784e-01
   6.37565314e-01   2.16967121e-02]
[ 1.03857075  0.72533713  0.9966775  ...,  0.8016078   0.89532149
  0.74665797]


### Expected generalised Schmidt value

In [16]:
# Mean of the per-set values stored in `gen_Schmidt`.
E_Schmidt = gen_Schmidt.mean()
E_Schmidt

0.86179162222916006

5th and 95th percentiles

In [17]:
# 5th and 95th percentiles of `gen_Schmidt`, shown as a (low, high) pair.
tuple(np.percentile(gen_Schmidt, q=[5, 95]))

(0.6265521560630567, 1.1331438681832808)