# Measure delta Lambda

Lambda fluctuates, and it fluctuates more as two galaxies get closer.
It is hard to separate 'normal' stage and 'merging' stage of lambda.
Measuring L at normal stage may require some fitting algorithm. 

In [1]:
def load_pickle(fname):
    with open(fname, 'rb') as f:
        return pickle.load(f)

## time
def aexp2zred(aexp):
    return [1.0/a - 1.0 for a in aexp]

def zred2aexp(zred):
    return [1.0/(1.0 + z) for z in zred]

def lbt2aexp(lts):
    import astropy.units as u
    from astropy.cosmology import WMAP7, z_at_value
    zreds = [z_at_value(WMAP7.lookback_time, ll * u.Gyr) for ll in lts]
    return [1.0/(1+z) for z in zreds]

def density_map(x, y, ax, sort=True):
    from scipy.stats import gaussian_kde
    xy = np.vstack([x,y])
    z = gaussian_kde(xy)(xy) 
    z /= max(z)

    idx = z.argsort()    
    xx, yy = x[idx], y[idx]
    z = z[idx]
    
    im = ax.scatter(xx, yy, c=z, s=50, edgecolor='')
    return im


def sigma_clip_ind(c, high, low):
    """
        returns indices of sigma-clipping-safe elements.
    """
    import numpy as np
    ind = (np.mean(c) - np.std(c)*low < c) * (c < np.mean(c) + np.std(c)*high)
    return ind


def mask_outlier(y, low=1.5, high=1.5):
    """
        maks outlier assuming monotonic trend.
    """
    x = np.arange(len(y))

    # linear fitting .. more desirably, a very strong smoothing scheme that can reconstrcut mild curve.
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x,y)

    # extract linear fit
    yy = y - (slope * x + intercept)

    # sigma clipped value = mean of the rest 
    i_good = sigma_clip_ind(yy, low, high)
    yy[~i_good] = np.mean(yy[i_good])

    # add linear fit again
    return yy + (slope * x + intercept)


def smooth(x, beta=5, window_len=20, monotonic=False):
    """ 
    kaiser window smoothing 
    beta = 5 : Similar to a Hamming
    """
    
    if monotonic:
        """
        if there is an overall slope, smoothing may result in offset.
        compensate for that. 
        """
        slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y=np.arange(len(x)))
        xx = np.arange(len(x)) * slope + intercept
        x = x - xx
    
    # extending the data at beginning and at the end
    # to apply the window at the borders
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    w = np.kaiser(window_len,beta)
    y = np.convolve(w/w.sum(),s,mode='valid')
    if monotonic: 
         return y[int(window_len)/2:len(y)-int(window_len/2) + 1] + xx#[x[window_len-1:0:-1],x,x[-1:-window_len:-1]]
    else:
        return y[int(window_len)/2:len(y)-int(window_len/2) + 1]
        #return y[5:len(y)-5]

In [2]:
class MainPrg():
    import tree.ctutils as ctu
    import numpy as np
    
    def __init__(self, treedata, final_gal, nout_ini=None, nout_fi=None):

        temp_tree = ctu.extract_main_tree(treedata, final_gal)
        if nout_ini == None:
            nout_ini = min(temp_tree['nout'])
        if nout_fi == None:
            nout_fi = max(temp_tree['nout'])            
            
        self.nouts = np.arange(nout_fi, nout_ini -1, -1)
        self.idxs = temp_tree['id'] # nout_ini, nout_fi consideration needed.
        self.ids = temp_tree['Orig_halo_id']
        self.data = None
    
    def set_data(self, cat, nout):
        """
        compile data from catalogs.
        """
        if nout in self.nouts:
            # Declare self.data first if there isn't any.
            if self.data == None:
                self.data = np.zeros(len(self.nouts), dtype=cat.dtype)
            inow = self.nouts == nout
            a = np.where(cat['id'] == self.ids[inow])[0]
            if len(a) > 0:
                self.data[inow] = cat[a]        
            else:
                pass
                #print(self.ids[inow],cat['id'])
        else:
            pass
            #print("No {} in the catalog".format(nout))
            
    def clip_non_detection(self):
        # end of galaxy tree = last non-zero position.
        i_first_nout = max(np.where(self.data['id'] > 0)[0])
        # then, only [0-i_first_nout] are valid.
        # earlier then 187 - 91-th are zero. so get rid of them.
        self.data = self.data[:i_first_nout].copy()
        self.nouts = self.nouts[:i_first_nout].copy()
        self.ids = self.ids[:i_first_nout].copy()
        self.idxs = self.idxs[:i_first_nout].copy()
        
    def fill_missing_data(self):
        assert (self.ids[-1] != 0)
        def locate_wrong_arr(arrays, len_cut = 2):
            ind_bad = []
            for i, arr in enumerate(arrays):
                if isinstance(arr, int): 
                    ind_bad.append(i)
                elif len(arr) < len_cut:
                    ind_bad.append(i)
            return ind_bad
        
        # loop over all fields except id, index, and non-physical entries.
        i_bad = np.where(self.data['id'] == 0)[0]
        for field in self.data.dtype.names:
            if field in ["index", "id"]:
                continue
            arr = self.data[field] # it's a view.
        #    i_bad = locate_wrong_arr(arr)

            for i_b in i_bad:
                # neighbouring array might also be empty. Search for closest valid array.
                # left point
                i_l = i_b - 1
                while(i_l in i_bad):
                    i_l = i_l - 1
                # right point
                i_r = i_b + 1
                while(i_r in i_bad):
                    i_r = i_r + 1

                arr[i_b] = (arr[i_b -1] + arr[i_b +1])/2.
    

In [3]:
import numpy as np
import scipy.stats

import matplotlib.pyplot as plt
# Read a single galaxy evolution catalog.
import pickle
#wdir = '/home/hoseung/Work/data/05427/'
wdir = '/home/hoseung/Work/data/28928/'
cdir = 'catalog_GM/'

#idxs = [122668, 122669, 122683, 122695, 122747, 122750, 122835, 123087] 05427

In [4]:
# Serialize catalogs. -> Only main galaxies

nout_fi = 187

# main galaxy list
import tree.ctutils as ctu
alltrees = ctu.load_tree(wdir, is_gal=True)
ad = alltrees.data
tn = ad[ad['nout'] == nout_fi]

cat = pickle.load(open(wdir + cdir + 'catalog' + str(nout_fi) + '.pickle', 'rb'))
    
idx_all = [tn['id'][tn['Orig_halo_id'] == id_final][0] for id_final in cat['id']]
mpgs = [MainPrg(ad, idx) for idx in idx_all]
#print(mpgs[0].nouts)
#print(mpgs[0].ids)
for nout in range(60,188):
    cat = pickle.load(open(wdir + cdir + 'catalog' + str(nout) + '.pickle', 'rb'))
    for mpg in mpgs:
        mpg.set_data(cat, nout)
        

Loaded an extended tree




중간에 비는 것은 아마도 phantom이라 'Orig_halo_id'는 없는 경우일 듯. 
그렇기 때문에..! cat을 만들때 idx를 넣어야함!  : 넣었음. (근데 final_ID는 뺐나...?)

### What mass cut was applied? (optional)

In [292]:
# from Complete-main-tree criterion, nout = 33 ~ 187.
print("gal.nouts",min(gal.nouts), max(gal.nouts))

# But if galaxy goes below the mass cut at some point, 
# galaxy property is no longer measured.
plt.plot(gal.data['mstar'])
plt.show()
#print("gal.data['mstar']", gal.data['mstar'])


gal.nouts 33 187


constant mass cut... 5e9. Need to re-run it. 

In [293]:
fig, ax = plt.subplots(1)
for gal in mpgs:
    ax.plot(np.log10(gal.data['mstar'][gal.data['mstar'] > 0]))
    
plt.show()

In [5]:
def smoothed_reff(cat, nout_merger):
    """
    returns "representative" lambda at each nout by assuming monotonic change in Reff. 
    During merger, Reff can fluctuate, and if has no physical meaning to infer Labda at Reff during merger stage. 
    So Reff' is derived by linear interpolating Reffs before and after the merger. 
    
    cat is one galaxy catalog over time.
    """
    import utils.match as mtc
    i_merger = np.where(cat['nout'] == nout_merger)[0]
    ind_lower = 20
    ind_upper = 20
    
    reffs = cat['rgal']
    # left and right values chosen by sigma-clipping
    r_lefts, b, c = scipy.stats.sigmaclip(reffs[max([0,i_merger-ind_lower]):i_merger], sig_lower, sig_upper)
    print(r_lefts)
    r_left = r_lefts[-1]
    i_left = np.where(reffs == r_left)[0]
    

    r_rights, b,c = scipy.stats.sigmaclip(reffs[i_merger:min([i_merger+ind_upper,len(reffs)])], sig_lower, sig_upper)
    r_right = r_rights[0]
    i_right = np.where(reffs == r_right)[0]

    r_prime = reffs
    print(i_left, i_right)
    print(np.linspace(r_left, r_right, i_right - i_left + 1))
    r_prime[i_left : i_right + 1] = np.linspace(r_left, r_right, i_right - i_left + 1)
    return r_prime    

#### Gather same galaxy pngs together (Optional)

In [252]:
import os
import shutil

new_gal_dir = wdir + '/z1/gal_' + str(gal.idxs[0])
os.mkdir(new_gal_dir)

for nout, id in zip(gal.nouts, gal.ids):
    fname = wdir + "/z1/galaxy_plot" + str(nout) + "/" + str(nout) + "_" + str(id) + '.png'
    if os.path.isfile(fname):
        shutil.move(fname, new_gal_dir + '/'+ str(nout) + "_" + str(id) + '.png')

##### fix Reff of a galaxy

In [6]:
#idx = mgl['idx'][2]
gal = mpgs[3]
gal.clip_non_detection()
gal.fill_missing_data()
print("Galaxy ID at the final nout {}, {}".format(gal.idxs[0], gal.ids[0]))

nout_merger = 118

Galaxy ID at the final nout 83196, 84


I have reularized galaxy evolution data.

I want to measure rotation parameter before and after a merger.
I take 20 lambda values before and after the merger, sigma clip outliers, and take median value.
So dLambda = media(Lambda_after) - median(Lambda_before).

However, Lambda measurement at 1Reff requires robust Reff measurement, which is very tough during merger events.
So I smooth Reff evolution history to guess more reasonable Reff values at all points. Following is the procedure.

In [27]:
def fixed_ind_Lr(gal):
    nnouts = len(gal.nouts)
    ind_reff_fix = np.zeros(nnouts, dtype='i4')

    smooth_r = smooth(mask_outlier(gal.data['rgal'], 1.5, 1.5), 50, monotonic=False)

    # fixed Reff array
    for i in range(nnouts):
        # 1Reff = 10 points
        reff_real = smooth_r[i]
        reff = gal.data['rgal'][i]
        try:
            ind_reff_fix[i] = np.round(reff_real/reff * 5) -1
        except:
            pass
    return ind_reff_fix

In [374]:
fig, axs = plt.subplots(4,4)
axs = axs.flatten()
for i, field in enumerate(gal.data.dtype.names):
    if field == "lambda_arr":
        continue
    axs[i].plot(gal.data[field])
    axs[i].set_ylabel(field)
    
plt.tight_layout()
plt.show()

ind_reff_fix points to the Lambda_arr element closest to the fixed Reff at every nout.

In [8]:
# fixed Lambda array based on Reff_fix.
ind_reff_fix = fixed_ind_Lr(gal)
lam_fix = np.zeros(sum(ind_reff_fix > 0))
lam = np.zeros(sum(ind_reff_fix > 0))
for i, ind in enumerate(ind_reff_fix[ind_reff_fix > 0]):
    lam_fix[i] = gal.data['lambda_arr'][i][ind]
    lam[i] = gal.data['lambda_arr'][i][4]
    

왜 lambda_r 이랑 lambda_arr[4]랑 다르지? -> 0.5Reff에서 측정했었음..!
그림에는 0.5Reff이지만 나머지는 1.0Reff로 쓸래.. 
나중에 그림도 1.0으로 바꾸지 뭐.. (lambda_single.py로)

In [9]:
fig, ax = plt.subplots(1)
ax.plot(lam, 'b-')
ax.plot(lam_fix, 'g--')
plt.show()

In [11]:
y = gal.data['rgal']
fig, ax = plt.subplots(1)
ax.plot(y)
ax.plot(smooth_r, 'r--')
#ax.plot(smoothed_reff(gal.data['reff'], 102))
ax.plot(gal.data['lambda_r'] * 20)
ax.set_title("Rgal, Rgal_smoothed, and Lambda_r")
plt.show()

In [28]:
i_merger = 75
#i_merger = 187 - mgl[igal][2]

lam = lam_fix

ind_upper = 20
ind_lower = 20
sig_upper = 2.0
sig_lower = 2.0

x_al = range(max([0,i_merger-ind_lower]), i_merger)
x_ar = range(i_merger,min([i_merger+ind_upper,len(lam)]))

al, b1, c1 = scipy.stats.sigmaclip(lam[x_al], sig_lower, sig_upper)
ar, b2, c2 = scipy.stats.sigmaclip(lam[x_ar], sig_lower, sig_upper)

dl = np.median(ar) - np.median(al)

In [29]:
# i_merger가 정확해야하는데... 
# Tree에서 주는 merger는 final coalescence일 가능성이 높음. 
print(np.mean(al), np.mean(ar))
print(np.median(al), np.median(ar))


fig, ax = plt.subplots(1)
ax.plot(x_al, lam[x_al], 'r')
ax.plot(x_ar, lam[x_ar], 'b')
ax.axhline(np.mean(al), color='r')
ax.axhline(np.mean(ar), color='b')
#ax.plot(smoothed_reff(gal.data['reff'], 102))
ax.set_title("Rgal, Rgal_smoothed, and Lambda_r")
plt.show()


0.247116707298 0.241349076274
0.240646574733 0.245995951079


I can measure dL in this way, but am I following the right galaxy? is the tree right?

In [None]:
fig, ax = plt.subplots(1)
ax.scatter(mr, dl)
ax.set_ylim([-1,+1])
ax.set_xlim([0, 10])

In [223]:
# load merger galaxy list (geneated by scripts/notebooks/halo/Merter_no_cat.ipynb)
with open(wdir + 'merger_list.txt', 'rb') as f:
    mgl = np.genfromtxt(f, dtype=[('idx','i8'),('mr','f8'),('nout','i4')])
print(mgl)

[(83116, 1.6080775310169404, 171) (83334, 9.723992317756643, 113)
 (83117, 5.628116671634298, 107) (83139, 1.8823297100888297, 124)
 (83345, 2.8883693284502954, 165) (83345, 1.0428572316559068, 152)
 (83184, 1.532682197682854, 105) (83155, 1.3627227787912803, 141)
 (83155, 1.4007294519675875, 138) (83178, 7.298628930814874, 150)
 (83208, 8.469499127479162, 113) (83242, 3.650532934609259, 136)
 (83196, 1.7698715554171962, 115)]


In [225]:
mgl[0][0]

83116

In [335]:
# Obsolete
# lambda_arr = [0] instead of an array if ill measured.
# filter them.

def locate_wrong_arr(arrays, len_cut = 2):
    ind_bad = []
    for i, arr in enumerate(arrays):
        if isinstance(arr, int): 
            ind_bad.append(i)
        elif len(arr) < len_cut:
            ind_bad.append(i)
    return ind_bad

arr = gal.data['lambda_arr'].copy()
i_bad = locate_wrong_arr(arr)

#print("len Lr_arr", len(arr))

for i_b in i_bad:
    # neighbouring array might also be empty. Search for closest valid array.
    # left point
    i_l = i_b - 1
    while(i_l in i_bad):
        i_l = i_l - 1
    # right point
    i_r = i_b + 1
    while(i_r in i_bad):
        i_r = i_r + 1

    arr[i_b] = arr[i_b -1] + arr[i_b +1]

#fig, ax = plt.subplots(1)      
#for arr in Lr_arr:
#    ax.plot(arr)

len Lr_arr 91
72
[0]
[ array([ 0.24895497,  0.44111995,  0.61401316,  0.74073474,  0.93864124,
        1.03958405,  1.06799237,  1.09722928,  1.07574542,  1.19460084,
        1.15060364,  1.26472134,  1.37878218,  1.39374947,  1.63227243])]
