# Batch process Equivalent Widths with MCMC

The goal of this notebook is to distill our analysis into a programmatic loop over many spectra and save the Equivalent Width (EW) and its uncertainty to a results table.  The table will be in the form of a pandas dataframe, which we'll then save as a csv file.

In [1]:
import numpy as np
import pandas as pd
import os
import glob
from astropy.io import fits
import emcee

In [2]:
import warnings

import pandas as pd
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [3]:
goldilocks_files = glob.glob('../data/HPF/Helium-transit-data/**/Goldilocks*.fits', recursive=True)

In [4]:
def get_goldilocks_dataframe(fn):
    """Return a pandas Dataframe given a Goldilocks FITS file name"""
    hdus = fits.open(fn)
    df_original = pd.DataFrame()
    for j in range(28):
        df = pd.DataFrame()
        for i in range(1, 10):
            name = hdus[i].name
            df[name] = hdus[i].data[j, :]
        df['order'] = j
        df_original = df_original.append(df, ignore_index=True)
    keep_mask = df_original[df_original.columns[0:6]] != 0.0
    df_original = df_original[keep_mask.all(axis=1)].reset_index(drop=True)
    
    return df_original

In [5]:
def normalize_spectrum(df):
    """Normalizes spectrum to set to one"""
    for order in df.order.unique():
        mask = df.order == order
        norm_constant = df['Sci Flux'][mask].median() #mean takes outliers into account
        df['Sci Flux'][mask] = df['Sci Flux'][mask]/norm_constant
        df['Sci Error'][mask] = df['Sci Error'][mask]/norm_constant
        
    return df

Eventually we will loop over index.

In [6]:
order = 16
n_walkers = 32
n_params = 5
n_steps = 5000
labels = ["m", "b", "A", "mu", "w"]

In [39]:
df_results = pd.DataFrame()

In [40]:
df_results

In [41]:
for index in range(125, 130):

    fn = goldilocks_files[index]
    print(index, fn[-49:])
    df = normalize_spectrum(get_goldilocks_dataframe(fn))
    
    
    sub_region = (df.order == order) & (df['Sci Wavl'] > 10343.5) & (df['Sci Wavl'] < 10348.5)
    wl = df['Sci Wavl'][sub_region].values
    flux = df['Sci Flux'][sub_region].values
    unc = df['Sci Error'][sub_region].values
    
    def generative_model(m, b, A, mu, logw, int_wl = 10330):
        """Generate the model given parameters"""
        continuum = m * (wl - int_wl) + b
        w = np.exp(logw)
        gaussian = A * np.exp(-0.5*(wl-mu)**2/w**2)
        return continuum - gaussian
    
    def log_likelihood(theta):
        m, b, A, mu, logw = theta
        model = generative_model(m, b, A, mu, logw, int_wl = 10345)
        residual = flux - model
        chi_squared = np.sum(residual** 2 / unc**2)
        return -0.5 * chi_squared
    
    m_guess, b_guess, A_guess, mu_guess, logw_guess = 0.01, 1, 0.1, 10345, np.log(0.4)
    theta_guess = np.array([m_guess, b_guess, A_guess, mu_guess, logw_guess])
    
    pos = theta_guess + 1e-4 * np.random.randn(n_walkers, n_params) #intial guess position
    
    sampler = emcee.EnsembleSampler(n_walkers, n_params, log_likelihood)
    sampler.run_mcmc(pos, n_steps, progress=True);
    
    flat_samples = sampler.get_chain(discard=1000, thin=15, flat=True)

    A_draws = flat_samples[:,2]
    b_draws = flat_samples[:,1]
    m_draws = flat_samples[:,0]
    mu_draws = flat_samples[:,3]
    w_draws = np.exp(flat_samples[:, 4])

    EW = ((2*np.pi)**.5)*(A_draws*w_draws)/(m_draws*(mu_draws-10345)+b_draws)
    EW

    ew_mean = np.mean(EW)
    ew_std = np.std(EW)
    print(ew_mean)
    print(ew_std)
    temp = {'ew':ew_mean, 'ew_unc':ew_std}
    df_results = df_results.append(temp, ignore_index=True)

125 Goldilocks_20200919T063924_v1.0_0024.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 606.36it/s]


0.11414585796944887
0.0054667194265022425
126 Goldilocks_20200919T065336_v1.0_0025.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 603.43it/s]


0.10599033944536414
0.004907293394754557
127 Goldilocks_20200807T084257_v1.0_0035.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 582.50it/s]


0.09422661845567808
0.007256561318373024
128 Goldilocks_20200807T085713_v1.0_0036.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 520.77it/s]


-0.7873144571863284
2100.5761003250195
129 Goldilocks_20200807T091125_v1.0_0037.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 547.79it/s]

0.09274009286538663
0.008556879171678456





In [42]:
df_results

Unnamed: 0,ew,ew_unc
0,0.114146,0.005467
1,0.10599,0.004907
2,0.094227,0.007257
3,-0.787314,2100.5761
4,0.09274,0.008557


Great! It works!  Let's save the results to a csv file.

In [44]:
df_results.to_csv('../data/preliminary_results.csv',index=False)