# Batch process Equivalent Widths with MCMC

The goal of this notebook is to distill our analysis into a programmatic loop over many spectra and save the Equivalent Width (EW) and its uncertainty to a results table.  The table will be in the form of a pandas dataframe, which we'll then save as a csv file.

In [1]:
import numpy as np
import pandas as pd
import os
import glob
from astropy.io import fits
import emcee
from astropy.time import Time

In [2]:
import warnings

import pandas as pd
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [3]:
goldilocks_files = glob.glob('../data/HPF/Helium-transit-data/**/Goldilocks*.fits', recursive=True)

In [4]:
filename_zero = goldilocks_files[0]
hdus = fits.open(filename_zero)

In [5]:
def get_goldilocks_dataframe(fn):
    """Return a pandas Dataframe given a Goldilocks FITS file name"""
    hdus = fits.open(fn)
    df_original = pd.DataFrame()
    header = hdus[0].header
    for j in range(28):
        df = pd.DataFrame()
        for i in range(1, 10):
            name = hdus[i].name
            df[name] = hdus[i].data[j, :]
        df['order'] = j
        df_original = df_original.append(df, ignore_index=True)
    keep_mask = df_original[df_original.columns[0:6]] != 0.0
    df_original = df_original[keep_mask.all(axis=1)].reset_index(drop=True)
    
    return df_original, header

In [6]:
def normalize_spectrum(df):
    """Normalizes spectrum to set to one"""
    for order in df.order.unique():
        mask = df.order == order
        norm_constant = df['Sci Flux'][mask].median() #mean takes outliers into account
        df['Sci Flux'][mask] = df['Sci Flux'][mask]/norm_constant
        df['Sci Error'][mask] = df['Sci Error'][mask]/norm_constant
        
    return df

Eventually we will loop over index.

In [7]:
order = 4
n_walkers = 32
n_params = 5
n_steps = 5000
labels = ["m", "b", "A", "mu", "w"]

In [8]:
df_results = pd.DataFrame()

In [9]:
for index in range(115, 155):

    fn = goldilocks_files[index]
    print(index, fn[-49:])
    df_orig, header = get_goldilocks_dataframe(fn)
    date_raw = header['DATE-OBS']
    date = date_raw[0:10]
    time = date_raw[11:19]
    obj = header['OBJECT']
    df = normalize_spectrum(df_orig)
    qidx = header['QIDX']
    j_date = date_raw
    t = Time(j_date, format='isot', scale='utc')
    jd = t.jd
    
    wavelength1 = 8538
    wavelength2 = 8546
    calcium_line = 8542
    
    sub_region = (df.order == order) & (df['Sci Wavl'] > wavelength1) & (df['Sci Wavl'] < wavelength2)
    wl = df['Sci Wavl'][sub_region].values
    flux = df['Sci Flux'][sub_region].values
    unc = df['Sci Error'][sub_region].values
    
    def generative_model(m, b, A, mu, logw, int_wl = calcium_line):
        """Generate the model given parameters"""
        continuum = m * (wl - int_wl) + b
        w = np.exp(logw)
        gaussian = A * np.exp(-0.5*(wl-mu)**2/w**2)
        return continuum - gaussian
    
    def log_likelihood(theta):
        m, b, A, mu, logw = theta
        model = generative_model(m, b, A, mu, logw, int_wl = calcium_line)
        residual = flux - model
        chi_squared = np.sum(residual** 2 / unc**2)
        return -0.5 * chi_squared
    
    m_guess, b_guess, A_guess, mu_guess, logw_guess = 0.01, 0.3, 0.1, calcium_line, np.log(0.4)
    theta_guess = np.array([m_guess, b_guess, A_guess, mu_guess, logw_guess])
    
    pos = theta_guess + 1e-4 * np.random.randn(n_walkers, n_params) #intial guess position
    
    sampler = emcee.EnsembleSampler(n_walkers, n_params, log_likelihood)
    sampler.run_mcmc(pos, n_steps, progress=True);
    
    flat_samples = sampler.get_chain(discard=1000, thin=15, flat=True)

    A_draws = flat_samples[:,2]
    b_draws = flat_samples[:,1]
    m_draws = flat_samples[:,0]
    mu_draws = flat_samples[:,3]
    w_draws = np.exp(flat_samples[:, 4])

    EW = ((2*np.pi)**.5)*(A_draws*w_draws)/(m_draws*(mu_draws-calcium_line)+b_draws)
    EW

    ew_mean = np.mean(EW)
    ew_std = np.std(EW)
    print(ew_mean)
    print(ew_std)
    temp = {'ew':ew_mean, 'ew_unc':ew_std, 'date':date, 'star_name':obj, 'time':time, 'int_wv':calcium_line, 'qidx':qidx, 'jd':jd}
    df_results = df_results.append(temp, ignore_index=True)

115 Goldilocks_20200809T082242_v1.0_0040.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 583.49it/s]


1.1770586778055014
0.03818644602828945
116 Goldilocks_20200809T083657_v1.0_0041.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 618.72it/s]


1.173182014964545
0.030147852021617
117 Goldilocks_20200809T085111_v1.0_0042.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 647.16it/s]


1.1003336820030836
0.02534596791425638
118 Goldilocks_20200809T090525_v1.0_0043.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 659.66it/s]


1.0897652608816095
0.022047808755659294
119 Goldilocks_20200809T091938_v1.0_0044.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 650.15it/s]


1.1291370842498847
0.02251949345060005
120 Goldilocks_20200809T093350_v1.0_0045.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 641.42it/s]


1.1113865173452449
0.025273693596823656
121 Goldilocks_20200919T054228_v1.0_0020.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 589.77it/s]


1.10434008074982
0.029879938712683733
122 Goldilocks_20200919T055644_v1.0_0021.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:12<00:00, 413.11it/s]


1.093289406256483
0.0251893666986862
123 Goldilocks_20200919T061054_v1.0_0022.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:10<00:00, 498.70it/s]


1.1011516954593372
0.022643976082385157
124 Goldilocks_20200919T062506_v1.0_0023.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:10<00:00, 492.13it/s]


7.3981483248874
5.315551233947909
125 Goldilocks_20200919T063924_v1.0_0024.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 536.76it/s]


1.0665911253019424
0.02126204830202118
126 Goldilocks_20200919T065336_v1.0_0025.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 546.42it/s]


1.1160965855864218
0.02346827703533789
127 Goldilocks_20200807T084257_v1.0_0035.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 550.09it/s]


1.1856728757264487
0.03274044978521005
128 Goldilocks_20200807T085713_v1.0_0036.spectra.fits


  gaussian = A * np.exp(-0.5*(wl-mu)**2/w**2)
  gaussian = A * np.exp(-0.5*(wl-mu)**2/w**2)
  gaussian = A * np.exp(-0.5*(wl-mu)**2/w**2)
  w = np.exp(logw)
100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 523.72it/s]
  w_draws = np.exp(flat_samples[:, 4])
  x = asanyarray(arr - arrmean)


-inf
nan
129 Goldilocks_20200807T091125_v1.0_0037.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 551.67it/s]


1.2970049509996913
0.5595133441210229
130 Goldilocks_20200807T092540_v1.0_0038.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 559.40it/s]


1.1463771921989112
0.028672085410143783
131 Goldilocks_20200808T084204_v1.0_0024.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 551.52it/s]


1.1390740142681273
0.02151824119928764
132 Goldilocks_20200808T085617_v1.0_0025.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 559.72it/s]


1.175558799985236
0.02099660357582595
133 Goldilocks_20200808T091032_v1.0_0026.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 543.47it/s]


1.1355022047018768
0.01824448577362067
134 Goldilocks_20200808T092446_v1.0_0027.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 551.64it/s]


1.1039998439235124
0.021023443303432608
135 Goldilocks_20200918T111606_v1.0_0029.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 550.84it/s]


1.0579047683944667
0.022617399189824604
136 Goldilocks_20200918T113019_v1.0_0030.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 540.48it/s]


1.0652711250874654
0.02173262933214615
137 Goldilocks_20200918T114433_v1.0_0031.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 535.86it/s]


1.0783195375218735
0.02148198180435215
138 Goldilocks_20200919T111858_v1.0_0038.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 539.00it/s]


1.1318143347497762
0.02046306767899826
139 Goldilocks_20200919T113312_v1.0_0039.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 549.20it/s]


1.053027775597984
0.0160607152170716
140 Goldilocks_20200920T111120_v1.0_0014.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 547.05it/s]


1.055477729458409
0.02238878566863021
141 Goldilocks_20200920T112536_v1.0_0015.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 552.17it/s]


1.1225502221259311
0.21538899717828391
142 Goldilocks_20200920T113948_v1.0_0016.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 554.19it/s]


1.0899451581447663
0.018944536382518744
143 Goldilocks_20200920T115358_v1.0_0017.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 551.53it/s]


1.1007972589600354
0.02397179245818335
144 Goldilocks_20200922T110354_v1.0_0037.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:09<00:00, 549.34it/s]


1.1134721447597746
0.02423765487439154
145 Goldilocks_20200922T111812_v1.0_0038.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 615.93it/s]


1.0553492697937104
0.021240557895237232
146 Goldilocks_20200922T113220_v1.0_0039.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 638.38it/s]


1.110876297643373
0.02380852599236402
147 Goldilocks_20200922T114636_v1.0_0040.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 656.16it/s]


1.0659176052838522
0.022583743681382443
148 Goldilocks_20200801T094311_v1.0_0046.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 648.18it/s]


1.082135507053409
0.018424228372133648
149 Goldilocks_20200801T095725_v1.0_0047.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 645.30it/s]


1.1350863314065969
0.019963799826940697
150 Goldilocks_20200904T065413_v1.0_0019.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 660.17it/s]


1.1090081338757567
0.04201985464437047
151 Goldilocks_20200904T070828_v1.0_0020.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 657.50it/s]


1.0899975599264553
0.03162272650419412
152 Goldilocks_20200904T072244_v1.0_0021.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 652.78it/s]


1.0827859011676888
0.02905607496501788
153 Goldilocks_20200904T073653_v1.0_0022.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 641.18it/s]


1.1365481590747664
0.03191080950871491
154 Goldilocks_20200905T074537_v1.0_0032.spectra.fits


100%|█████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:07<00:00, 651.00it/s]

1.0990492933900469
0.02278501284447815





In [10]:
df_results

Unnamed: 0,date,ew,ew_unc,int_wv,jd,qidx,star_name,time
0,2020-08-09,1.177059,0.038186,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,08:23:13
1,2020-08-09,1.173182,0.030148,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,08:37:26
2,2020-08-09,1.100334,0.025346,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,08:51:38
3,2020-08-09,1.089765,0.022048,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,09:05:51
4,2020-08-09,1.129137,0.022519,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,09:20:04
5,2020-08-09,1.111387,0.025274,8542.0,2459071.0,4967.0,HAT-P-32_1_IN,09:34:17
6,2020-09-19,1.10434,0.02988,8542.0,2459112.0,6062.0,HAT-P-32_3_IN,05:42:56
7,2020-09-19,1.093289,0.025189,8542.0,2459112.0,6062.0,HAT-P-32_3_IN,05:57:09
8,2020-09-19,1.101152,0.022644,8542.0,2459112.0,6062.0,HAT-P-32_3_IN,06:11:22
9,2020-09-19,7.398148,5.315551,8542.0,2459112.0,6062.0,HAT-P-32_3_IN,06:25:35


Great! It works!  Let's save the results to a csv file.

In [11]:
df_results.to_csv('../data/preliminary_results.csv',index=False)