# Creating pathfinder subsample

June 20, 2022  
Gully & Ryan H.

The goal of this notebook is to make the pathfinder sample.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Global plotting configuration for the notebook.
# Use seaborn's 'notebook' context with fonts scaled up by 1.5x.
sns.set_context('notebook', font_scale=1.5)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'

In [None]:
# Column labels handed to pd.read_csv(names=...) when loading table2.dat;
# they are assigned to the file's columns in this order.
names = [
    'EPIC', 'Campaign', 'Teff', 'log g', 'Prot',
    'ΔProt', 'hpeak', 'Rvar', 'Kp', 'MG',
]

In [None]:
# Load the Reinhold & Hekker (2020) table 2 catalogue.
# The file is whitespace-delimited with no header row, so column labels come
# from `names`; the literal '---' marks missing values.
# sep=r'\s+' is the documented replacement for the deprecated
# delim_whitespace=True (pandas >= 2.2 emits a FutureWarning for the latter).
df = pd.read_csv('../../data/Reinhold_Hekker2020/table2.dat',
                 sep=r'\s+', names=names, na_values='---')

Looks good!  We see the same trend we had in our proposal figure 2.

## Select a subsample of sources

First, search for some high-amplitude variable stars.

In [None]:
# Boolean row masks for the sample selection (all bounds are exclusive):
#   criterion1 -> 1 < Prot < 10
#   criterion2 -> 0.5 < Rvar < 20
#   criterion3 -> 4000 < Teff < 4500
criterion1 = df['Prot'].gt(1) & df['Prot'].lt(10)
criterion2 = df['Rvar'].gt(0.5) & df['Rvar'].lt(20)
criterion3 = df['Teff'].gt(4000) & df['Teff'].lt(4500)
# A row is selected only if it passes every cut.
criteria = criterion1 & criterion2 & criterion3

In [None]:
# Number of rows that pass all three cuts (each True counts as 1).
criteria.sum()

In [None]:
# Log-log scatter of Rvar against Prot.
# First layer: every row passing the Teff cut (criterion3), drawn faintly.
# Second layer: rows passing all cuts (criteria), drawn more opaquely on top.
#plt.plot(df.Prot, df.Rvar, '.', alpha=0.02);
plt.plot(df.Prot[criterion3], df.Rvar[criterion3], '.', alpha=0.1);
plt.plot(df.Prot[criteria], df.Rvar[criteria], '.', alpha=0.5);
#plt.ylim(3e2, 2e5)
plt.xlim(1e0, 1e2)
plt.xscale('log')
plt.yscale('log')
# Raw strings: '\m' and '\p' are not valid Python escape sequences, so the
# non-raw form triggers an invalid-escape warning on modern Python. The text
# handed to matplotlib is unchanged.
plt.xlabel(r'$P_{\mathrm{rot}}$')
plt.ylabel(r'$\propto$ Amplitude (%)')
# Trailing ';' keeps the Text repr out of the cell output.
plt.title('Reinhold & Hekker 2020 Table 2');

In [None]:
# Preview the first rows of the catalogue that satisfy all selection cuts.
df[criteria].head()

In [None]:
# Keep only the rows passing every cut and renumber them 0..N-1,
# discarding the original catalogue index.
df_subset = df.loc[criteria].reset_index(drop=True)

In [None]:
# Display the selected subset (rows passing all cuts, re-indexed from 0).
df_subset

## Make a sub-subsample

### Prepopulate our columns

In [None]:
# Add placeholder columns to the subset so later cells can fill them in
# per source (N_TESS is filled by the TESS search loop further down).
df_subset['N_TESS'] = 0
df_subset['Period'] = 0
df_subset['Number of Light Curves'] = 0
df_subset['Amplitude'] = 0
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
df_subset['Sector'] = np.nan

In [None]:
# Take the first 15 rows as a small working sample.
# .copy() makes df_tiny an independent frame: a later cell assigns into it
# with .loc, which on a bare .head() slice can raise SettingWithCopyWarning
# and leaves it ambiguous whether the write lands on the slice or its parent.
df_tiny = df_subset.head(15).copy()
df_tiny

### Predownload so that it runs faster later

In [None]:
import lightkurve as lk

Let's find one of the sources that *also* has TESS data available

In [None]:
# Number of rows in the small working sample; used as the bound of the
# TESS search loop below.
n_sources = len(df_tiny)
n_sources

In [None]:
# Reset the TESS light-curve count to zero for every row of df_subset.
# NOTE(review): N_TESS is already initialised to 0 in the "Prepopulate our
# columns" cell, so on a top-to-bottom run this assignment is redundant.
df_subset['N_TESS'] = 0

In [None]:
# Display df_subset with the placeholder columns added.
df_subset

In [None]:
# Rebind the name df_subset to the small df_tiny sample. From this cell on,
# df_subset no longer refers to the full selection, and because both names
# point at the same object, writes through one are visible through the other.
df_subset = df_tiny

In [None]:
from tqdm import tqdm

In [None]:
# For each of the first n_sources rows of df_subset, query the archive for
# TESS SPOC light curves and record how many results came back in N_TESS.
# Both the read and the write below are positional. The earlier form read
# with .iloc[i] (position) but wrote with .loc[i] (label), which only agrees
# when the index is exactly 0..n-1 and would otherwise append new rows.
n_tess_col = df_subset.columns.get_loc('N_TESS')
for i in tqdm(range(n_sources)):
    # Build the target name, e.g. 'EPIC 211071889'; int() drops any float
    # formatting of the catalogue ID before it is turned into text.
    name = 'EPIC ' + str(int(df_subset['EPIC'].iloc[i]))
    sr = lk.search_lightcurve(name, mission="TESS", author='SPOC')
    df_subset.iloc[i, n_tess_col] = len(sr)

In [None]:
# Display the sample with N_TESS filled in by the search loop.
df_subset

In [None]:
# Search for K2 light curves of EPIC 211071889 produced by the EVEREST
# pipeline, and display the search result.
sr = lk.search_lightcurve("EPIC 211071889", author="EVEREST", mission="K2")
sr

In [None]:
# Download the light curve from the K2 search result held in `sr`.
lc_K2 = sr.download()

In [None]:
# Search for TESS SPOC light curves of the same target (this overwrites the
# K2 search result in `sr`) and display only the first entry.
sr = lk.search_lightcurve("EPIC 211071889", author="SPOC", mission="TESS")
sr[0]

In [None]:
# Download the first entry of the TESS search result.
lc_TESS = sr[0].download()

In [None]:
# Rescale the K2 light curve by its 98th-percentile flux and plot it with
# reference lines at 1.0 and at 0.93 (labelled as a 7 % flux loss).
# nanpercentile matches the TESS cell and ignores NaN flux values; plain
# np.percentile returns NaN if any are present, which would turn the whole
# rescaled light curve into NaN.
scalar = np.nanpercentile(lc_K2.flux, 98)
lc_K2 = lc_K2/scalar
ax = lc_K2.plot()
ax.axhline(1.0, linestyle='dashed')
ax.axhline(0.93, linestyle='dotted', color='#d35400', label='7 % flux loss')
ax.set_title('K2 data')
ax.set_ylim(0.8, 1.1)
# Trailing ';' keeps the Legend repr out of the cell output.
ax.legend();

In [None]:
# Drop NaN cadences, bin the TESS light curve, then rescale it by its
# 98th-percentile flux (NaN-aware).
# NOTE(review): `binsize` looks to be deprecated in lightkurve 2.x in favour
# of `time_bin_size` / `bins` - confirm against the installed version.
# NOTE(review): lc_TESS is reassigned from itself, so re-running this cell
# bins and rescales the already-processed light curve again.
lc_TESS = lc_TESS.remove_nans().bin(binsize=5)
scalar = np.nanpercentile(lc_TESS.flux, 98)
lc_TESS = lc_TESS/scalar

In [None]:
# Plot the rescaled TESS light curve with horizontal reference levels:
# unity, 0.93 (labelled 7 % flux loss) and 0.955 (labelled 4.5 % flux loss).
ax = lc_TESS.plot()
ax.axhline(y=1.0, ls='dashed')
ax.axhline(y=0.93, ls='dotted', color='#d35400', label='7 % flux loss')
ax.axhline(y=0.955, ls='solid', color='#2ecc71', label='4.5 % flux loss')
# Same title and y-range as the separate set_title / set_ylim calls.
ax.set(title='TESS data', ylim=(0.8, 1.1))
ax.legend(fontsize=12)

In [None]:
# Guard: the download below expects the current search result `sr`
# (the TESS SPOC search, on a top-to-bottom run) to hold exactly one entry.
assert len(sr) == 1

In [None]:
# Download the light curve for the search result currently held in `sr`.
lc=sr.download()

In [None]:
# Clip outliers (sigma=4, with a tighter sigma_upper=3) and then normalize.
# NOTE(review): lc is reassigned from itself, so re-running this cell clips
# and normalizes the already-cleaned light curve again.
lc = lc.remove_outliers(sigma=4,sigma_upper=3).normalize()

In [None]:
# Compute a periodogram of the cleaned light curve.
# NOTE(review): nterms=5 is presumably the number of Fourier terms used by
# the underlying Lomb-Scargle model - confirm against the lightkurve docs.
pg = lc.to_periodogram(nterms=5)

In [None]:
# Periodogram in period space on log axes, with two vertical markers:
# the period of maximum power (dotted) and the 6.7 d catalogue value (dashed).
ax = pg.plot(view='period', scale='log')
ax.axvline(
    x=pg.period_at_max_power.value,
    ls='dotted',
    label=f'{pg.period_at_max_power:0.5f}',
)
ax.axvline(
    x=6.70,
    ls='dashed',
    color='red',
    label='6.7 d (Reinhold & Hekker 2020)',
)
ax.legend(fontsize=12)

In [None]:
# Display the period at which the periodogram power is highest.
pg.period_at_max_power

In [None]:
# Plot the cleaned light curve and overlay two periodogram models on the
# same axes, both evaluated at the light curve's own time stamps.
ax = lc.plot()
# Model at the default frequency (presumably the peak frequency - confirm).
pg.model(lc.time).plot(ax=ax)
# Model at half the peak frequency, i.e. twice the peak period.
pg.model(lc.time, frequency=pg.frequency_at_max_power/2).plot(ax=ax)