In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightkurve as lk
from sklearn.utils import resample
from tqdm import tqdm
import time
import concurrent.futures

In [2]:
# Column labels applied to the header-less catalogue table, in file order.
names = [
    'EPIC',
    'Campaign',
    'Teff',
    'log g',
    'Prot',
    'ΔProt',
    'hpeak',
    'Rvar',
    'Kp',
    'MG',
]

In [3]:
# Load the whitespace-delimited catalogue table; '---' entries become NaN.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated in pandas 2.2.
df = pd.read_csv(
    '../../data/Reinhold_Hekker2020/table2.dat',
    sep=r'\s+',
    names=names,
    na_values='---',
)

In [4]:
# Sample selection: each cut is an open interval (both bounds excluded),
# and a row is kept only when it passes all three.
criterion1 = df['Prot'].gt(1) & df['Prot'].lt(10)
criterion2 = df['Rvar'].gt(0.5) & df['Rvar'].lt(20)
criterion3 = df['Teff'].gt(4000) & df['Teff'].lt(4500)
criteria = criterion1 & criterion2 & criterion3

In [5]:
# Keep only the rows passing every cut and renumber them 0..N-1.
df_subset = df.loc[criteria].reset_index(drop=True)

In [6]:
# Bookkeeping columns that download() and add_data() fill in per target.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df_subset['N_EVEREST'] = np.nan
df_subset['N_TESS_SPOC'] = np.nan
# Float zeros rather than int zeros: add_data() later stores float
# measurements here, and writing a float into an int64 column is a
# deprecated implicit upcast in recent pandas.
df_subset['Period_TESS'] = 0.0
df_subset['Amplitude_TESS'] = 0.0
df_subset['Period_K2'] = 0.0
df_subset['Amplitude_K2'] = 0.0
df_subset['Sector'] = np.nan

In [7]:
# Restrict the run to the first 15 targets.
# .copy() detaches the sample from the parent frame: head() returns a slice,
# and the later df_subset.loc[...] = value writes would otherwise trigger
# pandas' SettingWithCopy warning.
df_tiny = df_subset.head(15).copy()

df_subset = df_tiny

In [8]:
# Number of targets (rows) in the working table.
n_sources = df_subset.shape[0]

In [9]:
def add_data(data, max_attempts=2):
    """Measure amplitude and period for one target and store them in ``df_subset``.

    Entries of the search result are tried in order (at most ``max_attempts``
    of them). The first entry that downloads and measures cleanly is written to
    the table; nothing is written for an entry that fails part-way.

    Parameters
    ----------
    data : sequence
        ``(mission, idx, sr)``: ``mission`` is the column suffix used in
        ``Amplitude_{mission}`` / ``Period_{mission}``, ``idx`` is the row
        label in ``df_subset``, and ``sr`` is the search result. ``sr`` must
        support ``len()`` and integer indexing, and its entries must provide
        ``.download()``.
    max_attempts : int, optional
        Maximum number of leading entries of ``sr`` to try. The default of 2
        keeps the earlier behaviour of trying entry 0 and then entry 1.

    Raises
    ------
    Exception
        The error from the last attempted entry, when every attempt failed.
        An empty search result is not an error: the function simply returns.
    """
    def add_data_helper(data, num):
        """Download entry ``num`` and return ``(amplitude, period, sector)``."""
        mission, idx, sr = data
        lc = sr[num].download()
        # remove NaNs (normalisation is intentionally left off, as before)
        lc = lc.remove_nans()
        # amplitude = spread between the 5th and 95th flux percentiles
        vector = lc.flux.value
        lo, hi = np.percentile(vector, (5, 95))
        peak_to_valley = hi - lo
        # period at the highest peak of the light curve's periodogram
        period = float(lc.to_periodogram().period_at_max_power.to_value())
        # the sector is only recorded for TESS light curves
        sector = lc.sector if mission == 'TESS' else None
        return peak_to_valley, period, sector

    mission, idx, sr = data
    last_error = None
    # Never index past the end of the search result: a one-entry result has
    # no entry 1 to fall back to.
    for num in range(min(len(sr), max_attempts)):
        try:
            peak_to_valley, period, sector = add_data_helper(data, num)
        except Exception as exc:  # KeyboardInterrupt/SystemExit still propagate
            last_error = exc
            continue
        # Write only after every measurement succeeded, so a row never mixes
        # values taken from two different entries.
        df_subset.loc[idx, f'Amplitude_{mission}'] = peak_to_valley
        df_subset.loc[idx, f'Period_{mission}'] = period
        if mission == 'TESS':
            df_subset.loc[idx, 'Sector'] = sector
        return

    if last_error is not None:
        raise last_error

In [10]:
def download(data):
    """Search for light curves of one target and record how many were found.

    Parameters
    ----------
    data : sequence
        ``(name, index, mission)``: ``name`` is the target string passed to
        ``lk.search_lightcurve``, ``index`` is the row label in ``df_subset``,
        and ``mission`` is ``0`` for the TESS search or ``1`` for the EVEREST
        search.

    Returns
    -------
    The search result returned by ``lk.search_lightcurve``.

    Raises
    ------
    ValueError
        If ``mission`` is neither 0 nor 1 (previously this surfaced as an
        UnboundLocalError on ``sr``).
    """
    name, index, mission = data
    if mission == 0:
        # NOTE(review): the count column is called N_TESS_SPOC but the search
        # is not restricted to the SPOC author - confirm this is intended.
        search_kwargs = {'mission': 'TESS'}
        count_column = 'N_TESS_SPOC'
    elif mission == 1:
        search_kwargs = {'author': 'EVEREST'}
        count_column = 'N_EVEREST'
    else:
        raise ValueError(f'unknown mission code {mission!r}; expected 0 (TESS) or 1 (EVEREST)')

    sr = lk.search_lightcurve(name, **search_kwargs)
    df_subset.loc[index, count_column] = len(sr)
    return sr

In [11]:
def main():
    """Search, download and measure light curves for every row of ``df_subset``.

    Stage 1 runs ``download`` for every target against both archives on a
    thread pool. Stage 2 runs ``add_data`` on every search result on a second
    thread pool and reports each target whose measurement failed.

    Side effects
    ------------
    * ``df_subset`` is filled in by ``download`` and ``add_data``.
    * The module-level names ``start`` and ``end`` are set to the wall-clock
      timestamps of the run, so a later cell can evaluate ``end - start``.
    """
    # Published as globals: a later notebook cell reads them after main() returns.
    global start, end
    start = time.time()

    # Row labels (not positions) are handed to the workers, because both
    # download() and add_data() address rows with df_subset.loc[label, ...].
    labels = list(df_subset.index)
    target_names = ['EPIC ' + str(int(epic)) for epic in df_subset['EPIC']]

    TESS_download = [[name, label, 0] for name, label in zip(target_names, labels)]
    K2_download = [[name, label, 1] for name, label in zip(target_names, labels)]

    # NOTE(review): the workers write to the shared df_subset from several
    # threads at once - confirm this is acceptable for these writes.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit both batches before collecting, so they run concurrently.
        e1 = executor.map(download, TESS_download)
        e2 = executor.map(download, K2_download)

        TESS_sr = list(e1)
        K2_sr = list(e2)

    TESS_data = [['TESS', label, sr] for label, sr in zip(labels, TESS_sr)]
    K2_data = [['K2', label, sr] for label, sr in zip(labels, K2_sr)]

    def _measure(job):
        """Run add_data on one job, returning its exception instead of raising."""
        try:
            add_data(job)
        except Exception as exc:
            return exc
        return None

    jobs = TESS_data + K2_data
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map only surfaces a worker's exception when its result is
        # retrieved, so the results are collected here rather than discarded.
        outcomes = list(executor.map(_measure, jobs))

    for (mission, label, _), error in zip(jobs, outcomes):
        if error is not None:
            print(f'{mission} measurement failed for row {label}: {error!r}')

    end = time.time()

In [None]:
# Run the full search / download / measure pipeline defined in main().
if __name__ == '__main__':
    main()

No data found for target "EPIC 201196841".
No data found for target "EPIC 201189968".
No data found for target "EPIC 201232485".
No data found for target "EPIC 202068593".
No data found for target "EPIC 201504611".
No data found for target "EPIC 201318977".
No data found for target "EPIC 201377225".
No data found for target "EPIC 201429915".
No data found for target "EPIC 201457244".
No data found for target "EPIC 202084291".
No data found for target "EPIC 201234609".
No data found for target "EPIC 202085463".
No data found for target "EPIC 202083650".
No data found for target "EPIC 201245978".
No data found for target "EPIC 201363197".
No data found for target "EPIC 201531643".
No data found for target "EPIC 201565901".
No data found for target "EPIC 201584594".
No data found for target "EPIC 201597538".
No data found for target "EPIC 201611164".
No data found for target "EPIC 201622125".
No data found for target "EPIC 201642770".
No data found for target "EPIC 201636587".
No data fou

No data found for target "EPIC 218754715".
No data found for target "EPIC 219671731".
No data found for target "EPIC 220178295".
No data found for target "EPIC 220181036".
No data found for target "EPIC 220202224".
No data found for target "EPIC 220227055".
No data found for target "EPIC 220280952".
No data found for target "EPIC 220296174".
No data found for target "EPIC 220309501".
No data found for target "EPIC 220320189".
No data found for target "EPIC 220340058".
No data found for target "EPIC 220347415".
No data found for target "EPIC 220362839".
No data found for target "EPIC 220392853".
No data found for target "EPIC 220440299".
No data found for target "EPIC 220457721".
No data found for target "EPIC 220475844".
No data found for target "EPIC 220483096".
No data found for target "EPIC 220590606".
No data found for target "EPIC 220616741".
No data found for target "EPIC 220629634".
No data found for target "EPIC 220648304".
No data found for target "EPIC 220688121".
No data fou

  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **

  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)
  result = super().__array_ufunc__(function, method, *arrays, **kwargs)


In [None]:
# Show the working table so the columns filled in by the pipeline can be inspected.
df_subset

In [None]:
# Elapsed wall-clock time of the pipeline run.
# NOTE(review): this cell needs `start` and `end` at notebook (global) scope;
# they are assigned inside main(), so confirm main() publishes them as globals.
end-start

Exception in thread IPythonHistorySavingThread:
Traceback (most recent call last):
  File "C:\Users\txrya\miniconda3\envs\contracosta\lib\site-packages\IPython\core\history.py", line 804, in writeout_cache
    self._writeout_input_cache(conn)
  File "C:\Users\txrya\miniconda3\envs\contracosta\lib\site-packages\IPython\core\history.py", line 787, in _writeout_input_cache
    conn.execute("INSERT INTO history VALUES (?, ?, ?, ?)",
sqlite3.IntegrityError: UNIQUE constraint failed: history.session, history.line

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\txrya\miniconda3\envs\contracosta\lib\site-packages\IPython\core\history.py", line 859, in run
    self.history_manager.writeout_cache(self.db)
  File "C:\Users\txrya\miniconda3\envs\contracosta\lib\site-packages\decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
  File "C:\Users\txrya\miniconda3\envs\contracosta\lib\site-packages\IPy

In [None]:
# Persist the working table next to the notebook, without the row index.
df_subset.to_csv(
    path_or_buf='pathfinder_sample.csv',
    index=False,
)