# Creating Rapid Rotator subsample

July 11, 2022  
Gully & Ryan H.

The goal of this notebook is to make the Rapid Rotator sample.

In [None]:
import concurrent.futures
import time
import warnings

import astropy.units as u
import lightkurve as lk
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm


# Use seaborn's 'notebook' context with fonts scaled up by 1.5x.
sns.set_context('notebook', font_scale=1.5)
# Draw figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Column labels, in order, passed as `names=` when table2.dat is read.
names = [
    'EPIC',
    'Campaign',
    'Teff',
    'log g',
    'Prot',
    'ΔProt',
    'hpeak',
    'Rvar',
    'Kp',
    'MG',
]

In [None]:
# Read the whitespace-delimited catalogue table; '---' marks missing values.
# `sep=r'\s+'` is the supported spelling of the deprecated
# `delim_whitespace=True` option and parses the file identically.
df = pd.read_csv('../../data/Reinhold_Hekker2020/table2.dat',
                 sep=r'\s+', names=names, na_values='---')

In [None]:
# Load the previously saved rapid-rotator sample; it is assigned to
# `df_subset` further down, before the results are written out and plotted.
df_rapid_rotator = pd.read_csv('../../data/Rapid_Rotator_Sample.csv')

Looks good!  We see the same trend we had in our proposal figure 2.

## Select a subsample of sources

First, select the rapid rotators: stars whose catalogued rotation period (`Prot`) is shorter than 10 days.

In [None]:
# Boolean mask: True for sources with a rotation period below 10.
criterion = df['Prot'].lt(10)

In [None]:
# Number of sources that satisfy the selection (True counts as 1).
criterion.sum()

In [None]:
# Prot versus Rvar for the selected sources, on log-log axes.
fig, ax = plt.subplots()
ax.plot(df.Prot[criterion], df.Rvar[criterion], '.')
ax.set_xlim(1e0, 1e2)
ax.set_xscale('log')
ax.set_yscale('log')
# Raw strings: '\m' and '\p' are invalid escape sequences in normal string
# literals and raise a SyntaxWarning on recent Python versions.
ax.set_xlabel(r'$P_{\mathrm{rot}}$')
ax.set_ylabel(r'$\propto$ Amplitude (%)')
ax.set_title('Reinhold & Hekker 2020 Table 2');

In [None]:
# Keep only the selected rows and renumber them 0..N-1 so that positional
# and label-based indexing agree from here on.
df_subset = df.loc[criterion].reset_index(drop=True)

## Make a sub-subsample

### Prepopulate our columns

In [None]:
# Placeholder columns that the download/measurement step fills in per row.
# `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.
df_subset['N_EVEREST'] = np.nan
df_subset['N_TESS_SPOC'] = np.nan
# Periods and amplitudes start at 0.0 (float, not int) so that assigning the
# measured float values later does not trigger a dtype-upcast warning.
df_subset['Period_TESS'] = 0.0
df_subset['Amplitude_TESS'] = 0.0
df_subset['Period_K2'] = 0.0
df_subset['Amplitude_K2'] = 0.0
df_subset['Sector'] = np.nan

In [None]:
# First 15 sources, for quick test runs. `.copy()` makes this an independent
# frame, so later `.loc` assignments do not raise SettingWithCopyWarning or
# write through to the full subset.
df_tiny = df_subset.head(15).copy()

### Predownload so that it runs faster later

Let's find one of the sources that *also* has TESS data available

Delete the cell below if you want to run on the entire subset of 400+ sources...

df_subset = df_tiny

In [None]:
# Number of rows (sources) that will be processed.
n_sources = df_subset.shape[0]
n_sources

We want to have at least 1 EVEREST lightcurve and 1 SPOC lightcurve for all sources.

In [None]:
def add_data(data):
    """Measure one source's amplitude and period and store them in ``df_subset``.

    Parameters
    ----------
    data : sequence
        ``[mission, idx, sr]``: ``mission`` is the column suffix (``'TESS'``
        or ``'K2'`` in this notebook), ``idx`` is the row label in
        ``df_subset`` and ``sr`` is the search result for that source.

    Returns
    -------
    None
        Results are written to the module-level ``df_subset`` columns
        ``Amplitude_<mission>``, ``Period_<mission>`` and, for TESS only,
        ``Sector``.

    Notes
    -----
    This is best effort: the first search result is tried and, if it fails,
    the second. A failure is reported as a ``RuntimeWarning`` instead of being
    raised, so one bad source does not abort the whole run. An empty search
    result is skipped silently.
    """
    mission, idx, sr = data

    def add_data_helper(mission, idx, sr, num):
        """Download search result ``num`` and record its measurements."""
        lc = sr[num].download()
        # drop NaNs and outliers (the flux is not normalized here)
        lc = lc.remove_nans().remove_outliers()
        # amplitude = spread between the 5th and 95th flux percentiles
        vector = lc.flux.value
        lo, hi = np.percentile(vector, (5, 95))
        peak_to_valley = hi - lo
        # add the amplitude to the table
        df_subset.loc[idx, f'Amplitude_{mission}'] = peak_to_valley
        # period of the strongest periodogram peak between 0.1 and 10
        periodogram = lc.to_periodogram(minimum_period=.1, maximum_period=10)
        period = float(periodogram.period_at_max_power.to_value())
        # add the period to the table
        df_subset.loc[idx, f'Period_{mission}'] = period
        if mission == 'TESS':
            # record which sector the light curve came from
            df_subset.loc[idx, 'Sector'] = lc.sector

    # Try the first result, then fall back to the second if there is one.
    for num in range(min(len(sr), 2)):
        try:
            add_data_helper(mission, idx, sr, num)
        except Exception as err:
            # Only ordinary errors are absorbed; KeyboardInterrupt and
            # SystemExit still propagate so a long run can be stopped.
            warnings.warn(
                f'{mission} light curve {num} for row {idx} failed: {err!r}',
                RuntimeWarning,
            )
        else:
            return

def download(data):
    """Search for light curves of one source and record how many were found.

    Parameters
    ----------
    data : sequence
        ``[name, index, mission]``: ``name`` is the target name to search
        for, ``index`` is the row label in ``df_subset`` and ``mission`` is
        ``0`` for a TESS search or ``1`` for an EVEREST search.

    Returns
    -------
    tuple
        ``(index, sr)`` where ``sr`` is the search result.

    Raises
    ------
    ValueError
        If ``mission`` is neither ``0`` nor ``1``.

    Notes
    -----
    The number of results is written to the module-level ``df_subset``
    (``N_TESS_SPOC`` or ``N_EVEREST``).
    """
    name, index, mission = data
    if mission == 0:
        # NOTE(review): this search is not restricted to one author, although
        # the count lands in a column named N_TESS_SPOC - confirm intent.
        sr = lk.search_lightcurve(name, mission='TESS')
        df_subset.loc[index, 'N_TESS_SPOC'] = len(sr)
    elif mission == 1:
        sr = lk.search_lightcurve(name, author='EVEREST')
        df_subset.loc[index, 'N_EVEREST'] = len(sr)
    else:
        # Without this branch `sr` would be unbound at the return below.
        raise ValueError(
            f'unknown mission code {mission!r}; expected 0 (TESS) or 1 (EVEREST)'
        )
    return index, sr

In [None]:
def main():
    """Search for every source in ``df_subset``, then measure each one.

    The TESS and EVEREST searches run concurrently in a thread pool via
    ``download``. Afterwards ``add_data`` is called for each source, first
    with the TESS result and then with the K2 result.

    The wall-clock time of the run is stored in the module-level ``start``
    and ``end`` variables and printed. The timestamps are taken inside the
    function: taking them around the ``def`` statement would only time the
    function definition, because ``main()`` is called from another cell.
    """
    global start, end
    start = time.time()

    # Target names to search for, one per row of df_subset.
    targets = [f'EPIC {int(df_subset.EPIC.iloc[i])}' for i in range(n_sources)]

    # download() expects [name, row index, mission code]; 0 = TESS, 1 = K2.
    TESS_download = [[target, i, 0] for i, target in enumerate(targets)]
    K2_download = [[target, i, 1] for i, target in enumerate(targets)]

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        TESS_sr = executor.map(download, TESS_download)
        K2_sr = executor.map(download, K2_download)

        # executor.map yields results in submission order, so entry i
        # belongs to source i in both lists.
        TESS_data = [['TESS', index, sr] for index, sr in TESS_sr]
        K2_data = [['K2', index, sr] for index, sr in K2_sr]

    for tess_item, k2_item in zip(TESS_data, K2_data):
        add_data(tess_item)
        add_data(k2_item)

    end = time.time()
    print(f'main() finished in {end - start:.1f} seconds')

In [None]:
# Run the search and measurement pipeline defined by main().
if __name__ == '__main__':
    main()

In [None]:
# Switch to the sample loaded from Rapid_Rotator_Sample.csv for everything
# that follows.
# NOTE(review): this rebinding discards whatever main() just wrote into
# df_subset - confirm that using the saved sample here is intentional.
df_subset = df_rapid_rotator

## Computation times

**15 Stars:**  
Desktop:  
fresh download time ~ 42 seconds  
pre-downloaded time ~ 33.65 seconds  
cached time ~ 2.6 seconds  
  

Laptop:  
fresh download time ~ 46 seconds  
pre-downloaded time ~ 33.8 seconds  
cached time ~ 2.39 seconds  

------------------------------------------------------------  

**416 Stars:**  
Desktop:  
fresh download time ~ 1359.3 seconds ~ 22.6 minutes  
pre-downloaded time ~ 808 seconds ~ 13.5 minutes  
cached time ~ 100 seconds  
  

Laptop:  
fresh download time ~ 1747.5 seconds ~ 29.1 minutes  
pre-downloaded time ~ 936.8 seconds ~ 15.6 minutes  
cached time ~ 66.4 seconds  

------------------------------------------------------------  

**4196 Stars:**  
Desktop:  
fresh download time ~ ?? seconds ~ ?? minutes  
pre-downloaded time ~ ?? seconds ~ ?? minutes  
cached time ~ ?? seconds  
  

Laptop:  
fresh download time ~ ?? seconds ~ ?? minutes  
pre-downloaded time ~ ?? seconds ~ ?? minutes  
cached time ~ ?? seconds  

In [None]:
# Write the current sample to a CSV file in the working directory, without
# the row index column.
df_subset.to_csv('New_Rapid_Rotator_Sample.csv', index=False)

## Spot check one source...

In [None]:
# Identifier of the single source to inspect.
# NOTE(review): main() builds its search names with an 'EPIC ' prefix;
# confirm that a bare ID string resolves to the same target.
name = '202083650'

In [None]:
# Fetch the EVEREST light curve for the chosen source.
k2_lc = lk.search_lightcurve(name, author='EVEREST').download()
# Clean it up: drop NaNs, drop outliers, then flatten.
k2_lc = k2_lc.remove_nans().remove_outliers().flatten()
# Apply the SFF correction using 20 windows.
k2_lc = lk.SFFCorrector(k2_lc).correct(windows=20)

In [None]:
# Plot the corrected light curve; the commented limits below are optional
# manual zoom ranges for the y-axis.
ax = k2_lc.plot()
# ax.set_ylim(.9, 1.1)
# ax.set_ylim(180000, 190000)

## Plotting the data

In [None]:
# Despite the "bad_" prefix, each mask is True for rows to KEEP.
# 1) the TESS period is more than 80% of the K2 period
bad_mask1 = df_subset.Period_TESS.gt(df_subset.Period_K2 * 0.8)
# 2) the K2 period is more than 80% of the TESS period
bad_mask2 = df_subset.Period_K2.gt(df_subset.Period_TESS * 0.8)
# 3) both periods are below 7
tess_is_short = df_subset.Period_TESS < 7
k2_is_short = df_subset.Period_K2 < 7
bad_mask3 = tess_is_short & k2_is_short
# A row is selected only if it passes all three checks.
mask = (bad_mask1 & bad_mask2) & bad_mask3

In [None]:
# Rows whose two period measurements passed the checks, renumbered from 0.
df_comparison = df_subset.loc[mask].reset_index(drop=True)

In [None]:
# Period measured from K2 versus period measured from TESS, per star.
fig, ax = plt.subplots(figsize=(6, 6))

ax.set_ylim(0.5, 10)
ax.set_xlim(0.5, 10)

# Raw strings keep the LaTeX backslashes from being parsed as (invalid)
# Python escape sequences.
ax.set_xlabel(r'$P_{\mathrm{Kepler}}$')
ax.set_ylabel(r'$P_{\mathrm{TESS}}$')

# The plotted quantities are periods, so the title and legend say so.
ax.set_title('Comparison Between TESS and Kepler Periods')

ax.plot(df_comparison.Period_K2, df_comparison.Period_TESS, 'r.', label='Star Period')

# One-to-one line: points on it have identical periods in both data sets.
x = [0.5, 10]
y = [0.5, 10]
ax.plot(x, y, label='Perfect Correlation')

ax.legend()
plt.show()

In [None]:
# Number of sources that made it into the period comparison.
len(df_comparison)

## Recreate fig2.pdf plot from proposal

In [None]:
# create a new dataframe that only includes stars with Kepler and TESS data available
criterion = df_subset.Amplitude_TESS > 0.00005
df_both = df_subset[criterion].reset_index(drop=True)

In [None]:
# Mean amplitude over the stars in df_both, one value per data set.
TESS_mean = df_both.Amplitude_TESS.mean()
K2_mean = df_both.Amplitude_K2.mean()

In [None]:
# Period versus amplitude for both data sets, with their mean amplitudes.
# The figure size is passed when the figure is created. Setting
# rcParams["figure.figsize"] after the axes exist has no effect on this
# figure and silently changes every later figure in the session.
fig, ax = plt.subplots(figsize=(20, 5))

ax.plot(df_both.Period_K2, df_both.Amplitude_K2, '.', color='black', label='Kepler')
ax.plot(df_both.Period_TESS, df_both.Amplitude_TESS, '.', color='red', label='TESS')

ax.axhline(TESS_mean, linestyle='dashed', label='TESS mean amplitude', color='green')
ax.axhline(K2_mean, linestyle='dashed', label='Kepler mean amplitude', color='blue')

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlim(0.5, 100)
ax.set_ylim(0.5, 1e5)

# Reference lines at amplitude 1e3 and period 27.
ax.axhline(1e3, linestyle='dotted', label='1%', color='purple')
ax.axvline(27, linestyle='dashed', label='27 days', color='purple')

# Shrink the axes to 60% width so the legend fits to the right of the plot.
chartBox = ax.get_position()
ax.set_position([chartBox.x0, chartBox.y0, chartBox.width*0.6, chartBox.height])
ax.legend(loc='upper center', bbox_to_anchor=(1.2, 0.75), shadow=True, ncol=1)

# Raw strings keep the LaTeX backslashes from being parsed as (invalid)
# Python escape sequences.
ax.set_xlabel(r'$P_{\mathrm{rot}}$')
ax.set_ylabel(r'$\propto$ Amplitude (%)')
ax.set_title(r'Predicted for 4000 < $T_{\mathrm{eff}}$ < 4500 in TESS')

plt.show()

In [None]:
# Mean TESS amplitude expressed as a percentage of the mean K2 amplitude.
ratio = TESS_mean / K2_mean * 100
print('{} %'.format(ratio))