# Creating Rapid Rotator subsample

June 27, 2022  
Gully & Ryan H.

The goal of this notebook is to make the Rapid Rotator sample.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightkurve as lk
from tqdm import tqdm
import time
import astropy.units as u


# Use seaborn's 'notebook' context with fonts scaled up by 1.5x.
sns.set_context('notebook', font_scale=1.5)
# Draw matplotlib figures inline in the notebook, in 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Column names applied to table2.dat when it is read (the file is read without a header row).
names = [
    'EPIC',
    'Campaign',
    'Teff',
    'log g',
    'Prot',
    'ΔProt',
    'hpeak',
    'Rvar',
    'Kp',
    'MG',
]

In [None]:
# Read the whitespace-delimited table; '---' marks missing values.
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which is deprecated in recent pandas releases.
df = pd.read_csv('../../data/Reinhold_Hekker2020/table2.dat',
                 sep=r'\s+', names=names, na_values='---')

Looks good!  We see the same trend we had in our proposal figure 2.

## Select a subsample of sources

First search for some high amplitude variable stars

In [None]:
# Boolean mask: True for rows whose Prot is below 10
# (presumably days -- confirm against the table's documentation).
criterion = df['Prot'].lt(10)

In [None]:
# Number of rows that pass the cut (each True counts as 1).
criterion.sum()

In [None]:
# Prot vs. Rvar for the selected rows only, on log-log axes.
#plt.plot(df.Prot, df.Rvar, '.', alpha=0.02);
plt.plot(df.Prot[criterion], df.Rvar[criterion], '.');
#plt.ylim(3e2, 2e5)
plt.xlim(1e0, 1e2)
plt.xscale('log')
plt.yscale('log')
# Raw strings: '\m' and '\p' are not valid Python escape sequences and
# trigger a SyntaxWarning on recent Python versions. The rendered text is unchanged.
plt.xlabel(r'$P_{\mathrm{rot}}$')
plt.ylabel(r'$\propto$ Amplitude (%)')
# Trailing ';' keeps the Text repr out of the cell output.
plt.title('Reinhold & Hekker 2020 Table 2');

In [None]:
# Keep only the rows selected by `criterion` and renumber them 0..n-1,
# so that row labels and row positions coincide in the cells below.
df_subset = df.loc[criterion].reset_index(drop=True)

## Make a subsub sample

### Prepopulate our columns

In [None]:
# Bookkeeping columns that download()/add_data() fill in row by row.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df_subset['N_EVEREST'] = np.nan
df_subset['N_TESS_SPOC'] = np.nan
# Float 0.0 (not int 0): these columns later receive float periods and
# amplitudes, and writing floats into an int64 column triggers pandas'
# incompatible-dtype FutureWarning. 0.0 still means "not measured".
df_subset['Period_TESS'] = 0.0
df_subset['Amplitude_TESS'] = 0.0
df_subset['Period_K2'] = 0.0
df_subset['Amplitude_K2'] = 0.0
df_subset['Sector'] = np.nan

In [None]:
# First 15 rows, for quick test runs. .copy() makes this an independent
# frame, so later .loc writes (when df_tiny stands in for df_subset) do not
# raise SettingWithCopyWarning or depend on whether head() returned a view.
df_tiny = df_subset.head(15).copy()

### Predownload so that it runs faster later

Let's find one of the sources that *also* has TESS data available

Delete the cell below if you want to run on the entire subset of 400+ sources...

df_subset = df_tiny

In [None]:
# Number of rows (sources) that the download loop will iterate over.
n_sources = df_subset.shape[0]
n_sources

We want to have at least 1 EVEREST lightcurve and 1 SPOC lightcurve for all sources.

In [None]:
def download(name, mission, idx):
    """Search for light curves of one target and record the results in ``df_subset``.

    The number of search results is written to ``df_subset`` at row ``idx``
    (column ``N_TESS_SPOC`` for TESS, ``N_EVEREST`` for K2). If there is at
    least one result, the first one is downloaded and handed to ``add_data``;
    if that fails, the second result (when it exists) is tried once. Failures
    are deliberately not propagated, so one bad target cannot stop a long run.

    Parameters
    ----------
    name : str
        Target name passed to ``lk.search_lightcurve``, e.g. ``'EPIC 202059229'``.
    mission : str
        Either ``'TESS'`` or ``'K2'``.
    idx
        Row label of ``df_subset`` (used with ``.loc``) that receives the results.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``mission`` is neither ``'TESS'`` nor ``'K2'``.
    """
    if mission == 'TESS':
        # NOTE(review): the search is not restricted to author='SPOC', so this
        # count includes every author returned -- confirm that is intended
        # for a column named N_TESS_SPOC.
        sr = lk.search_lightcurve(name, mission=mission)
        df_subset.loc[idx, 'N_TESS_SPOC'] = len(sr)
    elif mission == 'K2':
        sr = lk.search_lightcurve(name, mission=mission, author='EVEREST')
        df_subset.loc[idx, 'N_EVEREST'] = len(sr)
    else:
        # Previously this fell through and failed later on an unbound `sr`.
        raise ValueError(f"mission must be 'TESS' or 'K2', got {mission!r}")

    # Try the first result, then fall back to the second one only if it
    # exists. The old bare `except:` indexed sr[1] unconditionally and relied
    # on `finally: return` to hide the resulting IndexError.
    for attempt in range(min(len(sr), 2)):
        try:
            lc = sr[attempt].download()
            add_data(mission, idx, lc)
        except Exception:
            # Best effort: catches ordinary errors only, so Ctrl-C
            # (KeyboardInterrupt) still interrupts the loop.
            continue
        return

def add_data(mission, idx, lc):
    """Measure the amplitude and period of a light curve and store them in ``df_subset``.

    Writes ``Amplitude_<mission>`` and ``Period_<mission>`` at row ``idx``
    and, for TESS light curves, also ``Sector``.

    Parameters
    ----------
    mission : str
        Used to build the column names; ``"TESS"`` additionally records the sector.
    idx
        Row label of ``df_subset`` (used with ``.loc``).
    lc
        Light curve object providing ``remove_nans``, ``remove_outliers``,
        ``flux``, ``to_periodogram`` and (for TESS) ``sector``.
    """
    # Drop NaN cadences, then outliers (library default settings).
    # NOTE(review): the flux is NOT normalized here, so the amplitude below is
    # in the light curve's native flux units -- confirm whether a
    # .normalize() step was intended before comparing K2 and TESS values.
    cleaned = lc.remove_nans().remove_outliers()

    # Amplitude proxy: spread between the 5th and 95th flux percentiles.
    p05, p95 = np.percentile(cleaned.flux.value, (5, 95))
    df_subset.loc[idx, f'Amplitude_{mission}'] = p95 - p05

    # Period at the highest periodogram peak, as a plain float in the
    # periodogram's own period unit.
    best_period = cleaned.to_periodogram().period_at_max_power
    df_subset.loc[idx, f'Period_{mission}'] = float(best_period.to_value())

    if mission == "TESS":
        # Record which sector this light curve came from.
        df_subset.loc[idx, 'Sector'] = cleaned.sector

In [None]:
%%capture
# NOTE: %%capture silences everything this cell prints, including the tqdm
# progress bar; remove it to watch progress.
start = time.time()
for i in tqdm(range(n_sources)):
    # find the name of the star
    name = 'EPIC ' + df_subset.iloc[i].EPIC.astype(int).astype(str)
    # `i` is a row position here (iloc) but is used as a row label (.loc)
    # inside download(); the two agree only while df_subset has a 0..n-1 index.
    download(name, 'TESS', i)
    download(name, 'K2', i)
end = time.time()
# Elapsed wall-clock time in seconds is `end - start`.

In [None]:
# Display the table with the columns filled in by the download loop.
df_subset

## Computation times

**15 Stars:**  
Desktop:  
fresh download time ~ 42 seconds  
pre-downloaded time ~ 33.65 seconds  
cached time ~ 2.6 seconds  
  

Laptop:  
fresh download time ~ 46 seconds  
pre-downloaded time ~ 33.8 seconds  
cached time ~ 2.39 seconds  

------------------------------------------------------------  

**416 Stars:**  
Desktop:  
fresh download time ~ 1359.3 seconds ~ 22.6 minutes  
pre-downloaded time ~ 808 seconds ~ 13.5 minutes  
cached time ~ 100 seconds  
  

Laptop:  
fresh download time ~ 1747.5 seconds ~ 29.1 minutes  
pre-downloaded time ~ 936.8 seconds ~ 15.6 minutes  
cached time ~ 66.4 seconds  

------------------------------------------------------------  

**4196 Stars:**  
Desktop:  
fresh download time ~ ?? seconds ~ ?? minutes  
pre-downloaded time ~ ?? seconds ~ ?? minutes  
cached time ~ ?? seconds  
  

Laptop:  
fresh download time ~ ?? seconds ~ ?? minutes  
pre-downloaded time ~ ?? seconds ~ ?? minutes  
cached time ~ ?? seconds  

In [None]:
# Save the sample to the current working directory, without the index column.
df_subset.to_csv('Rapid_Rotator_Sample.csv', index=False)

## Spot check one source...

In [None]:
# Target used for the manual spot check in the next cells.
name = 'EPIC 202059229'

In [None]:
# NOTE: despite its name, `lc` holds the search result here; the next cell
# replaces it with the downloaded light curve.
lc = lk.search_lightcurve(name, mission='TESS')#author='EVEREST')
lc
#lc_tess.plot()

In [None]:
# Download the first search result and plot it without any cleaning.
# Not idempotent: `lc` is overwritten, so re-run the search cell before re-running this one.
lc = lc[0].download()#.remove_nans().remove_outliers()#.normalize().flatten()
lc.plot()#.to_periodogram().plot()

In [None]:
# Periodogram of the spot-check light curve (default settings).
pg = lc.to_periodogram()
pg.plot()

In [None]:
# Period at the highest periodogram peak.
pg.period_at_max_power

In [None]:
# Second spot-check target, used with row position 236 in the cells below.
name = 'EPIC 220205464'

In [None]:
# EPIC id stored at row position 236.
# NOTE: needs at least 237 rows; with the 15-row df_tiny this raises IndexError.
df_subset.iloc[236].EPIC

In [None]:
# Re-run the TESS search/measurement for this target, writing into row label 236 of df_subset.
download(name, "TESS", 236)

In [None]:
# Build a PLD-corrected light curve from the target pixel file and report
# the period at its highest periodogram peak.
tpf = lk.search_targetpixelfile('EPIC 246979864', author='K2').download()
pld = lk.correctors.PLDCorrector(tpf)
# Keep the corrected light curve itself. The previous version stored the
# period here and then called .to_periodogram() on it, which fails because
# the period is a quantity, not a light curve.
corrected_lc = pld.correct().remove_outliers()
corrected_lc.to_periodogram().period_at_max_power

## Plotting the data

In [None]:
# K2 period vs. TESS period for every source, with a one-to-one reference line.
plt.figure(figsize=(6,6))
plt.ylim(0.5, 10)
plt.xlim(0.5, 10)
# Raw strings: '\m' is not a valid Python escape sequence.
plt.xlabel(r'$P_{\mathrm{Kepler}}$')
plt.ylabel(r'$P_{\mathrm{TESS}}$')
# Title corrected: this figure compares periods, not amplitudes.
plt.title('Comparison between TESS and Kepler periods')

# One-to-one line: points on it have the same period in both missions.
x = [0.5, 10]
y = [0.5, 10]
plt.plot(x, y)

# plt.xticks(1)
# plt.yscale()

plt.plot(df_subset.Period_K2, df_subset.Period_TESS, 'r.')

plt.show()

## Recreate fig2.pdf plot from proposal

In [None]:
# Period vs. amplitude for both missions on log-log axes.
# The series are labeled so the legend tells K2 (black) from TESS (red).
plt.plot(df_subset.Period_K2, df_subset.Amplitude_K2, '.', color='black', label='K2')
plt.plot(df_subset.Period_TESS, df_subset.Amplitude_TESS, '.', color='red', label='TESS')

# plt.ylim(3e2, 2e5)
# plt.xlim(1e0, 1e2)

plt.xscale('log')
plt.yscale('log')

plt.axhline(1e3, linestyle='dotted', label='1%', color='purple')
plt.axvline(27, linestyle='dashed', label='27 days', color='purple')
# A single legend call; the earlier bare plt.legend() was immediately replaced by this one.
plt.legend(fontsize=12)

# Raw strings: '\m' and '\p' are not valid Python escape sequences.
plt.xlabel(r'$P_{\mathrm{rot}}$')
plt.ylabel(r'$\propto$ Amplitude (%)')
plt.title(r'Predicted for 4000 < $T_{\mathrm{eff}}$ < 4500 in TESS');