Estimating the size of the dataset (TESS or a similar survey)

In [None]:
# Back-of-the-envelope storage estimate. Every input below is a guess.
years = 8
sample_interval_min = 5
pixels_per_sample = 10 * 10
bytes_per_pixel = 4
stars = 2000

# Estimating the size.
minutes = years * 365 * 24 * 60
# Floor division keeps the sample count (and therefore the byte total) an
# integer, so the total prints as a whole number of bytes instead of "...0.0".
samples_per_star = minutes // sample_interval_min
total_size = samples_per_star * pixels_per_sample * bytes_per_pixel * stars

print(f'{total_size:,} bytes')

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
# Directory holding the CSV files: <parent of the working directory>/data/CONFIRMED.
DATA = Path.cwd().parent.joinpath("data", "CONFIRMED")

In [None]:
# Path of the K2-3 CSV file inside the data directory.
K2_3 = DATA.joinpath("K2-3.csv")

In [None]:
# Load the K2-3 CSV file into a DataFrame.
df = pd.read_csv(K2_3)

In [None]:
# Preview the first rows of the table.
df.head()

In [None]:
# (number of rows, number of columns) of the table.
df.shape

In [None]:
# Plot flux against time as a black line on a wide 12x4 figure.
fig, ax = plt.subplots(figsize=(12, 4))
ax.set_title("K2-3")
ax.plot(df["time"], df["flux"], "k-")
plt.show()

# Copying some code from astropy as a learning exercise

## Estimating periods

In [None]:
# Pull out the three columns used by the period search:
# timestamps, flux values and the per-sample flux uncertainties.
t, y, dy = df["time"], df["flux"], df["flux_err"]

In [None]:
from utils import auto_max_min_period, autoperiod, autophase

In [None]:
# Ask the utils helper for three values derived from the timestamps
# (presumably the period search bounds and the total time span - confirm in utils).
minimum_period, maximum_period, total_duration = auto_max_min_period(t)

In [None]:
# Display the three values returned by auto_max_min_period.
minimum_period, maximum_period, total_duration

In [None]:
# Build the grid of trial periods with the utils helper and report how many
# trial periods it contains.
periods = autoperiod(minimum_period, maximum_period, total_duration)
print(len(periods))

In [None]:
import sys

from tqdm import tqdm

In [None]:
def spec_generator(t):
    """Lazily yield every ``(period, duration, phase)`` trial for timestamps ``t``.

    The trial periods come from ``autoperiod`` fed with the values returned by
    ``auto_max_min_period(t)``.  For each period, 50 durations evenly spaced
    between 1% and 5% of that period are tried, and for each
    (period, duration) pair the phases come from ``autophase``.  A tqdm
    progress bar tracks the outer loop over periods.
    """
    period_args = auto_max_min_period(t)
    # Durations are expressed as fractions of the trial period.
    duration_fractions = np.linspace(0.01, 0.05, 50)
    for period in tqdm(autoperiod(*period_args)):
        for duration in duration_fractions * period:
            for phase in autophase(period=period, duration=duration):
                yield period, duration, phase

In [None]:
from numba import njit, cuda, jit


def compute_trel(t):
    """Return the timestamps ``t`` shifted so that the earliest one is zero."""
    origin = np.min(t)
    return t - origin


def normalize(y):
    """Return a standardized copy of ``y``: mean removed, scaled by its std.

    The input is never modified; a new array is returned.  Integer input is
    accepted and produces a floating-point result.

    Args:
        y: array of values to standardize.

    Returns:
        ``(y - mean(y)) / (std(y) + eps)`` where ``eps`` is the float machine
        epsilon.  For constant input the result is all zeros.
    """
    centered = y - np.mean(y)
    # The epsilon keeps the division finite when every value is identical
    # (standard deviation of exactly zero).
    return centered / (np.std(centered) + sys.float_info.epsilon)


def compute_weights(dy):
    """Return inverse-variance weights ``1 / dy**2`` normalized to sum to one.

    Args:
        dy: array of per-sample uncertainties.

    Returns:
        Array of weights with the same shape as ``dy`` whose sum is 1.

    Raises:
        ValueError: if the total weight is not finite (for example when an
            uncertainty is zero or NaN) or is not greater than the float
            machine epsilon, in which case the weights cannot be normalized.
    """
    # Division warnings are silenced here because invalid input is reported
    # explicitly through the ValueError below.
    with np.errstate(divide="ignore", invalid="ignore"):
        w = 1.0 / dy**2
    total = np.sum(w)
    # An explicit check (rather than ``assert``) still runs under ``python -O``.
    if not np.isfinite(total) or np.fabs(total) <= sys.float_info.epsilon:
        raise ValueError(
            "cannot normalize weights: total inverse-variance weight is "
            f"{total!r}; check dy for zero, NaN or extremely large values"
        )
    return w / total


@njit(parallel=True)
def model(trel, y, w, period, duration, phase):
    """Score one ``(period, duration, phase)`` trial; smaller values are better fits.

    A sample counts as in transit when its time folded on ``period`` lies in
    the closed interval ``[phase, phase + duration]``.  With ``r`` the summed
    weight of the in-transit samples and ``s`` their weighted sum of ``y``,
    the returned value is ``sum(w * y**2) - s**2 / (r * (1 - r) + eps)``.

    NOTE(review): the interval test does not wrap around, so when
    ``phase + duration > period`` the samples at the start of the next cycle
    are not flagged as in transit - confirm this is intended.
    """
    folded = np.fmod(trel, period)
    window_end = phase + duration
    in_transit = (folded >= phase) & (folded <= window_end)

    weight_in = np.sum(w * in_transit)
    flux_in = np.sum(w * y * in_transit)
    total_power = np.sum(w * y * y)

    # The machine epsilon guards against a zero denominator when no sample
    # (or all of the weight) falls inside the transit window.
    denominator = weight_in * (1 - weight_in) + np.finfo(np.float64).eps
    return total_power - (flux_in**2) / denominator


def new_bls(t, y, dy, spec_generator):
    """Search the trial grid for the box model with the smallest ``model`` value.

    Args:
        t: sample timestamps.
        y: measured values at each timestamp.
        dy: uncertainty of each measurement.
        spec_generator: iterable yielding ``(period, duration, phase)`` trials.

    Returns:
        Tuple ``(best_period, best_duration, best_phase, best_d_value)`` for
        the trial with the lowest value returned by ``model``.  The first
        trial wins ties.  If the iterable is empty the result is
        ``(None, None, None, inf)``.
    """
    # Work on float ndarrays so pandas Series or integer input can be handed
    # to the compiled model.  ``y`` is copied explicitly so the caller's data
    # is never modified by the normalization step.
    t = np.asarray(t, dtype=float)
    y = np.array(y, dtype=float)
    dy = np.asarray(dy, dtype=float)

    trel = compute_trel(t)
    y = normalize(y)
    w = compute_weights(dy)

    best_d_value = np.inf
    best_period = None
    best_duration = None
    best_phase = None

    for period, duration, phase in tqdm(spec_generator):
        d_value = model(trel, y, w, period, duration, phase)

        # Strict comparison: keep the earliest trial among equal scores.
        if d_value < best_d_value:
            best_d_value = d_value
            best_period = period
            best_duration = duration
            best_phase = phase

    return best_period, best_duration, best_phase, best_d_value

In [None]:
# Ground-truth parameters of the synthetic periodic box signal.
real_period = 50
real_phase = 5
real_duration = 0.1 * real_period
real_diff = 0.05

# A cosine centred on the middle of the dip exceeds this value exactly while
# the time lies within half a duration of that centre.
threshold = np.cos(np.pi * real_duration / real_period)

# 4000 evenly spaced samples over [0, 400] with a constant uncertainty of 0.01.
t = np.linspace(0, 400, 4000)
dy = 0.01 * np.ones(t.shape)

# Flux is 1.0 outside the dips and 1.0 - real_diff inside them.
y = 1.0 - real_diff * np.greater(
    np.cos(2.0 * np.pi * (t - real_phase - real_duration / 2.0) / real_period),
    threshold,
).astype(float)

In [None]:
def spec_gen_gambiarra():
    """Yield ``(period, duration, phase)`` trials from a small hard-coded grid.

    Periods: 101 values evenly spaced from 45 to 55.
    Durations: 11 values evenly spaced from 1 to 11.
    Phases: 0 up to (but excluding) the period, in steps of 0.5.
    """
    durations = np.linspace(1, 11, 11)
    for period in np.linspace(45, 55, 101):
        # The phase grid depends only on the period, so build it once per period.
        phases = np.arange(0, period, 0.5)
        for duration in durations:
            for phase in phases:
                yield period, duration, phase

In [None]:
# Alternative left disabled: search the automatically generated grid instead.
# spec_gen = spec_generator(t)
# Use the small hard-coded grid and run the search on the current t, y, dy
# (the synthetic light curve when the cells are run in order).
spec_gen = spec_gen_gambiarra()
best_period, best_duration, best_phase, best_d_value = new_bls(t, y, dy, spec_gen)

In [None]:
# Display the best trial found by the search and its score.
best_period, best_duration, best_phase, best_d_value