## Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ExpSineSquared, RationalQuadratic

## Load data

In [None]:
# Folder holding the analysis outputs (relative to the current working directory).
results_path = '../results/'
# Read the line-fit table, taking the first CSV column as the index ...
df = pd.read_csv(f'{results_path}/lines_norm_gdl_fit.csv', index_col=0)
# ... then order the rows by the 'spec_mjd' column so later slices are chronological.
df = df.sort_values(by='spec_mjd')

In [None]:
# Quick look at one column of the table (shown as the cell output).
df.loc[:, '17653.222_amp1']

In [None]:
# Shift the time axis by the earliest 'spec_mjd' truncated to an integer,
# so the values become offsets from that whole-number reference.
time_centered = df['spec_mjd'].sub(int(df['spec_mjd'].min()))

In [None]:
def _open_window(lo, hi):
    """Return a boolean mask for lo < time_centered < hi (both ends excluded)."""
    return (time_centered > lo) & (time_centered < hi)


# First selection: four disjoint windows between offsets 504 and 507,
# leaving out the short gaps between them.
mask1_1 = _open_window(504, 504.5)
mask1_2 = _open_window(504.58, 505.515)
mask1_3 = _open_window(505.58, 506.578)
mask1_4 = _open_window(506.61, 507)
# Original author note on this union: "32%" (its meaning is not stated here).
mask1 = mask1_1 | mask1_2 | mask1_3 | mask1_4
# Second selection: a single window between offsets 543 and 547.
mask2 = _open_window(543, 547)

In [None]:
# Scatter of the '17653.222_sum' column against shifted time, restricted to mask1.
plt.figure(figsize=(20, 10))
plt.scatter(x=time_centered[mask1], y=df.loc[mask1, '17653.222_sum'])

In [None]:
# Inputs: the mask1 times as an (n_samples, 1) column, the 2-D layout scikit-learn expects.
X = time_centered[mask1].to_numpy().reshape(-1, 1)
# Targets: '17653.222_sum' over the same rows, divided by its median and then mean-subtracted.
y = df.loc[mask1, '17653.222_sum'].to_numpy()
y = y / np.median(y)
y -= y.mean()
# Training subset: every second sample from position 20 up to (not including) 320.
X_train, y_train = X[20:320:2], y[20:320:2]

In [None]:
# Sanity check on the training-set dimensions (shown as the cell output).
np.shape(X_train)

In [None]:
# Full mask1 series as a line, with the training subset highlighted on top of it.
plt.figure(figsize=(30, 10))
plt.plot(X.ravel(), y)
plt.scatter(x=X_train, y=y_train, color='C1')

## GPR

In [None]:
# Search ranges for the kernel hyperparameters explored by the optimizer during fitting.
length_bounds = (1e-1, 1e1)
period_bounds = (1e-1, 1e1)
noise_bounds = (1e-3, 1e3)

# Candidate kernels. The leading `1 *` multiplies each by a constant (amplitude) kernel.
# Only `kernel_sine` and `kernel_noise` enter the model built below; `kernel_rbf` and
# `kernel_rq` are kept available as alternatives.
kernel_rbf = 1 * RBF(length_scale=1.0, length_scale_bounds=length_bounds)
kernel_sine = 1 * ExpSineSquared(length_scale=1.0, periodicity=1.0,
                                 length_scale_bounds=length_bounds, periodicity_bounds=period_bounds)
# NOTE: alpha deliberately reuses `period_bounds` here (same numeric range as in the original).
kernel_rq = 1 * RationalQuadratic(length_scale=1.0, alpha=1.0,
                                  length_scale_bounds=length_bounds, alpha_bounds=period_bounds)
# White-noise term; the initial noise level is 0.1**2.
kernel_noise = WhiteKernel(noise_level=0.1**2, noise_level_bounds=noise_bounds)

# Periodic signal plus white noise.
kernel = kernel_sine + kernel_noise
# The optimizer restarts draw random initial hyperparameters; a fixed `random_state`
# makes the fitted kernel reproducible from one run of the notebook to the next.
gaussian_process = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, random_state=0)

In [None]:
# Fit on the training subset; fit() returns the estimator itself, so the
# optimized kernel can be read off directly and shown as the cell output.
gaussian_process.fit(X_train, y_train).kernel_

In [None]:
# Dense prediction grid extending one time unit beyond the data on both sides.
X_test = np.linspace(X.min()-1, X.max()+1, 2000).reshape(-1,1)
# Posterior mean and standard deviation of the fitted model on that grid.
mean_prediction, std_prediction = gaussian_process.predict(X_test, return_std=True)

plt.figure(figsize=[30,10])
# All mask1 data (dotted line) with the training points drawn on top.
plt.plot(X, y, linestyle="dotted")
plt.scatter(X_train, y_train, label="Estimated total flux contributions")
plt.plot(X_test, mean_prediction, label="Mean prediction", color='C1')
# Band of +/- 1.96 standard deviations around the mean (95% for a Gaussian).
plt.fill_between(
    X_test.ravel(),
    mean_prediction - 1.96 * std_prediction,
    mean_prediction + 1.96 * std_prediction,
    alpha=0.5,
    label=r"95% confidence interval", color='C1'
)
#plt.xlim(504, 504.4)
plt.tick_params(labelsize=30)
plt.legend(fontsize=30)
plt.xlabel("Time since First Observation ($MJD_0$=58327)", fontsize=30)
plt.ylabel("Normalized Flux", fontsize=30)
# Raw string: "\m" is not a valid Python escape sequence, so a plain literal triggers an
# invalid-escape warning; the rendered mathtext is identical.
_ = plt.title(r"Gaussian Process Regression on 1.7653${\mu}m$ Total Flux Contributions", fontsize=30)

In [None]:
# Draw 10 functions from the fitted process on the prediction grid; transpose so
# that iterating over `gp_samples` yields one sampled curve at a time.
gp_samples = np.transpose(gaussian_process.sample_y(X_test, n_samples=10))

In [None]:
# Overlay every sampled curve on a single wide figure.
plt.figure(figsize=(30, 10))
grid = X_test.ravel()
for sampled_curve in gp_samples:
    plt.plot(grid, sampled_curve)