In [None]:
import numpy as np
import pandas as pd
import itertools
import h5py
import matplotlib.pyplot as plt
from scipy import stats
from scipy import special
from scipy import integrate
from scipy import interpolate
from scipy import linalg
from scipy import signal
from scipy.optimize import curve_fit
import time
from pathlib import Path
import os
import random
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ConstantKernel

# Notebook-wide matplotlib defaults, set in a single call.
# NOTE: 'text.usetex' makes matplotlib render all text through LaTeX, so a
# LaTeX installation must be available where the notebook is executed.
plt.rcParams.update({
    'figure.figsize': (12, 10),
    'figure.dpi': 120,
    'text.usetex': True,
})

import sys
import mpld3

#mpld3.enable_notebook()

sys.path.insert(0, '..')
from modules import prh
from modules import regression as rg
from modules import utils

In [None]:
# Open the light-curve dataset and fetch the original quasar data from it.
dataset = prh.LightCurvePRHDataset(input_file='../aux/HE0435_delay_50d_NMC_100.h5')

qso_data = dataset.getOriginalQSOData()

# `[()]` reads each stored entry in full.
# NOTE(review): presumably these are HDF5 datasets materialised as NumPy
# arrays -- confirm against modules.prh.
t = qso_data['time_domain'][()]
qso_lc_vals = qso_data['qso_light_curve_values'][()]
qso_lc_errs = qso_data['qso_light_curve_errors'][()]
qso_id = 'HE0435'

# Destination for every figure saved further down. savefig() does not create
# missing parent directories, so make sure the folder exists before plotting.
outdir = Path('../aux/outputs/')
outdir.mkdir(parents=True, exist_ok=True)

# Work with a single realization (index 5) of the dataset.
r = dataset.getMCRealization(5)

# Values and errors of the two curves, read from the 'A' / 'B' keys.
y1 = r['yA'][()]
y2 = r['yB'][()]
err1 = r['errA'][()]
err2 = r['errB'][()]

In [None]:
# Times at which each season begins / ends: the utils helpers return indices
# into `t`, which are used here to pick the corresponding time stamps.
t_begs, t_ends = t[utils.get_season_begs(t)], t[utils.get_season_ends(t)]

In [None]:
# Kernel specification shared by both fits: a constant amplitude multiplied by
# a Matern(nu=1.5) covariance with bounded hyper-parameters.
kernel = (
    ConstantKernel(2, (1e-3, 1e2))
    * Matern(length_scale=200.0, length_scale_bounds=(1, 300), nu=1.5)
)

# One GP per light curve, each fitted on the same time stamps.
gp1 = rg.fit_GP_to_lightcurve(t, y1, err1, kernel)
gp2 = rg.fit_GP_to_lightcurve(t, y2, err2, kernel)

# Spacing of the regular prediction grid (same units as `t`).
gp_step = 0.2
# Padding (same units as `t`, NOT a number of points) added before t[0] and
# after t[-1] so that one curve can later be shifted against the other.
dt_ext = 100
support = np.arange(t[0] - dt_ext, t[-1] + dt_ext, gp_step)

# predict() is given a 2-D column (n_samples, 1); build it once for both GPs.
support_column = support[:, np.newaxis]
ypred1, sigma1 = gp1.predict(support_column, return_std=True)
ypred2, sigma2 = gp2.predict(support_column, return_std=True)

In [None]:
# Count the grid points of `support` lying inside any [season begin, season end]
# interval (both ends inclusive).
n_p = 0
for beg, end in zip(t_begs, t_ends):
    inside_season = (beg <= support) & (support <= end)
    n_p += np.count_nonzero(inside_season)

In [None]:
# Fraction of the prediction grid that falls outside every season interval.
(support.size - n_p) / support.size

In [None]:
label1 = 'A'
label2 = 'B'

CL = 0.95
# Half-width of the two-sided Gaussian interval in units of sigma:
# P(|x - mu| <= z*sigma) = erf(z / sqrt(2)) = CL  =>  z = sqrt(2) * erfinv(CL),
# i.e. z ~ 1.96 for CL = 0.95. Using erfinv(CL) alone (~1.39) would draw a band
# that covers only about 83% while being labelled as 95%.
z_CL = np.sqrt(2) * special.erfinv(CL)
# The percentage in the legend follows CL; '\%' keeps the label valid under usetex.
ci_label = r'%.0f\%% confidence interval' % (100 * CL)

fig = plt.figure()
plt.scatter(t, y1, label=label1, color='b')
plt.scatter(t, y2, label=label2, color='g')

plt.plot(support, ypred1, label='%s GP prediction' % label1, color = 'b')
plt.plot(support, ypred2, label='%s GP prediction' % label2, color = 'g')

plt.fill_between(support, ypred1 - z_CL*sigma1, ypred1 + z_CL*sigma1,
         alpha=.5, fc='b', ec='None', label='%s %s' % (ci_label, label1))
plt.fill_between(support, ypred2 - z_CL*sigma2, ypred2 + z_CL*sigma2,
         alpha=.5, fc='g', ec='None', label='%s %s' % (ci_label, label2))
plt.legend(fontsize=10)

plt.xlabel('t[MJD]')
plt.ylabel('mag')
# The plotted curves are an MC realization generated from `qso_id`; the title
# used to name an unrelated object.
plt.title('GP regression of MC light curves generated from %s' % qso_id)

In [None]:
def stencil_derivative(f, step):
    """Five-point central-difference estimate of the first derivative.

    Evaluates (f[i-2] - 8*f[i-1] + 8*f[i+1] - f[i+2]) / (12*step) for every
    index i that has two neighbours on each side.

    Parameters
    ----------
    f : array supporting NumPy-style slicing and arithmetic
        Samples taken on a grid with constant spacing `step`.
    step : float
        Grid spacing.

    Returns
    -------
    Array with four fewer samples than `f` along the first axis; entry k is the
    derivative estimate at f[k + 2].
    """
    outer_left, inner_left = f[:-4], f[1:-3]
    inner_right, outer_right = f[3:-1], f[4:]
    numerator = outer_left - 8*inner_left + 8*inner_right - outer_right
    return numerator / (12*step)

def numeric_derivative(f: np.ndarray, step: float) -> np.ndarray:
    """Forward-difference estimate of the first derivative.

    Parameters
    ----------
    f : np.ndarray
        Samples taken on a grid with constant spacing `step`.
    step : float
        Grid spacing.

    Returns
    -------
    np.ndarray
        (f[i+1] - f[i]) / step for each consecutive pair, i.e. one sample
        fewer than `f` along the first axis.
    """
    following = f[1:]
    preceding = f[:-1]
    return (following - preceding) / step

def WAV(f, sigma, step):
    """Weighted average of the absolute forward-difference derivative of `f`.

    Each finite difference between samples i and i+1 is weighted by the inverse
    of the mean of sigma[i] and sigma[i+1], so stretches with a small `sigma`
    contribute more to the average.

    Parameters
    ----------
    f : np.ndarray
        Curve sampled on a regular grid.
    sigma : np.ndarray
        Per-sample uncertainty, same length as `f`.
    step : float
        Grid spacing.

    Returns
    -------
    Scalar weighted average of |df/dt|.
    """
    abs_slope = np.abs(numeric_derivative(f, step))
    # 2 / (s_i + s_{i+1}) is the reciprocal of the mean sigma of each pair.
    neighbour_sigma_sum = sigma[:-1] + sigma[1:]
    weights = 2 / neighbour_sigma_sum
    # A five-point variant would use stencil_derivative(f, step) together with
    # weights taken from sigma[2:-2] so that the lengths still match.
    return np.dot(abs_slope, weights) / weights.sum()

In [None]:
# Range of trial lags (same units as `t`) over which the WAV loss is scanned.
dt_min_days =  0
dt_max_days =  100

# The support was padded by `dt_ext` on each side, so only lags within
# [-dt_ext, dt_ext] keep the shifted window inside the predicted arrays.
if not (-dt_ext <= dt_min_days < dt_max_days <= dt_ext):
    raise ValueError(
        'lag range [%s, %s) must be non-empty and lie within +/- dt_ext = %s'
        % (dt_min_days, dt_max_days, dt_ext))

# round() instead of int(): float quotients such as 0.6 / 0.2 evaluate to
# 2.9999999999999996 and would otherwise be truncated one grid step short.
shift = np.arange(round(dt_min_days / gp_step),
                  round(dt_max_days / gp_step), 1)
# Number of grid points corresponding to the padding on one side.
win = round(dt_ext / gp_step)

# Slices are built from an explicit length rather than a negative stop index:
# `a[win + i : -win + i]` silently turns into an empty or wrong slice as soon
# as `-win + i` reaches zero (i == win, or win == 0).
n_core = len(ypred1) - 2 * win
core = slice(win, win + n_core)

WAV_values = []
for i in shift:
    shifted = slice(win + i, win + i + n_core)
    diff = ypred1[core] - ypred2[shifted]
    # Pointwise uncertainty used for weighting: linear sum of both GP sigmas.
    sigma_diff = sigma1[core] + sigma2[shifted]
    WAV_values.append(WAV(diff, sigma_diff, gp_step))


# The estimated delay is the lag that minimises the loss.
delay_idx = np.argmin(WAV_values)
delay = shift[delay_idx] * gp_step
print(delay)

In [None]:
# WAV loss as a function of the trial lag, saved as a PDF in `outdir`.
fig, ax = plt.subplots()
ax.plot(shift*gp_step, WAV_values)
ax.set(
    xlabel='time lag[days]',
    ylabel='WAV',
    title='WAV loss function for MC generated from %s' % qso_id,
)
fig.savefig(outdir / ('wav_MC_%s.pdf' % qso_id))

In [None]:
# Position in `shift` / `WAV_values` of the lag to inspect.
shift_idx = 251
myshift = shift[shift_idx]
time_shift = myshift*gp_step

fig, axs = plt.subplots(figsize=(15,30), nrows=3, ncols=1)

# Un-padded part of the grid. Slices use an explicit length instead of a
# negative stop index, which would become empty/wrong once `-win + myshift`
# reaches zero.
n_core = len(support) - 2 * win
core = slice(win, win + n_core)
shifted = slice(win + myshift, win + myshift + n_core)

t_dom = support[core]

axs[0].plot(t_dom, ypred1[core],
            label='magA predicted')
axs[0].plot(t_dom, ypred2[shifted],
            label='magB shifted by %.2f days' % (time_shift))

diff = ypred1[core] - ypred2[shifted]

axs[1].plot(t_dom, diff,
            label='difference curve for shift %.2f days' % (time_shift))

der_diff = stencil_derivative(diff, gp_step)

# The five-point stencil drops two samples at each end of the curve.
axs[2].plot(t_dom[2:-2], der_diff,
            label='derivative of difference curve for shift %.2f days' % (time_shift))

axs[0].legend()
axs[1].legend()
# The third panel also carries a label, so it needs its legend drawn too.
axs[2].legend()
axs[0].set_title('GP predicted light curves')
# WAV_values is ordered like `shift`, so it must be indexed by position
# (shift_idx), not by the lag value (myshift); the two only coincide while
# the scan starts at lag 0.
axs[1].set_title('Difference curve, WAV = %.2f' % WAV_values[shift_idx])
axs[2].set_title('Derivative of difference curve, shift %.2f' % time_shift)

fig.savefig(outdir / f'regression_diff_shift_{time_shift:.0f}.pdf')