In [None]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from scipy import stats
from scipy import special
from scipy import integrate
from scipy import interpolate
import time
from pathlib import Path
import os
import random
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ConstantKernel

# Notebook-wide matplotlib defaults: figure size, resolution, and LaTeX text rendering.
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 120,
    'text.usetex': True,
})

import sys

sys.path.insert(0, '..')
from our_qso_code import structure_func as sf
from our_qso_code import regression as rg

In [None]:
# Read the light-curve table and pull out the epoch, magnitude and
# magnitude-error columns of image A as float64 arrays.
data = pd.read_table('../data/cosmograil_data/HE0435_Bonvin2016.rdb_.txt')

t, y, err_y = (
    data[column].to_numpy(dtype=np.float64)
    for column in ('mhjd', 'mag_A', 'magerr_A')
)

# Number of epochs in the light curve.
N = len(t)

plt.scatter(t, y)

In [None]:
# Estimate the empirical structure function from the observed light curve.
# NOTE(review): downstream cells treat `tau` as the lags (in days) and `v` as the
# structure-function values at those lags — confirm against our_qso_code.structure_func.
tau, v = sf.estimate_structure_func_from_data(t, y, err_y)

In [None]:
def power_law_sf(tau, slope, intercept):
    """Evaluate the power-law structure function model.

    The model is a straight line in log10-log10 space:
    ``log10(SF) = intercept + slope * log10(tau)``.

    Parameters
    ----------
    tau : scalar or array-like
        Lag(s) at which to evaluate the model.
    slope : float
        Power-law exponent.
    intercept : float
        log10 of the amplitude at ``tau == 1``.

    Returns
    -------
    Same shape as ``tau``: ``10**intercept * tau**slope``.
    """
    amplitude = 10**intercept
    scaling = tau**slope
    return amplitude * scaling

def fit_sf(tau, v, cut_off):
    """Fit a power law to the first ``cut_off`` points of a structure function.

    A linear regression is performed on ``log10(v)`` versus ``log10(tau)``
    using only ``tau[:cut_off]`` and ``v[:cut_off]``.

    Parameters
    ----------
    tau : array
        Lags.
    v : array
        Structure-function values at those lags.
    cut_off : int
        Number of leading points included in the fit.

    Returns
    -------
    (slope, intercept) of the fitted line in log10-log10 space.
    """
    log_tau = np.log10(tau[:cut_off])
    log_v = np.log10(v[:cut_off])
    fit = stats.linregress(log_tau, log_v)
    return fit.slope, fit.intercept

def spline_sf(tau, v):
    """Build a smoothing-spline model of the structure function.

    Parameters
    ----------
    tau : array
        Lags (abscissae of the spline).
    v : array
        Structure-function values at those lags.

    Returns
    -------
    scipy.interpolate.UnivariateSpline
        Degree-5 spline with smoothing factor 1e-6, callable on new lags.
    """
    return interpolate.UnivariateSpline(tau, v, k=5, s=1e-6)

In [None]:
# Only the first 65% of the lag entries are used for the power-law fit and the plot.
cut_off = int(0.65*len(tau))
slope, intercept = fit_sf(tau, v, cut_off)
v_spline = spline_sf(tau, v)

tau_fit = tau[:cut_off]
v_fit = v[:cut_off]

# Data points, power-law fit and spline model on log-log axes.
plt.loglog(tau_fit, v_fit, linestyle='None', marker='o')
plt.loglog(tau_fit, power_law_sf(tau_fit, slope, intercept))
plt.loglog(tau_fit, v_spline(tau_fit))
print('Max lag considered: %s days' % tau[cut_off-1])

In [None]:
# Time shift applied to the second copy of the epochs.
delta = 20

# Stack the original epochs with the shifted copy and compute the lag matrix between them.
t_doubled = np.concatenate((t, t - delta))
tau_doubled = sf.compute_lags_matrix(t_doubled)

# Covariance model: mean of y^2 minus the fitted power-law structure function.
y2_mean = np.mean(y**2)
sf_model = power_law_sf(tau_doubled, slope, intercept)
C = y2_mean - sf_model
# Alternative using the spline model instead of the power law:
#C = y2_mean - v_spline(tau_doubled)

print('detC = %s' % np.linalg.det(C))
plt.imshow(C, cmap='bwr')
plt.colorbar()

In [None]:
np.random.seed(1234)

# Add a tiny diagonal jitter before factorising, exactly as the multi-delay
# simulation loop further down does: the Cholesky factorisation raises
# LinAlgError if C is only numerically positive semi-definite.
# A copy is factorised so that C itself is left unchanged.
C_jittered = C + 1e-10*np.eye(C.shape[0])
R = np.linalg.cholesky(C_jittered)

# Correlated Gaussian draw: first N samples are curve A, last N the shifted curve.
y_new = R @ np.random.normal(0, 1, size=2*N)

yA = y_new[:N]
yB = y_new[N:]

# Remove the mean of each simulated curve (in place, on the views into y_new).
yA -= yA.mean()
yB -= yB.mean()

plt.scatter(t, yA, label='Montecarlo A')
plt.scatter(t, yB, label='Montecarlo A shifted')
plt.scatter(t, y-y.mean(), label='true A')
plt.legend()

In [None]:
# GP covariance: a constant amplitude term multiplied by a Matern (nu = 1.5) kernel.
amplitude_kernel = ConstantKernel(constant_value=2, constant_value_bounds=(1e-3, 1e2))
matern_kernel = Matern(length_scale=200.0, length_scale_bounds=(1, 300), nu=1.5)
kernel = amplitude_kernel * matern_kernel

In [None]:
# Fit one GP per simulated curve (A first, then the shifted copy), sharing the
# epochs, the measurement errors and the kernel specification.
gp1, gp2 = (
    rg.fit_GP_to_lightcurve(t, curve, err_y, kernel)
    for curve in (yA, yB)
)

In [None]:
# Regular prediction grid extending 50 time units beyond both ends of the data.
gp_step = 0.2
support = np.arange(t[0] - 5e1, t[-1] + 5e1, gp_step)

# predict() is given a 2-D column of inputs, hence the extra axis.
support_column = support[:, np.newaxis]
y_pred1, sigma1 = gp1.predict(support_column, return_std=True)
y_pred2, sigma2 = gp2.predict(support_column, return_std=True)

In [None]:
plt.figure()

# Two-sided 95% quantile of a standard normal: sqrt(2) * erfinv(0.95) ~= 1.96.
# (erfinv(0.95) alone is ~1.39, which only covers ~83% of the distribution.)
z95 = np.sqrt(2.0) * special.erfinv(0.95)

# Blue = curve A (yA, fitted by gp1); green = shifted curve B (yB, fitted by gp2).
plt.plot(t, yA, 'b.', markersize=3, label='MontecarloA')
plt.plot(t, yB, 'g.', markersize=3, label='MontecarloB')
plt.plot(support, y_pred1, 'b-', label='PredictionA')
plt.plot(support, y_pred2, 'g-', label='PredictionB')
# '%' is escaped because text.usetex is enabled for this notebook: an unescaped
# '%' starts a TeX comment and breaks the label.
plt.fill_between(support, y_pred1 - z95*sigma1, y_pred1 + z95*sigma1,
         alpha=.5, fc='b', ec='None', label=r'95\% confidence interval A')
plt.fill_between(support, y_pred2 - z95*sigma2, y_pred2 + z95*sigma2,
         alpha=.5, fc='g', ec='None', label=r'95\% confidence interval B')
plt.xlabel('$t$')
plt.ylabel('$f(t)$')
plt.legend()

In [None]:
# Grid-search the delay between the two GP predictions over the window [0, 100].
search_window = {'dt_min': 0, 'dt_max': 100}
delay = rg.time_delay_grid_search(
    y_pred1, y_pred2, sigma1, sigma2, gp_step, **search_window
)
print('Estimated time delay: %s days' % delay)

In [None]:
t0 = time.time()

# Set to True to also run the (slow) GP fit + delay grid search on every simulated
# pair of curves. Off by default, which matches the previous state of this cell
# where that code was disabled.
run_delay_estimation = False

true_delays = np.arange(10, 60, 1)
np.random.seed(1234)
simulated_curves = {}
# Must be initialised here: it is converted to an array after the loop, and
# previously was never defined (NameError on a fresh kernel).
estimated_delays = []

# Loop-invariant quantities.
y2_mean = (y**2).mean()
if run_delay_estimation:
    gp_step = 0.2
    support = np.arange(t[0] - 5e1, t[-1] + 5e1, gp_step)

for delay in true_delays:
    print(delay)
    # Epochs of curve A stacked with the epochs shifted by the true delay.
    t_doubled = np.concatenate([t, t-delay])
    tau_doubled = sf.compute_lags_matrix(t_doubled)

    # Covariance from the fitted power-law structure function, with a small
    # diagonal jitter so the Cholesky factorisation does not fail numerically.
    C = y2_mean - power_law_sf(tau_doubled, slope, intercept)
    C += 1e-10*np.eye(C.shape[0])
    R = np.linalg.cholesky(C)

    # Correlated Gaussian draw, split into the two curves and mean-subtracted.
    y_new = R @ np.random.normal(0, 1, size=2*N)
    yA = y_new[:N]
    yB = y_new[N:]
    yA -= yA.mean()
    yB -= yB.mean()
    simulated_curves[delay] = [yA, yB]

    if run_delay_estimation:
        gp1 = rg.fit_GP_to_lightcurve(t, yA, err_y, kernel)
        gp2 = rg.fit_GP_to_lightcurve(t, yB, err_y, kernel)
        y_pred1, sigma1 = gp1.predict(np.expand_dims(support, 1), return_std=True)
        y_pred2, sigma2 = gp2.predict(np.expand_dims(support, 1), return_std=True)
        estimated_delay = rg.time_delay_grid_search(y_pred1, y_pred2, sigma1, sigma2, gp_step,
                                                    dt_min=0, dt_max=100)
        estimated_delays.append(estimated_delay)

# Empty array when run_delay_estimation is False.
estimated_delays = np.array(estimated_delays)
tf = time.time()
print('Elapsed time: %s' % (tf - t0))

In [None]:
# Show the simulated pair generated for one chosen true delay next to the
# mean-subtracted observed curve.
d = 20
reference_curve, shifted_curve = simulated_curves[d]

plt.scatter(t, reference_curve, label='ref delay %s ' % d)
plt.scatter(t, shifted_curve, label='shifted')
plt.scatter(t, y-y.mean(), label='HE0435')
plt.legend()