In [None]:
import numpy as np
import pandas as pd
import itertools
import h5py
from astropy.stats import sigma_clip
import matplotlib.pyplot as plt
from scipy import stats
from scipy import special
from scipy import integrate
from scipy import interpolate
from scipy import linalg
from scipy import signal
from scipy.optimize import curve_fit
import time
from pathlib import Path
import os
import random
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ConstantKernel

# Notebook-wide matplotlib defaults: default figure size (8, 6) at 120 dpi.
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['figure.dpi'] = 120
# LaTeX text rendering is left switched off.
#plt.rcParams['text.usetex'] = True

import sys
import mpld3

from typing import Tuple

def power_law_sf(tau, slope, intercept):
    """Evaluate the power-law structure function ``10**intercept * tau**slope``.

    Parameters
    ----------
    tau : scalar or array
        Lag(s) at which the model is evaluated.
    slope : scalar
        Exponent applied to ``tau``.
    intercept : scalar
        Base-10 logarithm of the amplitude.

    Returns
    -------
    Same shape as ``tau``: the model value at each lag.
    """
    amplitude = 10**intercept
    return amplitude * tau**slope


def exp_sf(tau, V0, dt0):
    """Evaluate the exponential structure function ``V0 * (1 - exp(-tau / dt0))``.

    Parameters
    ----------
    tau : scalar or array
        Lag(s) at which the model is evaluated.
    V0 : scalar
        Multiplicative amplitude of the model.
    dt0 : scalar
        Scale by which ``tau`` is divided inside the exponential.

    Returns
    -------
    Same shape as ``tau``: the model value at each lag.
    """
    decay = np.exp(-tau/dt0)
    return V0*(1 - decay)


def spline_sf(tau, v):
    """Build a cubic smoothing spline through the points ``(tau, v)``.

    Parameters
    ----------
    tau : array
        Abscissae handed to ``UnivariateSpline`` as ``x``.
    v : array
        Ordinates handed to ``UnivariateSpline`` as ``y``.

    Returns
    -------
    scipy.interpolate.UnivariateSpline
        Callable spline of degree 3 built with smoothing factor ``1e-6``.
    """
    return interpolate.UnivariateSpline(tau, v, k=3, s=1e-6)


def compute_lags_matrix(t) -> np.ndarray:
    """Return the matrix of absolute pairwise differences ``|t[j] - t[i]|``.

    Parameters
    ----------
    t : array-like, 1-D
        Sample times. Any sequence accepted by ``np.asarray`` works
        (lists and tuples as well as arrays).

    Returns
    -------
    np.ndarray
        Symmetric ``(N, N)`` array with zeros on the diagonal, where
        ``N = len(t)``. An empty input gives a ``(0, 0)`` array.
    """
    t = np.asarray(t)
    # Broadcasting a row against a column yields every pairwise difference
    # without first materialising two repeated N x N copies of ``t``.
    return np.abs(t[np.newaxis, :] - t[:, np.newaxis])


def estimate_structure_func_from_data(t, y, err_y, n_bins=100) -> Tuple[np.ndarray, np.ndarray]:
    """Estimate a binned, noise-corrected structure function from a light curve.

    For every ordered pair of samples ``(i, j)`` the quantity
    ``(y[j] - y[i])**2 - (err_y[i]**2 + err_y[j]**2)`` is formed and then
    averaged in ``n_bins`` equal-width bins of the lag ``|t[j] - t[i]|``.

    Parameters
    ----------
    t, y, err_y : array-like, 1-D, same length
        Sample times, values and their uncertainties.
    n_bins : int, optional
        Number of lag bins (default 100, the value previously hard-coded).

    Returns
    -------
    tau_binned : np.ndarray
        Mean lag of the pairs falling in each bin.
    v_binned : np.ndarray
        Mean noise-corrected squared difference in each bin.
        Bins that receive no pair are NaN in both arrays (behaviour of
        ``scipy.stats.binned_statistic`` with the mean statistic).
    """
    y = np.asarray(y, dtype=float)
    err_y = np.asarray(err_y, dtype=float)

    # Pairwise squared differences minus the summed noise variances.
    squared_diff = (y[np.newaxis, :] - y[:, np.newaxis])**2
    noise_var = err_y[np.newaxis, :]**2 + err_y[:, np.newaxis]**2
    v = squared_diff - noise_var

    # NOTE(review): all N*N ordered pairs are binned, including each sample
    # paired with itself (lag 0, value -2*err**2), which lands in the lowest
    # bin -- confirm that this is intended.
    tau_flat = compute_lags_matrix(t).ravel()

    # Binning does not need sorted input, so both statistics are computed in a
    # single pass over the flattened pair arrays.
    tau_binned, v_binned = stats.binned_statistic(
        tau_flat, [tau_flat, v.ravel()], statistic='mean', bins=n_bins
    )[0]
    return tau_binned, v_binned

def generate_PRH_light_curves(support, y, sigma, slope, intercept, delay):
    """Draw a pair of mock light curves separated by a time delay.

    The two curves are sampled jointly from a zero-mean Gaussian whose
    covariance between two epochs is ``mean(y**2) - power_law_sf(lag)``,
    evaluated on the epochs ``support`` (first curve) and ``support - delay``
    (second curve). Independent Gaussian noise with standard deviation
    ``sigma`` is then added to every sample of both curves.

    Parameters
    ----------
    support : np.ndarray, 1-D
        Epochs of the first curve.
    y : np.ndarray
        Reference curve; only ``mean(y**2)`` is used, as the zero-lag variance.
    sigma : np.ndarray, 1-D, same length as ``support``
        Per-epoch noise standard deviation, applied to both curves.
    slope, intercept : float
        Parameters forwarded to ``power_law_sf``.
    delay : float
        Shift subtracted from ``support`` to obtain the second curve's epochs.

    Returns
    -------
    yA, yB : np.ndarray
        The two simulated curves, each of length ``len(support)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix is not positive definite.
    """
    N = len(support)
    t_doubled = np.concatenate([support, support - delay])
    err_doubled = np.concatenate([sigma, sigma])
    tau_doubled = compute_lags_matrix(t_doubled)

    s2 = (y**2).mean()
    C = s2 - power_law_sf(tau_doubled, slope, intercept)
    # Small diagonal jitter to help the Cholesky factorisation numerically.
    C += 1e-10*np.eye(2*N)
    L = np.linalg.cholesky(C)

    correlated = L @ np.random.normal(0, 1, 2*N)
    # Element-wise product: each sample gets its own noise draw scaled by its
    # own error. (`@` between two 1-D arrays is an inner product and would add
    # one common scalar offset to every sample instead.)
    noise = err_doubled * np.random.normal(0, 1, 2*N)
    y_double = correlated + noise

    yA = y_double[:N]
    yB = y_double[N:]
    return yA, yB

In [None]:
# Read the light-curve table; the object id is the file-name part before the
# first underscore.
file_path = Path('../data/cosmograil/HE0435_Bonvin2016.rdb_.txt')
qso_data = pd.read_table(file_path)
qso_id = file_path.name.split('_')[0]

# Epochs and the magnitudes of the four images as float64 arrays.
t, A, B, C, D = (
    qso_data[column].to_numpy(dtype=np.float64)
    for column in ('mhjd', 'mag_A', 'mag_B', 'mag_C', 'mag_D')
)
# Matching magnitude uncertainties.
errA, errB, errC, errD = (
    qso_data[column].to_numpy(dtype=np.float64)
    for column in ('magerr_A', 'magerr_B', 'magerr_C', 'magerr_D')
)

In [None]:
# Overlay the light curves of the four images on a single set of axes.
for image_mag, image_label in zip(
    (A, B, C, D),
    (f'{qso_id} A', f'{qso_id} B', f'{qso_id} C', f'{qso_id} D'),
):
    plt.scatter(t, image_mag, label=image_label)
plt.legend()

In [None]:
# Differences between consecutive entries of the 'mhjd' column.
dts = np.diff(qso_data['mhjd'])

# Sigma-clip the spacings around their mean. masked=False asks astropy for a
# plain array instead of a masked one (presumably with the clipped gaps
# removed -- confirm against the astropy documentation).
new_dts = sigma_clip(dts, masked=False, cenfunc='mean')

# Number of mean (clipped) spacings that fit between t[0] and t[-1].
N_sampl_mean = (t[-1] - t[0])/new_dts.mean()

print(N_sampl_mean)

In [None]:
# Images whose light curves are added together to form the GP input.
image1 = 'A'
image2 = 'D'

# When True, each curve has its own mean removed before the two are summed.
subtract_mean = True

# Take private float64 copies. `.values` can hand back a view of the frame's
# data, so the in-place subtraction below would otherwise silently shift
# `qso_data` (and any array sharing its memory), or fail outright when pandas
# returns a read-only array.
curve_1 = qso_data[f'mag_{image1}'].to_numpy(dtype=np.float64, copy=True)
curve_2 = qso_data[f'mag_{image2}'].to_numpy(dtype=np.float64, copy=True)

if subtract_mean:
    curve_1 -= curve_1.mean()
    curve_2 -= curve_2.mean()

y_input = curve_1 + curve_2
# NOTE(review): the two error columns are added linearly; if the errors of the
# two images are independent, a quadrature sum would be the usual
# propagation -- confirm the intent.
err_input = qso_data[f'magerr_{image1}'].values + qso_data[f'magerr_{image2}'].values

In [None]:
# Covariance model: a constant amplitude times a Matern (nu = 1.5) kernel,
# each with explicit hyper-parameter bounds for the optimiser.
kernel = (
    ConstantKernel(constant_value=2, constant_value_bounds=(1e-3, 1e2))
    * Matern(length_scale=200.0, length_scale_bounds=(1, 300), nu=1.5)
)

# The squared input errors are passed as the per-sample `alpha` term.
gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=err_input**2,
    optimizer='fmin_l_bfgs_b',
    n_restarts_optimizer=10,
    normalize_y=True,
)

# scikit-learn expects a 2-D feature array, hence the added column axis.
gp.fit(t[:, np.newaxis], y_input)

# Regular grid spanning the first to the last epoch.
N = 2000
support = np.linspace(t[0], t[-1], num=N)

# Predictive mean and standard deviation on the grid.
y, sigma = gp.predict(support[:, np.newaxis], return_std=True)

In [None]:
# Tag the file as zero-mean only when the means were actually subtracted, so
# the name stays truthful if `subtract_mean` is switched off.
mean_tag = '_zero_mean' if subtract_mean else ''
np.savetxt(f'{qso_id}_{image1}_sum_{image2}{mean_tag}.txt', y)

In [None]:
# Input points, GP mean on the grid, and a +/- 1.96 sigma band around the mean.
plt.scatter(x=t, y=y_input, color='b')
plt.plot(support, y, 'b')
plt.fill_between(support, y1=y - 1.96*sigma, y2=y + 1.96*sigma, facecolor='b', alpha=0.5)

In [None]:
# Estimate the structure function of the GP reconstruction. `y` and `sigma` are
# the predictive mean and standard deviation returned by `gp.predict`; the
# names `A_pred` / `sigmaA` used here before are not defined in this notebook.
tau, v = estimate_structure_func_from_data(support, y, sigma)

In [None]:
# Restrict the fit to the lag bins between 10% and 50% of the binned range.
beg_off = int(0.10*len(tau))
cut_off = int(0.50*len(tau))

# Dedicated names so the GP curve stored in `y` is not overwritten here.
tau_fit = tau[beg_off:cut_off]
v_fit = v[beg_off:cut_off]

# log10 is undefined for NaN bins and for non-positive values (the noise
# correction can push a bin below zero); a single such point would turn the
# regression result into NaN, so they are dropped before fitting.
usable = np.isfinite(tau_fit) & np.isfinite(v_fit) & (tau_fit > 0) & (v_fit > 0)
tau_fit = tau_fit[usable]
v_fit = v_fit[usable]

# Straight-line fit in log-log space: log10(v) = slope*log10(tau) + intercept.
pars = stats.linregress(np.log10(tau_fit), np.log10(v_fit))
slope = pars.slope
intercept = pars.intercept

plt.loglog(tau_fit, v_fit, linestyle='None', marker='o', color='blue')

# Fitted model evaluated with the same helper used to simulate light curves.
plt.loglog(tau_fit, power_law_sf(tau_fit, slope, intercept), color='red')

In [None]:
# Re-evaluate the GP on `support` so this cell does not depend on whatever
# `y` / `sigma` currently hold; `A_pred` / `sigmaA`, used here before, are not
# defined anywhere in this notebook.
y, sigma = gp.predict(np.expand_dims(support, 1), return_std=True)
delay = 50
yA, yB = generate_PRH_light_curves(support, y, sigma, slope, intercept, delay)

plt.plot(support, yA)
plt.plot(support, yB)

print(yA.mean())
print(yB.mean())

In [None]:
# Name the file after the images selected in `image1` / `image2` rather than a
# hard-coded 'B'/'C' pair, so the label matches the data being written.
# NOTE(review): confirm which curve `y` is meant to hold at this point.
np.savetxt(f'{qso_id}_{image1}_sum_{image2}.txt', y)

In [None]:
# Difference between the A and B columns at every epoch.
plt.scatter(x=t, y=A - B)

In [None]:
# Average of the A - B difference over all epochs.
np.mean(A - B)