In [None]:
import numpy as np
import pandas as pd
import itertools
import importlib
import h5py
from astropy.stats import sigma_clip
import matplotlib.pyplot as plt
from scipy import stats
from scipy import special
from scipy import integrate
from scipy import interpolate
from scipy import linalg
from scipy import signal
from scipy.optimize import curve_fit
import time
from pathlib import Path
import os
import re
import random
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ConstantKernel
from typing import Tuple

# Default size (inches) and resolution for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['figure.dpi'] = 120
#plt.rcParams['text.usetex'] = True

import sys

# Prepend the directory two levels up to the import search path so that the
# project-local `modules` package imported below can be resolved.
sys.path.insert(0, '../..')

from modules import prh_mc_utils as pmu
# Re-execute the module so that edits made to it are picked up without restarting the kernel.
importlib.reload(pmu)

In [None]:
def RMSE(y1, y2):
    """Return the root-mean-square of the element-wise difference y1 - y2."""
    residuals = y1 - y2
    mean_squared = np.mean(residuals**2)
    return np.sqrt(mean_squared)

def RMaxSE(y1, y2):
    """Return the square root of the largest squared element-wise difference y1 - y2."""
    squared_residuals = (y1 - y2)**2
    largest = np.max(squared_residuals)
    return np.sqrt(largest)

def linear(x, p0, p1):
    """Evaluate the straight line with slope p0 and offset p1 at x."""
    scaled = p0*x
    return scaled + p1

In [None]:
# Input light-curve table and the two image labels that are combined further down.
file_path = Path('../../data/cnn_base_data/original_data/HE0435_Bonvin2016.rdb_.txt')
img1, img2 = 'C', 'D'

In [None]:
# The quasar identifier is the part of the file name before the first underscore.
qso_id = file_path.name.split('_')[0]
print(qso_id)

qso_data = pd.read_table(file_path)

# Image labels are the single capital letters captured from column names that
# contain 'mag_<LETTER>'; each column is matched once and non-matches are dropped.
mag_col_matches = (re.search(r'mag_([A-Z])', col) for col in qso_data.columns)
images = [match.group(1) for match in mag_col_matches if match]

# Time axis as a float64 array; magnitudes and their errors keyed by image label.
t = qso_data['mhjd'].to_numpy(dtype=np.float64)
mags = {key: qso_data[f'mag_{key}'] for key in images}
magerrs = {key: qso_data[f'magerr_{key}'] for key in images}

In [None]:
# One scatter series per image found in the table, labelled with the quasar id.
for image, image_mags in mags.items():
    plt.scatter(t, image_mags, label=f'{qso_id} {image}')
plt.legend()

In [None]:
# Difference between the last and the first time stamp of the light curve.
time_span = t[-1] - t[0]
print(time_span)

In [None]:
# Select the two images that are combined into a single light curve.
mag1 = mags[img1]
mag2 = mags[img2]
magerr1 = magerrs[img1]
magerr2 = magerrs[img2]

# Combined curve and its uncertainty, computed by the project helpers
# (presumably the magnitude of the summed fluxes — confirm in prh_mc_utils).
y_input = pmu.mag_flux_sum(mag1, mag2)
err_input = pmu.flux_sum_err(mag1, mag2, magerr1, magerr2)

# Observed data bundle: the time axis plus one entry per individual image and
# one for the combined curve. Each image gets its own key: using img1 for both
# entries would let the second silently overwrite the first, because duplicate
# keys in a dict literal keep only the last value.
qso_dict = {'t': t,
            f'{img1}': {'y': mag1, 'err_y': magerr1},
            f'{img2}': {'y': mag2, 'err_y': magerr2},
            f'{img1}+{img2}': {'y': y_input, 'err_y': err_input}
           }


plt.scatter(t, mag1, label=f'{img1}')
plt.scatter(t, mag2, label=f'{img2}')
plt.scatter(t, y_input, label=f'{img1}+{img2}')
plt.legend()

In [None]:
# Gaussian-process model of the combined light curve: a constant amplitude
# multiplied by a Matern kernel with nu=1.5; both hyperparameters are optimised
# within the bounds given here.
kernel = ConstantKernel(2, (1e-3, 1e2)) * Matern(length_scale=200.0, length_scale_bounds=(1, 300), nu=1.5)

# alpha supplies a per-point variance that is added to the kernel diagonal.
# NOTE(review): alpha is given in the units of y_input while normalize_y=True
# makes the regressor work on standardised targets — confirm whether alpha
# should be rescaled by the variance of y_input.
# random_state pins the random starting points of the 10 optimizer restarts so
# the fitted hyperparameters are identical on every Restart & Run All.
gp = GaussianProcessRegressor(kernel=kernel, alpha=err_input**2, n_restarts_optimizer=10,
                              optimizer='fmin_l_bfgs_b', normalize_y=True, random_state=0)

gp.fit(np.expand_dims(t,1), y_input)

# Regular prediction grid over the observed time range, optionally widened by
# dt_extension on both sides (no widening with the current value of 0).
N = 2000
dt_extension = 0
support, step = np.linspace(t[0] - dt_extension, t[-1] + dt_extension, N, retstep=True)

# Posterior mean and full covariance on the grid; sigma_pred is the pointwise
# standard deviation taken from the covariance diagonal.
y_pred, cov_pred = gp.predict(np.expand_dims(support, 1), return_cov=True)
sigma_pred = np.sqrt(np.diag(cov_pred))
# Lower Cholesky factor of the predictive covariance; np.linalg.cholesky raises
# LinAlgError if cov_pred is not numerically positive definite. Not used
# further in this cell.
L = np.linalg.cholesky(cov_pred)
# Number of grid steps spanned by dt_extension (0 with the current settings).
win = int(dt_extension/step)

gp_dict = {'t': support, 'y_pred': y_pred, 'sigma_pred': sigma_pred, 'cov_pred': cov_pred}

# Data points, posterior mean and the +/- 1.96 sigma band around it.
plt.scatter(t, y_input, color='b')
plt.plot(support, y_pred, color='b')
plt.fill_between(support, y_pred - 1.96*sigma_pred, y_pred + 1.96*sigma_pred, fc='b', alpha=0.5)

In [None]:
# Structure function of the GP prediction in 50 bins (computed by the project helper).
tau, v = pmu.estimate_structure_func_from_data(support, y_pred, sigma_pred, n_bins=50)

# Keep only the bins whose structure-function value is non-negative.
# NOTE(review): exact zeros pass this filter; a later cell takes log10 of v.
non_negative = v >= 0
tau, v = tau[non_negative], v[non_negative]

In [None]:
# Use only lags up to 60% of the last entry of tau for the fit.
max_lag = 0.60*tau[-1]
within_max_lag = tau <= max_lag
tau_cut, v_cut = tau[within_max_lag], v[within_max_lag]

# Straight-line fit in log10-log10 space, i.e. a power law v = 10**intercept * tau**slope.
p = stats.linregress(np.log10(tau_cut), np.log10(v_cut))
slope, intercept = p[0], p[1]

# Fit result together with both the truncated and the untruncated structure function.
sf_dict = {
    'tau_cut': tau_cut,
    'v_cut': v_cut,
    'slope': slope,
    'intercept': intercept,
    'tau_not_cut': tau,
    'v_not_cut': v,
}

print(f'Max lag: {tau_cut[-1]:.2f} days')
# Binned values as markers, fitted power law as a red line, both on log-log axes.
plt.loglog(tau_cut, v_cut, linestyle='None', marker='o')
plt.loglog(tau_cut, 10**intercept * tau_cut**slope, color='red')
print(f'slope: {p[0]:.3f}, intercept: {p[1]:.3f}')



In [None]:
# Example generation of one pair of simulated light curves from the GP prediction
# and the fitted structure-function power law.
# NOTE(review): np.random is not seeded in this cell, so the draws below differ
# between runs.
N_MC = 2
delay_min = -200
delay_max = +200

# N_MC uniform draws each: delays in [delay_min, delay_max), magnitude shifts in
# [0, 2) and shrink factors in [0.8, 1.0). None of these three arrays is used
# further in this cell.
delays = np.random.random(N_MC)*(delay_max - delay_min) + delay_min
mag_shifts = np.random.random(N_MC)*2
shrink_factors = np.random.random(N_MC)*(1.0 - 0.8) + 0.8

# Fixed values actually passed to the generator below.
delay = -200
mag_shift = 0

# Project helper; it receives the GP mean/sigma on the grid plus the log-log
# slope and intercept of the structure-function fit (p[0], p[1]).
# Presumably returns the two simulated image light curves — confirm in prh_mc_utils.
yA, yB = pmu.generate_PRH_light_curves(support=support, 
                                       y=y_pred, 
                                       sigma=sigma_pred, 
                                       slope=p[0], intercept=p[1], 
                                       delay=delay, mag_shift=mag_shift)

# The two generated curves, labelled with the selected image names.
plt.plot(support, yA, label=f'{img1}')
plt.plot(support, yB, label=f'{img2}')
#plt.plot(support, pmu.mag_flux_sum(yA, yB), label='A+B')
plt.legend()

In [None]:
# Hand the observed data, the GP prediction and the structure-function fit to
# the project helper, with an output file named after the quasar and image pair.
base_file_path = Path(f'../../data/cnn_base_data/{qso_id}_{img1}{img2}.h5')
pmu.create_qso_base_file(
    qso_dict=qso_dict,
    gp_dict=gp_dict,
    sf_dict=sf_dict,
    outfile=base_file_path,
)

In [None]:
# Inspect an existing base file: list the entries of its structure-function group.
# The file is opened in a context manager so the handle is closed as soon as the
# keys have been read (an open handle would otherwise linger in the kernel and
# can get in the way of rewriting the file). The keys are copied into a list
# because the HDF5 objects are no longer accessible once the file is closed;
# `hf` stays bound afterwards but refers to the closed file.
with h5py.File('../../data/cnn_base_data/qso_base_data/J1206_AB.h5', 'r') as hf:
    sf_keys = list(hf['qso_base_data']['structure_function'].keys())
sf_keys