In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import special
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ConstantKernel

# Default size (in inches) for every figure created later in the notebook.
plt.rcParams.update({'figure.figsize': (12, 10)})

In [None]:
# u-filter tables: the dates file is space-separated, the image files are
# read with the default separator. None of the files has a header row.
times_u = pd.read_csv("../data/gerlumph_data/u_dates.dat", sep=" ", header=None)
imageA_u = pd.read_csv("../data/gerlumph_data/u_A.dat", header=None)
imageB_u = pd.read_csv("../data/gerlumph_data/u_B.dat", header=None)

In [None]:
# i-filter tables, loaded the same way as the u-filter ones (no header row;
# only the dates file is space-separated).
times_i = pd.read_csv("../data/gerlumph_data/i_dates.dat", sep=" ", header=None)
imageA_i = pd.read_csv("../data/gerlumph_data/i_A.dat", header=None)

In [None]:
# Give every freshly loaded table readable column labels
# (the files were read without a header row).
for frame, labels in (
    (times_u, ['t_u', 'dt_u']),
    (imageA_u, ['A_u']),
    (imageB_u, ['B_u']),
    (times_i, ['t_i', 'dt_i']),
    (imageA_i, ['A_i']),
):
    frame.columns = labels

In [None]:
# Put the five tables side by side in one frame; rows are matched on the index.
df = pd.concat(
    objs=[times_u, imageA_u, imageB_u, times_i, imageA_i],
    axis="columns",
)

In [None]:
# Raw light curves of image A in both filters, drawn through the explicit
# Axes interface (blue circles: u-filter, red circles: i-filter).
# TODO: the other images are not drawn here yet.
fig, ax = plt.subplots(figsize=(9.8, 8.5))

ax.plot(df['t_u'], df['A_u'], "bo")
ax.plot(df['t_i'], df['A_i'], "ro")

ax.set_xlabel("Julian Date", size=25)
ax.set_ylabel("Magnitude", size=25)
ax.grid(True)

# Hand-placed colour key in figure coordinates instead of a legend box.
fig.text(0.75, 0.25, "u-filter", color='blue', size=25)
fig.text(0.75, 0.20, "i-filter", color='red' , size=25)

In [None]:
# Plain NumPy views of the observation dates and of the image-A values,
# one pair per filter, for use by the Gaussian-process code below.
t_u = times_u['t_u'].to_numpy()
t_i = times_i['t_i'].to_numpy()
y_u = imageA_u['A_u'].to_numpy()
y_i = imageA_i['A_i'].to_numpy()

### Some useful variables

In [None]:
## IMPORTANT: placeholder error model only -- replace it with properly
## estimated uncertainties. For now each point gets an error equal to a
## 1e-9 fraction of its own value.

err_u, err_i = 1e-9 * y_u, 1e-9 * y_i

### Define the kernel and fit Gaussian processes to the data points

For each curve, the following data are required:

* the time domain, i.e. the array of observation dates
* curve values at those times
* experimental errors on the curve values

The steps are:

* Define the kernel function of the Gaussian process
* Fit the Gaussian processes to the data
* Define a new, uniformly sampled time domain and use the fitted models to predict the curve values on this domain

In [None]:
# Covariance model used for both curves: a constant (amplitude) kernel times a
# Matern kernel with nu=1.5. The tuples are the bounds explored by the
# hyper-parameter optimizer.
kernel = ConstantKernel(2, (1e-3, 1e2)) * Matern(length_scale=200.0, length_scale_bounds=(1, 300), nu=1.5)

# The optimizer is restarted from randomly drawn hyper-parameters; fixing the
# seed makes the fitted models identical on every "Restart & Run All".
GP_SEED = 0

# Settings common to the two regressors, so they cannot drift apart.
gp_settings = dict(
    kernel=kernel,
    n_restarts_optimizer=10,
    optimizer='fmin_l_bfgs_b',
    normalize_y=True,
    random_state=GP_SEED,
)

# One regressor per filter; the squared measurement errors are passed as the
# per-point `alpha`.
# NOTE(review): with normalize_y=True the targets are rescaled before the fit,
# so an alpha given in the original units may need to be divided by the
# variance of y -- confirm against the scikit-learn documentation once a
# realistic error model replaces the dummy one.
gp_u = GaussianProcessRegressor(alpha=err_u**2, **gp_settings)
gp_i = GaussianProcessRegressor(alpha=err_i**2, **gp_settings)

# Do the fit (scikit-learn expects a 2-D input, hence the column reshape)
gp_u.fit(np.expand_dims(t_u, 1), y_u)
gp_i.fit(np.expand_dims(t_i, 1), y_i)


# Spacing of the uniform prediction grid, in the same units as the dates.
gp_step = 0.2
# Time span (same units as the dates, not a number of points) added on the
# left and right ends of the domain in order to make it possible to shift
# the signals.
dt_ext = 100
# Use the true extrema rather than the first/last elements, so the grid is
# right even if the dates are not stored in increasing order.
t_min = min(t_u.min(), t_i.min())
t_max = max(t_u.max(), t_i.max())
support = np.arange(t_min - dt_ext, t_max + dt_ext, gp_step)

# Predicted mean and standard deviation of each curve on the common grid.
ypred_u, sigma_u = gp_u.predict(np.expand_dims(support, 1), return_std=True)
ypred_i, sigma_i = gp_i.predict(np.expand_dims(support, 1), return_std=True)

In [None]:
# Two-sided confidence level of the shaded bands.
# NOTE: the legend labels below hard-code "95"; update them if CL changes.
CL = 0.95

# Half-width of the band in units of sigma. For a Gaussian,
#   P(|Z| < z) = erf(z / sqrt(2)) = CL   =>   z = sqrt(2) * erfinv(CL),
# which is about 1.96 for CL = 0.95. Using erfinv(CL) alone (about 1.386)
# would draw a band covering only about 83% while labelling it 95%.
z_cl = np.sqrt(2) * special.erfinv(CL)

# Observed points
plt.scatter(t_u, y_u, color='blue')
plt.scatter(t_i, y_i, color='green')

# Gaussian-process mean predictions on the uniform grid
plt.plot(support, ypred_u, label='U GP prediction', color = 'b')
plt.plot(support, ypred_i, label='I GP prediction', color = 'g')

# Confidence bands around each prediction
plt.fill_between(support,
                 ypred_u - z_cl*sigma_u,
                 ypred_u + z_cl*sigma_u,
                 alpha=.5, fc='b', ec='None', label=r'95\% confidence interval U')
plt.fill_between(support,
                 ypred_i - z_cl*sigma_i,
                 ypred_i + z_cl*sigma_i,
                 alpha=.5, fc='g', ec='None', label=r'95\% confidence interval I')
plt.legend(fontsize=10)