In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

In [None]:
# Weather observations table; the rest of the notebook relies on a 'year'
# column plus one column per entry of `variables`.
df_bilt = pd.read_csv('Data/de_bilt_weather.csv')

# Independent per-year copies (changes to them never touch df_bilt).
df_bilt2000, df_bilt2001, df_bilt2002, df_bilt2009 = (
    df_bilt[df_bilt['year'] == selected_year].copy()
    for selected_year in (2000, 2001, 2002, 2009)
)

# Number of rows kept per year in the cells below.
n_data = 365

In [None]:
# Columns of the weather table that are modelled. The position in this list
# is the row index used for every (variable, time) matrix further down.
variables = [
    'cloud_cover',
    'wind_speed',
    'wind_gust',
    'humidity',
    'pressure',
    'global_radiation',
    'precipitation',
    'sunshine',
    'temp_mean',
    'temp_min',
    'temp_max',
]

n_var = len(variables)

In [None]:
# Years used for fitting: 2000 up to and including 2008.
# 2009 is not part of this array; it is appended separately where the
# observations are collected and serves as the comparison year.
# 2010 is left out entirely (no data).
year_array = np.arange(2000, 2008 + 1)

In [None]:
def DFT(data):
    """Discrete Fourier transform of a 1-D sequence, normalised by its length.

    Parameters
    ----------
    data : sequence of numbers, length N

    Returns
    -------
    (indices, coefficients)
        ``indices`` is ``np.arange(N)``; ``coefficients[k]`` equals
        ``sum_t data[t] * exp(-2j*pi*k*t/N) / N``.
    """
    n_samples = len(data)
    frequency_index = np.arange(n_samples)
    sample_index = np.arange(n_samples)

    # Dense N x N matrix of complex exponentials, one row per frequency.
    kernel = np.exp(-2j * np.pi * np.outer(frequency_index, sample_index) / n_samples)
    spectrum = kernel @ data

    return frequency_index, spectrum / n_samples

def reconstruct(t_array, k_array, coefs):
    """Rebuild a real-valued signal from a selection of its Fourier coefficients.

    Parameters
    ----------
    t_array : array-like
        Time points at which the signal is evaluated.
    k_array : iterable of int
        Frequency indices to include; each must be a valid index into ``coefs``.
    coefs : array-like of complex
        Length-normalised Fourier coefficients; ``len(coefs)`` is used as the
        period N.

    Returns
    -------
    numpy.ndarray of float64, shape ``(len(t_array),)``
        ``sum_k |coefs[k]| * cos(angle(coefs[k]) + 2*pi*k*t/N)``.
    """
    N = len(coefs)
    t_array = np.asarray(t_array)

    # Accumulate in float64. Every term is real, so an object-dtype buffer is
    # never needed; it only forces slow Python-level arithmetic and makes
    # every array derived from the result object-typed as well.
    data = np.zeros(len(t_array), dtype=float)

    for k in k_array:
        A = np.abs(coefs[k])
        phase = np.angle(coefs[k])
        data += A * np.cos(phase + 2 * np.pi * k * t_array / N)

    return data

In [None]:
# Fourier coefficients per year: for every year in year_array, one matrix
# with a row of DFT coefficients for each variable.
coefs_list = []
data_list = []  # not filled in this cell
for year in year_array:
    # rows of this year, truncated to the first n_data entries
    df_selection = df_bilt.loc[df_bilt['year'] == year][:n_data]

    rows = []
    for variable_name in variables:
        data = np.array(df_selection[variable_name])
        # k (the frequency indices) stays defined after the loop
        k, coefs = DFT(data)
        rows.append(coefs)

    coefs_matrix = np.array(rows)
    coefs_list.append(coefs_matrix)

In [None]:
# Average the complex coefficients over all years, entry by entry.
coefs_matrix_mean = np.mean(coefs_list, axis=0)

# One figure per variable: coefficient magnitudes on a log axis, with a
# dashed horizontal guide at 0.01.
frequency_axis = range(n_data)
for variable_name, mean_coefs in zip(variables, coefs_matrix_mean):
    fig, ax = plt.subplots()
    magnitudes = np.abs(mean_coefs)
    ax.plot(frequency_axis, magnitudes)
    ax.set_yscale('log')
    ax.hlines(0.01, 0, 360, color='black', linestyles='--')
    ax.set_title(variable_name)

In [None]:
# Select, per variable, the frequency indices whose share of the summed
# coefficient magnitude exceeds `threshold`.
threshold = 0.001
data_transformed = []

# Explicit 1-D object container, one index array per variable. Building it
# with np.array(list, dtype=object) would silently yield a 2-D array whenever
# all selections happen to have the same length.
k_array = np.empty(n_var, dtype=object)
for i in range(n_var):
    coefs_abs = np.abs(coefs_matrix_mean[i, :])
    coefs_relative = coefs_abs / np.sum(coefs_abs)

    # Take the indices straight from the mask instead of indexing `k`, a loop
    # variable left over from the cell that computed the per-year DFTs.
    k_array[i] = np.flatnonzero(coefs_relative > threshold)

In [None]:
t_array = np.arange(n_data)

# Observations per year as (variable, time) matrices: the years of year_array
# first, then 2009 as the last entry (data_list[-1]).
# coefs_list is deliberately NOT reset here: emptying it would break the cell
# that averages the per-year coefficients as soon as that cell is re-run.
data_list = []
for year in list(year_array) + [2009]:
    df_selection = df_bilt.loc[df_bilt['year'] == year]
    df_selection = df_selection[:n_data]

    data_matrix = np.array([np.array(df_selection[variable_name]) for variable_name in variables])
    data_list.append(data_matrix)

# Truncated Fourier curve for every variable. The explicit float dtype keeps
# the matrix numeric even if reconstruct() hands back object-typed arrays.
data_fourier_matrix = np.array(
    [reconstruct(t_array, k_array[row], coefs_matrix_mean[row, :]) for row in range(n_var)],
    dtype=float,
)

# Quick visual check on the last variable. The index is chosen explicitly
# rather than taken from whatever value a previous loop left in `i`.
i_check = n_var - 1
fig, ax = plt.subplots()
ax.plot(t_array, data_fourier_matrix[i_check, :], label='Fourier')
ax.plot(t_array, data_list[-1][i_check, :], label='observed')
ax.set_title(variables[i_check])
ax.legend()

In [None]:
# Per variable: truncated Fourier curve drawn over the 2009 observations.
for variable_name, selected_k, mean_coefs in zip(variables, k_array, coefs_matrix_mean):
    fig, ax = plt.subplots()

    prediction = reconstruct(t_array, selected_k, mean_coefs)
    observed_2009 = df_bilt2009[variable_name][:n_data]

    ax.errorbar(t_array, prediction)
    ax.errorbar(t_array, observed_2009, fmt='o')
    ax.set_title(variable_name)

In [None]:
# Residuals of every year except the last entry of data_list with respect to
# the Fourier curve, kept as a (year, variable, time) stack ...
training_stack = np.asarray(data_list[:-1])
fourier_residuals = training_stack - data_fourier_matrix

# ... and laid end to end along the time axis: (variable, year * time).
big_fourier_residuals = np.concatenate(list(fourier_residuals), axis=1)

In [None]:
def var_initial_norm(params, data, n_var):
    """Norm of the one-step prediction error of a lag-1 linear model.

    ``params`` is reshaped to an ``(n_var, n_var)`` matrix that maps every
    column of ``data`` onto a prediction for the next column. The return value
    is ``np.linalg.norm`` of the difference between the actual next columns
    and those predictions.
    """
    transition = np.reshape(params, (n_var, n_var))
    current_state, next_state = data[:, :-1], data[:, 1:]
    return np.linalg.norm(next_state - transition @ current_state)

def fit_var(data):
    """Fit the lag-1 transition matrix M that minimises ||data[:, 1:] - M @ data[:, :-1]||.

    The objective is an ordinary linear least-squares problem, so it is solved
    in closed form with ``np.linalg.lstsq`` rather than by a derivative-free
    search over all matrix entries. That is far faster and returns the exact
    minimiser.

    Parameters
    ----------
    data : array-like, shape (n_variables, n_steps)
        One row per variable, one column per time step. Converted to float,
        so object-typed numeric input is accepted.

    Returns
    -------
    numpy.ndarray, shape (n_variables, n_variables)
        The fitted matrix. The size is taken from ``data`` itself.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D or has fewer than two time steps.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2 or data.shape[1] < 2:
        raise ValueError('fit_var needs a 2-D array with at least two time steps')

    previous = data[:, :-1]
    following = data[:, 1:]

    # M @ previous = following  <=>  previous.T @ M.T = following.T,
    # which is the "a @ x = b" form expected by lstsq.
    solution, *_ = np.linalg.lstsq(previous.T, following.T, rcond=None)
    M_fit = solution.T

    return M_fit

# Estimate the lag-1 transition matrix from the stacked Fourier residuals.
M_fit = fit_var(big_fourier_residuals)

In [None]:
def F(t, k_array, coefs_matrix):
    """Evaluate the truncated Fourier curve of every variable at time ``t``.

    Parameters
    ----------
    t : number
        Time at which the curves are evaluated.
    k_array : sequence of index sequences
        ``k_array[i]`` holds the frequency indices used for row ``i``.
    coefs_matrix : 2-D array of complex
        Row ``i`` holds the Fourier coefficients of variable ``i``; the row
        length is used as the period N.

    Returns
    -------
    numpy.ndarray of float, length ``len(k_array)``
        Entry ``i`` is ``sum_k |c_ik| * cos(angle(c_ik) + 2*pi*k*t/N)`` over
        the frequencies in ``k_array[i]``.
    """
    period = len(coefs_matrix[0, :])
    values = np.zeros(len(k_array))

    for row, (frequencies, row_coefs) in enumerate(zip(k_array, coefs_matrix)):
        for freq in frequencies:
            amplitude = np.abs(row_coefs[freq])
            offset = np.angle(row_coefs[freq])
            values[row] += amplitude * np.cos(offset + 2 * np.pi * freq * t / period)

    return values

def weather_var(x, M, t, k_array, coefs_matrix, std):
    """Draw the state at time ``t`` given the state ``x`` at time ``t - 1``.

    The result is the Fourier curve at ``t``, plus ``M`` applied to the
    deviation of ``x`` from the Fourier curve at ``t - 1``, plus zero-mean
    normal noise with scale ``std`` (drawn from NumPy's global generator).
    """
    curve_now = F(t, k_array, coefs_matrix)
    curve_before = F(t-1, k_array, coefs_matrix)
    carried_deviation = M @ (x - curve_before)
    noise = np.random.normal(0, std)
    return curve_now + carried_deviation + noise

In [None]:
# Tabulate the Fourier curves once: after the transpose, rows follow the
# entries of k_array (one per variable) and columns follow t_array.
F_array = np.array([F(t, k_array, coefs_matrix_mean) for t in t_array]).T

In [None]:
# One-step-ahead errors of the Fourier + lag-1 model for the first
# len(year_array) entries of data_list.
curve_next = F_array[:, 1:]
curve_current = F_array[:, :-1]

residuals_matrix = []
for data in (data_list[idx] for idx in range(len(year_array))):
    one_step_prediction = curve_next + M_fit @ (data[:, :-1] - curve_current)
    residuals_matrix.append(data[:, 1:] - one_step_prediction)

# All years side by side along the time axis.
residuals_matrix = np.hstack(residuals_matrix)

# Spread of the one-step errors, one value per variable.
std = np.std(residuals_matrix, axis=1)
std

In [None]:
def weather_hybrid(x, M, t, F_array, std):
    """Draw the state at step ``t`` from the state ``x`` at step ``t - 1``.

    Uses the tabulated curve ``F_array`` (column ``t`` and column ``t - 1``):
    the new state is the curve at ``t``, plus ``M`` applied to the deviation
    of ``x`` from the curve at ``t - 1``, plus zero-mean normal noise with
    scale ``std`` from NumPy's global generator.
    """
    curve_now = F_array[:, t]
    curve_before = F_array[:, t-1]
    carried_deviation = M @ (x - curve_before)
    noise = np.random.normal(0, std)
    return curve_now + carried_deviation + noise

In [None]:
# Monte-Carlo ensemble: every run starts from the first column of the last
# entry of data_list and is stepped forward with weather_hybrid.

# Seed NumPy's global generator (the one weather_hybrid draws from) so that
# the ensemble, its mean and the percentile band are identical on every run.
np.random.seed(0)

prediction_list = []
n_predictions = 1000
for j in range(n_predictions):
    prediction = [data_list[-1][:, 0]]
    for t in t_array[1:]:
        prediction.append(weather_hybrid(prediction[-1], M_fit, t, F_array, std))
    prediction = np.array(prediction).T
    prediction_list.append(prediction)

# Shape: (run, variable, time).
prediction_matrix = np.array(prediction_list)

# Pointwise 2.5 % / 97.5 % percentiles and mean over the runs.
lower = np.percentile(prediction_matrix, 2.5, axis=0)
mean = np.mean(prediction_matrix, axis=0)
upper = np.percentile(prediction_matrix, 97.5, axis=0)

In [None]:
# One figure per variable: all simulated runs, the Fourier curve, the
# percentile band, the ensemble mean and the observations (x-axis 0..20).
band_style = dict(color='black', fmt='--')
for i, variable_name in enumerate(variables):
    fig, ax = plt.subplots()

    # every simulated run as a faint line
    for run in prediction_matrix[:, i, :]:
        ax.errorbar(t_array, run, color='tab:blue', alpha=0.1)

    # single-point artist: the only blue one that carries a legend label
    ax.errorbar(t_array[-1], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')

    ax.errorbar(t_array, F_array[i, :], label='Fourier', color='tab:orange')
    ax.errorbar(t_array, lower[i, :], label='95% CI', **band_style)
    ax.errorbar(t_array, upper[i, :], **band_style)
    ax.errorbar(t_array, mean[i, :], color='black', fmt='-', label='prediction mean')
    ax.errorbar(t_array, data_list[-1][i, :], label='observed', color='tab:red', fmt='o')

    ax.set_xlabel('days')
    ax.set_ylabel(variable_name)
    ax.set_xlim(0, 20)
    ax.legend(loc=1)

In [None]:
# 2009 mean temperature against the Fourier curve, whole year.
# Row 8 of F_array corresponds to variables[8], i.e. 'temp_mean'.
ax = plt.gca()
observed_temperature = df_bilt2009['temp_mean']
fourier_temperature = F_array[8, :]

ax.errorbar(t_array, observed_temperature, label='observed', fmt='o', color='tab:blue', markersize=4)
ax.errorbar(t_array, fourier_temperature, label='Fourier prediction', fmt='-', color='black')
ax.set_xlim(-10, 400)
ax.set_ylim(-5, 30)
ax.set_xlabel('t')
ax.set_ylabel(r'$y(t)$')
ax.set_title('Mean temperature 2009')
ax.legend()
plt.savefig('Figures/fourier6.png', dpi=600)

In [None]:
# Same comparison as the full-year figure, restricted to t in [0, 50].
# Row 8 of F_array corresponds to variables[8], i.e. 'temp_mean'.
ax = plt.gca()
observed_temperature = df_bilt2009['temp_mean']
fourier_temperature = F_array[8, :]

ax.errorbar(t_array, observed_temperature, label='observed', fmt='o', color='tab:blue', markersize=4)
ax.errorbar(t_array, fourier_temperature, label='Fourier prediction', fmt='-', color='black')
ax.set_xlim(0, 50)
ax.set_ylim(-4, 8)
ax.set_xlabel('t')
ax.set_ylabel(r'$y(t)$')
ax.set_title('Mean temperature 2009')
ax.legend()
plt.savefig('Figures/fourier5.png', dpi=600)

In [None]:
# Ensemble figure for variable 8 ('temp_mean'), x-axis limited to 0..50.
i = 8
variable_name = variables[i]
fig, ax = plt.subplots()

# every simulated run as a faint line
for run in prediction_matrix[:, i, :]:
    ax.errorbar(t_array, run, color='tab:blue', alpha=0.1)

# single-point artist: the only blue one that carries a legend label
ax.errorbar(t_array[-1], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')

band_style = dict(color='black', fmt='--')
ax.errorbar(t_array, F_array[i, :], label='Fourier', color='tab:orange')
ax.errorbar(t_array, lower[i, :], label='95% CI', **band_style)
ax.errorbar(t_array, upper[i, :], **band_style)
ax.errorbar(t_array, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_array, data_list[-1][i, :], label='observed', color='tab:red', fmt='o')

ax.set_xlabel('t (days)')
ax.set_ylabel('Mean temperature (C)')
ax.set_xlim(0, 50)
ax.set_ylim(-10, 40)
ax.set_title('')
ax.legend(loc=1)
fig.savefig('Figures/fourier7.png', dpi=600)

In [None]:
# Ensemble figure for variable 8 ('temp_mean'), x-axis covering 0..380.
i = 8
variable_name = variables[i]
fig, ax = plt.subplots()

# every simulated run as a faint line
for run in prediction_matrix[:, i, :]:
    ax.errorbar(t_array, run, color='tab:blue', alpha=0.1)

# single-point artist: the only blue one that carries a legend label
ax.errorbar(t_array[-1], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')

band_style = dict(color='black', fmt='--')
ax.errorbar(t_array, F_array[i, :], label='Fourier', color='tab:orange')
ax.errorbar(t_array, lower[i, :], label='95% CI', **band_style)
ax.errorbar(t_array, upper[i, :], **band_style)
ax.errorbar(t_array, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_array, data_list[-1][i, :], label='observed', color='tab:red', fmt='o', markersize=3)

ax.set_xlabel('t')
ax.set_ylabel('y(t)')
ax.set_xlim(0, 380)
ax.set_ylim(-10, 40)
ax.set_title('2009')
ax.legend(loc=1)
fig.savefig('Figures/fourier8.png', dpi=600)

In [None]:
# Ensemble figure for variable 0 ('cloud_cover'), x-axis limited to 0..50.
i = 0
variable_name = variables[i]
fig, ax = plt.subplots()

# every simulated run as a faint line
for run in prediction_matrix[:, i, :]:
    ax.errorbar(t_array, run, color='tab:blue', alpha=0.1)

# single-point artist: the only blue one that carries a legend label
ax.errorbar(t_array[-1], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')

band_style = dict(color='black', fmt='--')
ax.errorbar(t_array, F_array[i, :], label='Fourier', color='tab:orange')
ax.errorbar(t_array, lower[i, :], label='95% CI', **band_style)
ax.errorbar(t_array, upper[i, :], **band_style)
ax.errorbar(t_array, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_array, data_list[-1][i, :], label='observed', color='tab:red', fmt='o')

ax.set_xlabel('t (days)')
ax.set_ylabel('Cloud coverage (okta)')
ax.set_xlim(0, 50)
ax.set_ylim(-2, 12)
ax.set_title('')
ax.legend(loc=1)
fig.savefig('Figures/fourier9.png', dpi=600)

In [None]:
# Ensemble figure for variable 0 ('cloud_cover'), x-axis limited to 0..50.
#
# The data drawn here is row 0 of every matrix, i.e. cloud cover, so the
# y-label and y-limits are the cloud-cover ones. They previously read
# 'Mean temperature (C)' with limits (-10, 40), which mislabelled the figure.
# NOTE(review): if a mean-temperature figure was intended for this file, set
# i = 8 and restore the temperature label/limits instead.
fig, ax = plt.subplots()
i = 0
variable_name = variables[i]
for j in range(len(prediction_list)):
    ax.errorbar(t_array, prediction_matrix[j, i, :], color='tab:blue', alpha=0.1)

# single-point artist: the only blue one that carries a legend label
ax.errorbar(t_array[-1], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')
ax.errorbar(t_array, F_array[i, :], label='Fourier', color='tab:orange')
ax.errorbar(t_array, lower[i, :], color='black', fmt='--', label='95% CI')
ax.errorbar(t_array, upper[i, :], color='black', fmt='--')
ax.errorbar(t_array, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_array, data_list[-1][i, :], label='observed', color='tab:red', fmt='o')
ax.set(xlabel='t (days)', ylabel='Cloud coverage (okta)', xlim=(0, 50), ylim=(-2, 12))
ax.set_title('')
ax.legend(loc=1)
fig.savefig('Figures/fourier10.png', dpi=600)