In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

In [None]:
# Read the De Bilt weather table and lower every value in the 'days' column by one
# (presumably to make the day counter zero-based -- confirm against the CSV).
df_bilt = pd.read_csv('Data/de_bilt_weather.csv').assign(days=lambda frame: frame['days'] - 1)

# Independent per-year copies, so later edits cannot write back into df_bilt.
df_bilt2000, df_bilt2001 = (
    df_bilt[df_bilt['year'] == year].copy() for year in (2000, 2001)
)

# Columns that are modelled; the trailing names are the original per-row annotations.
variables = [
    'cloud_cover', 'wind_speed', 'wind_gust', 'humidity',           # george
    'pressure', 'global_radiation', 'precipitation', 'sunshine',    # skipper
    'temp_mean', 'temp_min', 'temp_max',                            # thijs
]

n_var = len(variables)

In [None]:
def DFT(data):
    """Compute the discrete Fourier transform of ``data``, scaled by 1/N.

    The coefficient for index ``k`` is ``sum_t data[t] * exp(-2j*pi*k*t/N) / N``
    with ``N = len(data)``. With this scaling, coefficient 0 is the mean of the
    series.

    The transform is delegated to ``np.fft.fft``, which uses the same sign
    convention but does not need the N x N matrix of exponentials.

    Args:
        data: Sequence of samples. The transform runs along the first axis.

    Returns:
        Tuple ``(k_array, coefs)``: the integer indices ``0 .. N-1`` and the
        complex coefficients divided by ``N``.
    """
    data = np.asarray(data)
    N = len(data)
    k_array = np.arange(N)

    if N == 0:
        # np.fft.fft rejects zero-length input; an empty series simply has no
        # coefficients.
        return k_array, np.zeros(data.shape, dtype=complex)

    coefs = np.fft.fft(data, axis=0)

    return k_array, coefs / N

def reconstruct(t_array, k_array, coefs):
    """Sum the cosine modes selected by ``k_array`` at the times in ``t_array``.

    Every selected index ``k`` contributes
    ``|coefs[k]| * cos(angle(coefs[k]) + 2*pi*k*t / len(coefs))``.

    Args:
        t_array: Times at which the sum is evaluated.
        k_array: Indices into ``coefs`` of the modes to include.
        coefs: Complex Fourier coefficients.

    Returns:
        Float array with one value per entry of ``t_array``.
    """
    n_coefs = len(coefs)
    signal = np.zeros(len(t_array))

    for mode in k_array:
        coefficient = coefs[mode]
        amplitude, phase_shift = np.abs(coefficient), np.angle(coefficient)
        signal += amplitude * np.cos(phase_shift + 2 * np.pi * mode * t_array / n_coefs)

    return signal

In [None]:
# Demo on a single variable: decompose the 2000 mean temperature, keep only the
# dominant Fourier modes and compare the reconstruction with the raw series.
data = np.array(df_bilt2000['temp_mean'])
t_data = np.array(df_bilt2000['days'])

k, coefs = DFT(data)

# A mode is kept when its magnitude exceeds this fraction of the summed magnitudes
# of all modes. The per-variable decomposition further down reuses this value.
threshold = 0.005
coefs_abs = np.abs(coefs)
coefs_relative = coefs_abs / np.sum(coefs_abs)

filtered_k = k[coefs_relative > threshold]

data_transformed = reconstruct(t_data, filtered_k, coefs)

fig, ax = plt.subplots()
ax.plot(t_data, data, label='data', color='tab:blue')
ax.plot(t_data, data_transformed, '--', label='transformed_data', color='black')
ax.set_xlim(0, 100)
ax.set(xlabel='days', ylabel='temp_mean')
# The labels passed above are only rendered once a legend is requested.
ax.legend()
plt.show()

In [None]:
def _fourier_fit(series, times):
    """Return (coefficients, kept mode indices, reconstruction) for one series."""
    modes, spectrum = DFT(series)
    magnitude = np.abs(spectrum)
    share = magnitude / np.sum(magnitude)
    kept = modes[share > threshold]
    return spectrum, kept, reconstruct(times, kept, spectrum)


# Kept as an empty list, as before; it is reassigned when the model residuals are
# computed further down.
residuals = []

# Repeat the Fourier decomposition for every modelled variable of the year 2000.
t_data = np.array(df_bilt2000['days'])
series_per_variable = [np.array(df_bilt2000[variable_name]) for variable_name in variables]
fits = [_fourier_fit(series, t_data) for series in series_per_variable]

# One row per variable in each of the stacked results.
data_transformed = np.array([smooth for _, _, smooth in fits])
data_test = np.array(series_per_variable)
fourier_residuals = data_test - data_transformed

# The number of kept modes can differ per variable, hence the object dtype.
k_filtered_array = np.array([kept for _, kept, _ in fits], dtype=object)
coefs_matrix = np.array([spectrum for spectrum, _, _ in fits])

In [None]:
def var_initial_norm(params, data, n_var):
    """Size of the one-step prediction error of a linear transition matrix.

    Args:
        params: Flat array of ``n_var * n_var`` entries, reshaped row-major into
            the transition matrix.
        data: 2-D array; the matrix maps every column onto a prediction of the
            column after it.
        n_var: Number of rows and columns of the transition matrix.

    Returns:
        ``np.linalg.norm`` of ``data[:, 1:] - matrix @ data[:, :-1]``.
    """
    transition = params.reshape(n_var, n_var)
    previous, following = data[:, :-1], data[:, 1:]
    return np.linalg.norm(following - transition @ previous)

def fit_var(data):
    """Fit the transition matrix of a first-order vector autoregression.

    Finds the matrix ``M`` that minimises the Frobenius norm of
    ``data[:, 1:] - M @ data[:, :-1]``, the same objective that
    ``var_initial_norm`` evaluates. That problem is linear least squares, so it
    is solved directly with ``np.linalg.lstsq`` rather than searched for with
    an iterative optimiser: the result is the exact minimiser and needs no
    starting guess or convergence check.

    The matrix size is taken from ``data`` itself, not from a global.

    Args:
        data: 2-D array of shape ``(n_variables, n_timesteps)``; consecutive
            columns are consecutive time steps.

    Returns:
        Array of shape ``(n_variables, n_variables)``. If the problem is
        rank-deficient, the minimum-norm least-squares solution is returned.

    Raises:
        ValueError: If ``data`` is not 2-D or has fewer than two columns, in
            which case there is no transition to fit.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError("fit_var expects a 2-D array of shape (n_variables, n_timesteps)")
    if data.shape[1] < 2:
        raise ValueError("fit_var needs at least two time steps to fit a transition")

    past = data[:, :-1]
    future = data[:, 1:]

    # M @ past = future  <=>  past.T @ M.T = future.T, which is the
    # ``a @ x = b`` form that lstsq solves (one column of x per variable).
    M_transposed = np.linalg.lstsq(past.T, future.T, rcond=None)[0]

    return M_transposed.T

# Fit the transition matrix on what remains of the 2000 data after the Fourier
# reconstruction has been subtracted.
M_fit = fit_var(fourier_residuals)

In [None]:
def F(t, k_array, coefs_matrix):
    """Evaluate the truncated Fourier sum of every variable at time ``t``.

    For row ``i`` the result is the sum, over the indices ``k`` listed in
    ``k_array[i]``, of
    ``|coefs_matrix[i][k]| * cos(angle(coefs_matrix[i][k]) + 2*pi*k*t / N)``,
    where ``N`` is the length of the first row of ``coefs_matrix``.

    Args:
        t: Time at which the sums are evaluated.
        k_array: One collection of mode indices per variable.
        coefs_matrix: 2-D array of complex coefficients, one row per variable.

    Returns:
        Float array with one entry per element of ``k_array``.
    """
    n_samples = len(coefs_matrix[0, :])
    values = np.zeros(len(k_array))

    for row, (modes, spectrum) in enumerate(zip(k_array, coefs_matrix)):
        for mode in modes:
            amplitude = np.abs(spectrum[mode])
            offset = np.angle(spectrum[mode])
            values[row] += amplitude * np.cos(offset + 2 * np.pi * mode * t / n_samples)

    return values

def weather_var(x, M, t, k_array, coefs_matrix, std):
    """Draw one noisy step of the Fourier-plus-linear model.

    The result is ``F(t) + M @ (x - F(t - 1)) + noise``, where ``noise`` is drawn
    from ``np.random.normal(0, std)`` on every call.

    Args:
        x: State the step starts from; the Fourier value at ``t - 1`` is
            subtracted from it before ``M`` is applied.
        M: Matrix applied to that difference.
        t: Time of the state being generated.
        k_array: Mode indices per variable, forwarded to ``F``.
        coefs_matrix: Fourier coefficients per variable, forwarded to ``F``.
        std: Standard deviation(s) of the added normal noise.

    Returns:
        The generated state at time ``t``.
    """
    seasonal_now = F(t, k_array, coefs_matrix)
    seasonal_before = F(t - 1, k_array, coefs_matrix)
    carried_anomaly = M @ (x - seasonal_before)
    noise = np.random.normal(0, std)
    return seasonal_now + carried_anomaly + noise

In [None]:
# Fourier value of every variable on each entry of t_data: one row per variable,
# one column per time.
F_array = np.array([F(day, k_filtered_array, coefs_matrix) for day in t_data]).T

# One-step-ahead errors of the combined Fourier + transition-matrix model.
anomaly_before = data_test[:, :-1] - F_array[:, :-1]
one_step_forecast = F_array[:, 1:] + M_fit @ anomaly_before
residuals = data_test[:, 1:] - one_step_forecast

# Per-variable spread of those errors; passed to weather_var as the noise scale.
std = residuals.std(axis=1)

In [None]:
# Monte Carlo forecast: starting from the last column of data_test, repeatedly step
# the model forward with fresh noise to build an ensemble of trajectories.
#
# M_fit is reused from the fit made right after fit_var is defined; refitting here
# on the same fourier_residuals would only recompute the same matrix.
prediction_range = 20
n_predictions = 1000

# Seed the global NumPy generator (the one weather_var draws from) so the ensemble
# and every statistic derived from it are identical on each run.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# The first entry is the last training time, so trajectories begin at an observation.
t_prediction = np.arange(t_data[-1], t_data[-1] + prediction_range)

prediction_list = []
for j in range(n_predictions):
    prediction = [data_test[:, -1]]
    for t in t_prediction[1:]:
        prediction.append(weather_var(prediction[-1], M_fit, t, k_filtered_array, coefs_matrix, std))
    # Transpose to (n_variables, prediction_range) to match the layout of data_test.
    prediction = np.array(prediction).T
    prediction_list.append(prediction)

# Shape: (n_predictions, n_variables, prediction_range).
prediction_matrix = np.array(prediction_list)

# Ensemble statistics per variable and time step.
lower = np.percentile(prediction_matrix, 2.5, axis=0)
mean = np.mean(prediction_matrix, axis=0)
upper = np.percentile(prediction_matrix, 97.5, axis=0)

In [None]:
# Noise-free Fourier value of every variable over the forecast window
# (rows: variables, columns: entries of t_prediction).
F_prediction = np.array(
    [F(day, k_filtered_array, coefs_matrix) for day in t_prediction]
).T

F_prediction

In [None]:
# One figure per variable: the simulated ensemble, its summary statistics, the
# noise-free Fourier curve and the values observed over the same window.

# Row positions of the forecast window in df_bilt. Slice ends are exclusive, so the
# stop is one past the last forecast time; otherwise the final day is left out.
# NOTE(review): like the positional slice this replaces, this assumes that row
# position in df_bilt equals the shifted 'days' value -- confirm against the CSV.
obs_start = t_prediction[0]
obs_stop = t_prediction[-1] + 1
observed_days = df_bilt['days'].iloc[obs_start:obs_stop]

for i, variable_name in enumerate(variables):
    fig, ax = plt.subplots()

    # A 2-D y array draws one line per column, so the whole ensemble is added in
    # a single call instead of one call per trajectory.
    ax.plot(t_prediction, prediction_matrix[:, i, :].T, color='tab:blue', alpha=0.1)

    # Empty, fully opaque proxy line so the faint ensemble gets a readable legend entry.
    ax.plot([], [], color='tab:blue', label=f'{n_predictions} predictions')
    ax.plot(t_prediction, F_prediction[i, :], label='Fourier', color='tab:orange')
    ax.plot(t_prediction, lower[i, :], '--', color='black', label='95% CI')
    ax.plot(t_prediction, upper[i, :], '--', color='black')
    ax.plot(t_prediction, mean[i, :], '-', color='black', label='prediction mean')
    ax.plot(observed_days, df_bilt[variable_name].iloc[obs_start:obs_stop], 'o', label='observed', color='tab:red')
    ax.set(xlabel='days', ylabel=variable_name)
    ax.legend(loc=1)