In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.optimize

In [None]:
# load the weather observations from the De Bilt csv file
df_bilt = pd.read_csv('Data/de_bilt_weather.csv')

# keep only the year 2000 and renumber its days so that they start at 1
# the offset has to come from the year-2000 subset itself: subtracting the
# minimum of the full data set only gives 1 when the file starts in 2000
df_bilt2000 = df_bilt.loc[df_bilt['year'] == 2000].copy()
df_bilt2000['days'] = df_bilt2000['days'] - df_bilt2000['days'].min() + 1

# columns that are modelled together; the order fixes the row order of every
# [var, t] array that is built from this list
variables = ['cloud_cover', 'wind_speed', 'wind_gust', 'humidity',          # george
            'pressure', 'global_radiation', 'precipitation', 'sunshine',    # skipper
            'temp_mean', 'temp_min', 'temp_max']                            # thijs

n_var = len(variables)

df_bilt

In [None]:
# input variables
# length (in time steps) of the prediction window
prediction_range = 20
# indices of the observations that are used for the fit: 0, 1, ..., 30
t_data = np.arange(0, 31, 1)

In [None]:
def var_initial_norm(params, x, y, n_var):
    """Return the residual norm of the linear model ``y = c + M @ x``.

    This is the objective that is minimised to fit the intercept vector ``c``
    and the matrix ``M``.

    Parameters
    ----------
    params : array-like, shape (n_var + n_var**2,)
        Flat parameter vector. The first ``n_var`` entries are the intercept
        ``c``; the remaining entries are ``M`` in row-major order.
    x : array-like, shape (n_var, n_samples)
        Model input, one column per sample.
    y : array-like, shape (n_var, n_samples)
        Target values, column-aligned with ``x``.
    n_var : int
        Number of variables (length of ``c``, side length of ``M``).

    Returns
    -------
    float
        ``np.linalg.norm`` of ``y - (c + M @ x)``.
    """
    # accept plain lists as well as arrays: slicing a list works, but
    # ``.reshape`` and ``[:, None]`` need an ndarray
    params = np.asarray(params, dtype=float)
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    c = params[:n_var]
    matrix = params[n_var:].reshape(n_var, n_var)
    # c[:, None] broadcasts the intercept over all sample columns
    prediction = c[:, None] + matrix @ x
    return np.linalg.norm(y - prediction)

# reshape data into 2D array [var, t]
data = np.vstack([df_bilt.loc[t_data, variable_name] for variable_name in variables])

# the model maps every time step onto the next one
x_data = data[:, :-1]
y_data = data[:, 1:]

# guess logical values to help the fit:
# zero intercept and identity matrix, i.e. "next step equals current step"
# (the *_quess spelling is kept because these are notebook-level names)
c_quess = np.zeros(n_var)
M_quess = np.eye(n_var)

# convert to 1D array
# scipy.optimize.minimize expects the parameters as one flat 1D array
params = np.hstack([c_quess, M_quess.flatten()])

# let scipy perform its magic
# Powell is slower, but appears to perform better than BFGS
result = scipy.optimize.minimize(var_initial_norm, params, method='Powell', args=(x_data, y_data, n_var))

# do not silently continue with parameters from an optimisation that stopped early
if not result.success:
    warnings.warn(f'VAR fit did not converge: {result.message}')

# extract fit result
c1 = result.x[:n_var]
M1 = result.x[n_var:].reshape(n_var, n_var)

# calculate residuals and std of error (assuming normality)
residuals = y_data - (c1[:, None] + M1 @ x_data)
std1 = np.std(residuals, axis=1)

In [None]:
def weather_var(x, c, M, std, rng=None):
    """Advance the model by one step: ``c + M @ x`` plus Gaussian noise.

    Parameters
    ----------
    x : ndarray
        Current state vector.
    c : ndarray
        Intercept vector.
    M : ndarray
        Matrix that is applied to ``x``.
    std : float or ndarray
        Standard deviation of the zero-mean normal noise that is added
        (per element when an array is given).
    rng : numpy.random.Generator, optional
        Random generator used to draw the noise. Pass a seeded generator for
        reproducible simulations. When omitted, the global ``np.random``
        state is used, exactly as before.

    Returns
    -------
    ndarray
        The next state vector.
    """
    if rng is None:
        noise = np.random.normal(0, std)
    else:
        noise = rng.normal(0, std)
    return c + M @ x + noise

# +1 as days starts at 1 and index at 0
t_prediction = np.arange(t_data[-1] + 1, t_data[-1] + prediction_range + 1)

prediction_list = []
n_predictions = 1000
for j in range(n_predictions):
    # every run starts from the last observation that was used in the fit
    prediction = [data[:, -1]]
    for _ in range(1, len(t_prediction)):
        prediction.append(weather_var(prediction[-1], c1, M1, std1))
    prediction = np.array(prediction).T
    prediction_list.append(prediction)

# [run, var, t]
prediction_array = np.array(prediction_list)

# plot for each variable
for i, variable_name in enumerate(variables):
    fig, ax = plt.subplots()
    # one call draws all runs: every column of the 2D y-array becomes a line
    ax.plot(t_prediction, prediction_array[:, i, :].T, color='tab:blue', alpha=0.1)

    # empty proxy line that only provides an opaque legend entry
    # (a dummy point at (1, 1) would be included in the axis limits)
    ax.plot([], [], color='tab:blue', label=f'{n_predictions} predictions')
    ax.plot(df_bilt['days'][:t_prediction[-1]], df_bilt[variable_name][:t_prediction[-1]], label='observed', color='black')
    ax.set(xlabel='days', ylabel=variable_name)
    ax.legend(loc=1)
