In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import scipy.optimize

In [None]:
# load the weather table from disk
df_bilt = pd.read_csv('Data/de_bilt_weather.csv')

# keep only the rows of the year 2000 (a copy, so df_bilt itself is never modified)
df_bilt2000 = df_bilt.loc[df_bilt['year'] == 2000].copy()
# renumber the day counter so the first day of the 2000 subset becomes day 1;
# the offset is taken from the subset itself, not from the full record,
# otherwise the counter only starts at 1 when the full record starts in 2000
df_bilt2000['days'] = df_bilt2000['days'] - df_bilt2000['days'].min() + 1

# columns that are modelled in the rest of the notebook
variables = ['cloud_cover', 'wind_speed', 'wind_gust', 'humidity',          # george
            'pressure', 'global_radiation', 'precipitation', 'sunshine',    # skipper
            'temp_mean', 'temp_min', 'temp_max']                            # thijs

# number of modelled variables
n_var = len(variables)

In [None]:
def fit_lrm(x, alfa, beta):
    """Evaluate the lrm line (without its error term) at x.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the line is evaluated.
    alfa : scalar
        Intercept of the line.
    beta : scalar
        Slope of the line.

    Returns
    -------
    alfa + beta * x, with the same shape as x.
    """
    slope_term = beta * x
    return alfa + slope_term

In [None]:
# per-variable fit results: intercept, slope and residual spread
alfa_list, beta_list, std_list = [], [], []

# regress every variable on its own value of the previous day
for variable_name in variables:
    series = np.array(df_bilt[variable_name])
    # pair each day (x) with the day that follows it (y)
    x, y = series[:-1], series[1:]

    # least-squares estimate of the intercept and slope of the lrm line
    (alfa, beta), _ = scipy.optimize.curve_fit(fit_lrm, x, y)

    # spread of the residuals around the fitted line (std assuming normality)
    residuals = y - fit_lrm(x, alfa, beta)

    alfa_list.append(alfa)
    beta_list.append(beta)
    std_list.append(np.std(residuals))

# array form, so the stds can be used element-wise later on
std_list = np.array(std_list)

In [None]:
def fit_matrix(params, x, y, n_var):
    """Objective for the matrix model: norm of the prediction error.

    Parameters
    ----------
    params : 1D array of length n_var + n_var**2
        The first n_var entries are the constant offsets, the remaining
        entries are the coefficient matrix flattened row by row.
    x : array of shape [var, t]
        Model input.
    y : array of shape [var, t]
        Target the prediction is compared against.
    n_var : int
        Number of variables (rows of x and y).

    Returns
    -------
    np.linalg.norm of the error matrix y - (c + matrix @ x).
    """
    offsets, weights = params[:n_var], params[n_var:].reshape(n_var, n_var)
    # the offsets are broadcast over the time axis
    residual = y - (offsets[:, None] + weights @ x)
    return np.linalg.norm(residual)

# reshape data into [var, t]
# where input is the x
# and output the y, which is lagged by 1
input_matrix = np.vstack([df_bilt[variable][:-1] for variable in variables])
output_matrix = np.vstack([df_bilt[variable][1:] for variable in variables])

# guess the matrix based on individual lrm fits:
# the slopes go on the diagonal, all cross terms start at zero
M_quess = np.zeros((n_var, n_var))
np.fill_diagonal(M_quess, beta_list)

# flatten c + M into a 1D array
# required for scipy.optimize.minimize (only accepts a list-like)
# the intercepts start at the individual lrm intercepts, so the starting
# point is exactly the set of single-variable fits instead of an
# intercept-free version of them
params = np.hstack([alfa_list, M_quess.flatten()])

# let scipy do the hard part
result = scipy.optimize.minimize(fit_matrix, params, method='Powell', args=(input_matrix, output_matrix, n_var))

In [None]:
# split the flat optimiser output back into the offset vector and the coefficient matrix
c_fit, M_flat = np.split(result.x, [n_var])
M_fit = M_flat.reshape(n_var, n_var)

# heat map of the fitted coefficients with its colour scale
heatmap = plt.imshow(M_fit)
plt.colorbar(heatmap)

In [None]:
def weather_var(x, c, matrix, std, rng=None):
    """Advance the state by one step and add zero-mean normal noise.

    Parameters
    ----------
    x : array
        Current state; it is multiplied from the left by `matrix`.
    c : array
        Constant offset added to the propagated state.
    matrix : array
        Coefficient matrix applied to `x`.
    std : float or array
        Standard deviation of the noise; an array draws one independent
        noise value per entry.
    rng : numpy.random.Generator, optional
        Generator used to draw the noise. When omitted, numpy's global
        generator is used (the original behaviour), so the result then
        depends on the global random state.

    Returns
    -------
    c + matrix @ x + noise
    """
    # choose the noise source once; both expose the same normal(loc, scale) call
    draw_normal = np.random.normal if rng is None else rng.normal
    return c + matrix @ x + draw_normal(0, std)

In [None]:
# time axis of the simulation; index 0 is the observed starting state
t_array = np.arange(21)
n_predictions = 1000

# weather_var draws its noise from numpy's global generator by default;
# seeding it makes the ensemble identical on every run of this cell
np.random.seed(0)

# every run starts from the first column of the observed data
initial_state = input_matrix[:, 0]

# prediction list is 3D with [n_prediction, var, t]
prediction_list = []
for _ in range(n_predictions):
    prediction = [initial_state]
    # step forward one day at a time, feeding the previous state back in
    for _step in t_array[1:]:
        prediction.append(weather_var(prediction[-1], c_fit, M_fit, std_list))

    # reshape prediction to [var, t]
    prediction_list.append(np.array(prediction).T)

prediction_list = np.array(prediction_list)

# plot for each variable
for i, variable_name in enumerate(variables):
    fig, ax = plt.subplots()

    # all simulated runs in a single call: each column of the [t, n_prediction]
    # array is drawn as its own line
    ax.plot(t_array, prediction_list[:, i, :].T, color='tab:blue', alpha=0.2)

    ax.plot(t_array, df_bilt[variable_name].iloc[:len(t_array)], label='observed', color='black')
    # empty proxy line: gives the ensemble one legend entry without adding
    # a data point that would influence the axis limits
    ax.plot([], [], color='tab:blue', label=f'{n_predictions} predictions')
    ax.set(xlabel='days', ylabel=variable_name)
    ax.legend(loc=1)
