In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.optimize

In [None]:
# Read the weather table and shift the `days` column so its smallest value is 0.
df_bilt = pd.read_csv('Data/de_bilt_weather.csv')
df_bilt['days'] = df_bilt['days'].sub(df_bilt['days'].min())

# Columns that enter the model; their order here fixes the row order of every
# array built from them further down.
# NOTE(review): the trailing name tags are kept from the original; presumably
# they mark who looks after each group of columns -- confirm.
variables = [
    'cloud_cover', 'wind_speed', 'wind_gust', 'humidity',            # george
    'pressure', 'global_radiation', 'precipitation', 'sunshine',     # skipper
    'temp_mean', 'temp_min', 'temp_max',
]

# number of modelled variables
n_var = len(variables)

In [None]:
# Window sizes, counted in time steps from the start of the table.
train_range = 30       # leading time steps used to fit the model
test_range = 1 + 10    # time step the forecast starts from, plus 10 forecast steps

In [None]:
# Stack the selected columns into one array with a row per variable and a
# column per time step, keeping only the first train_range + test_range steps.
n_samples = train_range + test_range
data = np.vstack([df_bilt[name][:n_samples] for name in variables])

# The first `train_range` columns are used for fitting, the rest for testing.
train_data, test_data = data[:, :train_range], data[:, train_range:]

for subset in (train_data, test_data):
    print(subset.shape)

In [None]:
def var_initial_norm(params, data, means, n_var):
    """Objective for fitting a first-order linear model ``x[t+1] = c + M @ x[t]``.

    Parameters
    ----------
    params : 1-D array of length ``n_var + n_var**2``
        The first ``n_var`` entries are the constant term ``c``; the rest is
        the matrix ``M``, flattened row by row.
    data : 2-D array with ``n_var`` rows
        Every column except the first is predicted from the column before it.
    means : 1-D array of length ``n_var``
        Per-row scale; each row of the prediction error is divided by its entry.
    n_var : int
        Number of rows of ``data``.

    Returns
    -------
    float
        Frobenius norm of the scaled one-step prediction error.
    """
    intercept = params[:n_var]
    coefficients = params[n_var:].reshape(n_var, n_var)

    previous, following = data[:, :-1], data[:, 1:]
    one_step = intercept[:, None] + coefficients @ previous

    scaled_error = (following - one_step) / means[:, None]
    return np.linalg.norm(scaled_error)

In [None]:
# Start the search from all-zero parameters: n_var constants followed by the
# n_var x n_var coefficient matrix, flattened row by row.
params = np.zeros(n_var + n_var**2)

# per-variable mean over the training window, used to scale the residuals
means = np.mean(train_data, axis=1)

# let scipy perform its magic
# Powell is slower, but appears to perform better than BFGS
result = scipy.optimize.minimize(var_initial_norm, params, method='Powell', args=(train_data, means, n_var))

# Do not silently carry on with a fit the optimiser itself reports as
# unfinished (e.g. it ran out of iterations): everything below depends on it.
if not result.success:
    warnings.warn(f'VAR fit did not converge: {result.message}')

# extract fit result
c_var = result.x[:n_var]
M_var = result.x[n_var:].reshape(n_var, n_var)

# calculate residuals and std of error (assuming normality)
residuals = train_data[:, 1:] - (c_var[:, None] + M_var @ train_data[:, :-1])
std_var = np.std(residuals, axis=1)

std_var

In [None]:
# Histogram of the fit residuals in row 8 (`temp_mean` in `variables`):
# 8 equal-width bins spanning [-4, 4].
fig, ax = plt.subplots()
ax.hist(residuals[8], bins=8, range=(-4, 4))

In [None]:
def weather_var(x, c, M, std, rng=None):
    """Advance the linear model by one step and add Gaussian noise.

    Parameters
    ----------
    x : 1-D array
        Current state.
    c : 1-D array
        Constant term.
    M : 2-D array
        Coefficient matrix applied to ``x``.
    std : float or 1-D array
        Standard deviation of the zero-mean normal noise added to the step.
    rng : numpy.random.Generator, optional
        Source of the noise. When omitted, NumPy's global generator
        (``np.random``) is used, as before; pass a seeded generator to get a
        reproducible draw without touching global state.

    Returns
    -------
    1-D array
        ``c + M @ x`` plus one noise draw.
    """
    noise_source = np.random if rng is None else rng
    return c + M @ x + noise_source.normal(0, std)

# time axes matching the columns of the training and test windows
t_train_data = np.arange(train_range)
t_test_data = np.arange(train_range, train_range + test_range)

# Fix the state of NumPy's global random generator, which weather_var draws its
# noise from, so the simulated trajectories (and every band and figure derived
# from them) are identical on each run of this cell.
np.random.seed(0)

prediction_list = []
n_predictions = 1000
for j in range(n_predictions):
    # every trajectory starts from the first observed test column ...
    prediction = [test_data[:, 0]]
    # ... and is then advanced once for each remaining test time step
    for _ in t_test_data[1:]:
        prediction.append(weather_var(prediction[-1], c_var, M_var, std_var))
    # list of states -> array with one row per variable, one column per step
    prediction = np.array(prediction).T
    prediction_list.append(prediction)

# axis 0 indexes the simulated trajectories
prediction_matrix = np.array(prediction_list)

# pointwise 95% band and mean across the trajectories
lower = np.percentile(prediction_matrix, 2.5, axis=0)
mean = np.mean(prediction_matrix, axis=0)
upper = np.percentile(prediction_matrix, 97.5, axis=0)

In [None]:
# One figure per variable: simulated trajectories, their 95% band and mean,
# and the observed test values.
for i, variable_name in enumerate(variables):
    fig, ax = plt.subplots()

    # one faint line per simulated trajectory
    for trajectory in prediction_matrix[:, i, :]:
        ax.errorbar(t_test_data, trajectory, color='tab:blue', alpha=0.1)

    # single opaque point whose only job is to give the trajectories a legend entry
    ax.errorbar(t_test_data[0], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')

    # band edges share one style; only the lower edge carries the legend label
    dashed_black = dict(color='black', fmt='--')
    ax.errorbar(t_test_data, lower[i, :], label='95% CI', **dashed_black)
    ax.errorbar(t_test_data, upper[i, :], **dashed_black)

    ax.errorbar(t_test_data, mean[i, :], color='black', fmt='-', label='prediction mean')
    ax.errorbar(t_test_data, test_data[i, :], label='observed', color='tab:red', fmt='o')

    ax.set(xlabel='days', ylabel=variable_name)
    ax.legend(loc=1)

In [None]:
# Publication figure for the variable in row 0 of `variables`.
i = 0
print(variables[i])

fig, ax = plt.subplots()

# one faint line per simulated trajectory
for j in range(len(prediction_list)):
    ax.errorbar(t_test_data, prediction_matrix[j, i, :], color='tab:blue', alpha=0.1)

# single opaque point whose only job is to give the trajectories a legend entry
ax.errorbar(t_test_data[0], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')
ax.errorbar(t_test_data, lower[i, :], color='black', fmt='--', label='95% CI')
ax.errorbar(t_test_data, upper[i, :], color='black', fmt='--')
ax.errorbar(t_test_data, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_test_data, test_data[i, :], label='observed', color='tab:red', fmt='o')

# The x-limits follow the test window instead of being hard-coded, so they stay
# correct when the window sizes change.
ax.set(xlabel='t (days)', ylabel='Cloud coverage', xlim=(t_test_data[0], t_test_data[-1]), ylim=(-15, 25))
ax.legend(loc=1, framealpha=1)

# savefig does not create missing directories; make sure the target exists.
os.makedirs('Figures', exist_ok=True)
fig.savefig('Figures/var1.png', dpi=600)

In [None]:
# Publication figure for the variable in row 8 of `variables`.
i = 8
print(variables[i])

fig, ax = plt.subplots()

# one faint line per simulated trajectory
for j in range(len(prediction_list)):
    ax.errorbar(t_test_data, prediction_matrix[j, i, :], color='tab:blue', alpha=0.1)

# single opaque point whose only job is to give the trajectories a legend entry
ax.errorbar(t_test_data[0], prediction_matrix[0, i, 0], color='tab:blue', label=f'{n_predictions} predictions')
ax.errorbar(t_test_data, lower[i, :], color='black', fmt='--', label='95% CI')
ax.errorbar(t_test_data, upper[i, :], color='black', fmt='--')
ax.errorbar(t_test_data, mean[i, :], color='black', fmt='-', label='prediction mean')
ax.errorbar(t_test_data, test_data[i, :], label='observed', color='tab:red', fmt='o')

# The x-limits follow the test window instead of being hard-coded, so they stay
# correct when the window sizes change.
ax.set(xlabel='t (days)', ylabel='Mean temperature', xlim=(t_test_data[0], t_test_data[-1]), ylim=(-20, 30))
ax.legend(loc=1, framealpha=1)

# savefig does not create missing directories; make sure the target exists.
os.makedirs('Figures', exist_ok=True)
fig.savefig('Figures/var2.png', dpi=600)