In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy 

In [None]:
# Load the weather table and keep a separate copy of the rows for the year 2000.
df_bilt = pd.read_csv('Data/de_bilt_weather.csv')
df_bilt2000 = df_bilt.loc[df_bilt['year'] == 2000].copy()

# Renumber the days of the 2000 subset so that its first day is day 1.
# The offset is taken from the subset itself: subtracting the minimum of the
# full frame would leave the numbering shifted whenever the file also holds
# rows from before 2000.
df_bilt2000['days'] = df_bilt2000['days'] - df_bilt2000['days'].min() + 1

# Columns that are modelled; the order fixes the row/column order of every
# [var, t] array and of the fitted matrix further down.
variables = ['cloud_cover', 'wind_speed', 'wind_gust', 'humidity',          # george
            'pressure', 'global_radiation', 'precipitation', 'sunshine',    # skipper
            'temp_mean', 'temp_min', 'temp_max']                            # thijs

n_var = len(variables)

In [None]:
# var 1 (without c)

# define lrm fit function without error
def fit_lrm(x, alfa, beta):
    """Evaluate the noise-free linear regression model ``alfa + beta * x``.

    Parameters
    ----------
    x : scalar or array-like supporting arithmetic
        Predictor value(s).
    alfa : float
        Intercept of the line.
    beta : float
        Slope of the line.

    Returns
    -------
    The model value for every entry of ``x`` (same kind of object as ``x``).
    """
    slope_term = beta * x
    return alfa + slope_term

# one entry per variable, in the order of `variables`
alfa_list = []
beta_list = []
std_list = []

# fit an independent lag-1 linear regression for every variable
for variable_name in variables:
    series = np.array(df_bilt[variable_name])
    # pair each observation (target) with the one on the preceding row (predictor)
    previous_value, next_value = series[:-1], series[1:]

    # least-squares estimate of intercept and slope; the covariance is not used.
    # NOTE: `alfa` and `beta` stay defined after the loop and then hold the
    # values of the LAST variable only.
    (alfa, beta), _ = scipy.optimize.curve_fit(fit_lrm, previous_value, next_value)

    # std of the residuals (the error is assumed to be normally distributed)
    residual_std = np.std(next_value - fit_lrm(previous_value, alfa, beta))

    alfa_list.append(alfa)
    beta_list.append(beta)
    std_list.append(residual_std)

# array form so the stds can be used directly as a per-variable noise scale
std_list = np.array(std_list)

In [None]:
# define function to fit matrix function
# returns the norm of the error matrix
def fit_matrix(params, x, y, n_var):
    """Objective for fitting ``y ~ c + M @ x``: the norm of the residual matrix.

    Parameters
    ----------
    params : 1D numpy array of length ``n_var + n_var**2``
        The first ``n_var`` entries are the constant vector ``c``; the rest is
        the matrix ``M`` flattened row by row.
    x : 2D array indexed [var, t]
        Model input.
    y : 2D array with the same layout as ``x``
        Values the model should reproduce.
    n_var : int
        Number of variables (rows of ``x`` and ``y``).

    Returns
    -------
    float
        ``np.linalg.norm`` of ``y - (c + M @ x)``.
    """
    offset = params[:n_var].reshape(n_var, 1)
    coefficients = params[n_var:].reshape(n_var, n_var)
    residual = y - (offset + coefficients @ x)
    return np.linalg.norm(residual)

# arrange the data as [var, t]:
# input_matrix holds the model input (x) and output_matrix the target (y),
# which is the same series shifted forward by one row
input_matrix = np.vstack([df_bilt[variable][:-1] for variable in variables])
output_matrix = np.vstack([df_bilt[variable][1:] for variable in variables])

# initial guess for the matrix: the slopes of the individual lrm fits on the
# diagonal, zeros everywhere else
M_quess = np.zeros((n_var, n_var))
np.fill_diagonal(M_quess, beta_list)

# pack c (started at zero) and M into a single 1D array,
# because scipy.optimize.minimize optimises over a flat parameter vector
params = np.hstack([np.zeros(n_var), M_quess.flatten()])

# let scipy do the hard part
result = scipy.optimize.minimize(
    fit_matrix,
    params,
    args=(input_matrix, output_matrix, n_var),
    method='Powell',
)

In [None]:
# unpack the fit result: the first n_var entries are c, the rest is M (row by row)
c_fit = result.x[:n_var]
M_fit = result.x[n_var:].reshape(n_var, n_var)

# Show the fitted matrix as a heatmap. Row i of M produces output variable i
# and column j multiplies input variable j, so rows are the variable at t and
# columns the variable at t-1; name them so the figure can be read on its own.
plt.imshow(M_fit)
plt.colorbar()
plt.xticks(range(n_var), variables, rotation=90)
plt.yticks(range(n_var), variables)
plt.xlabel('variable at t-1')
plt.ylabel('variable at t')
plt.title('Fitted VAR matrix')
plt.show()

In [None]:
def weather_var(x, c, matrix, std):
    """Advance the VAR model by one step and add Gaussian noise.

    Parameters
    ----------
    x : 1D array
        Current state, one entry per variable.
    c : 1D array
        Constant term.
    matrix : 2D array
        Coefficient matrix applied to ``x``.
    std : float or 1D array
        Standard deviation of the zero-mean normal noise (drawn from the
        global ``np.random`` state).

    Returns
    -------
    1D array
        ``c + matrix @ x`` plus one noise draw.
    """
    expected = c + matrix @ x
    return expected + np.random.normal(0, std)

t_array = np.arange(21)
n_predictions = 1000

# prediction_list ends up 3D, indexed as [n_prediction, var, t]
prediction_list = []
for _ in range(n_predictions):
    # every simulated trajectory starts from the first column of the input data
    state = input_matrix[:, 0]
    trajectory = [state]
    # one model step for every remaining point on the time axis
    for _step in range(1, len(t_array)):
        state = weather_var(state, c_fit, M_fit, std_list)
        trajectory.append(state)

    # stacking gives [t, var]; transpose to [var, t]
    prediction_list.append(np.array(trajectory).T)

prediction_list = np.array(prediction_list)

# plot mean temperature and cloud coverage

# Observations over the same horizon as the simulations. The length comes from
# t_array instead of a second hard-coded 21, so changing the horizon in one
# place cannot cause a shape mismatch when plotting.
n_days = len(t_array)
observed_cloud_cover = df_bilt2000['cloud_cover'].values[:n_days]
observed_temp_mean = df_bilt2000['temp_mean'].values[:n_days]

# select one variable from the [n_prediction, var, t] array -> [n_prediction, t]
cloud_cover_predictions = prediction_list[:, variables.index('cloud_cover'), :]
temp_mean_predictions = prediction_list[:, variables.index('temp_mean'), :]

# (simulated trajectories, observed series, y-label, title, output file)
figure_specs = [
    (cloud_cover_predictions, observed_cloud_cover, 'Cloud Coverage (Okta)',
     'VAR Model Cloud Coverage Over Time', "Figures/Cloud Coverage var1"),
    (temp_mean_predictions, observed_temp_mean, 'Mean Temperature (°C)',
     'VAR Model Mean Temperature Over Time', "Figures/Mean Temperature var1"),
]

# both figures share the same layout, so draw them with one loop
for predictions, observed, ylabel, title, filename in figure_specs:
    plt.figure(figsize=(8, 6))
    # thin transparent lines: one per simulated trajectory
    for prediction in predictions:
        plt.plot(t_array, prediction, color='blue', alpha=0.2, linewidth=0.5)
    plt.plot(t_array, observed, color='black', linewidth=2, label='observed')
    plt.xlabel('Days')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.savefig(filename)
    plt.show()



In [None]:
# var 2

def var_initial_norm(params, x, y, n_var):
    """Return the norm of the error matrix ``y - (c + M @ x)``.

    Parameters
    ----------
    params : 1D numpy array of length ``n_var + n_var**2``
        ``c`` (first ``n_var`` entries) followed by ``M`` flattened row by row.
    x : 2D array indexed [var, t]
        Model input.
    y : 2D array with the same layout as ``x``
        Values the model should reproduce.
    n_var : int
        Number of variables.

    Returns
    -------
    float
        ``np.linalg.norm`` of the residual matrix.
    """
    c, flat_matrix = params[:n_var], params[n_var:]
    matrix = flat_matrix.reshape(n_var, n_var)
    return np.linalg.norm(y - (c[:, None] + matrix @ x))

# stack the first 30 rows of every variable into a 2D array [var, t]
data = np.vstack([df_bilt[variable_name][:30] for variable_name in variables])

# lag-1 pairs: column k of x_data is followed in time by column k of y_data
x_data, y_data = data[:, :-1], data[:, 1:]

# starting point that helps the fit: c = 0 and M = identity
c_quess = np.zeros(n_var)
M_quess = np.eye(n_var)

# scipy.optimize.minimize works on a flat parameter vector, so pack [c, M]
params = np.hstack([c_quess, M_quess.flatten()])

# let scipy perform its magic
# Powell is slower, but appears to perform better than BFGS
result = scipy.optimize.minimize(
    var_initial_norm,
    params,
    args=(x_data, y_data, n_var),
    method='Powell',
)

# extract fit result
c1, M1 = result.x[:n_var], result.x[n_var:].reshape(n_var, n_var)

# residuals of the fitted model and their std per variable (assuming normality)
residuals = y_data - (c1[:, None] + M1 @ x_data)
std1 = residuals.std(axis=1)

def weather_var(x, c, M, std):
    """Take one noisy step of the VAR model: ``c + M @ x`` plus normal noise.

    Parameters
    ----------
    x : 1D array
        Current state, one entry per variable.
    c : 1D array
        Constant term.
    M : 2D array
        Coefficient matrix applied to ``x``.
    std : float or 1D array
        Standard deviation of the zero-mean normal noise (drawn from the
        global ``np.random`` state).

    Returns
    -------
    1D array
        The next state.
    """
    deterministic_part = c + M @ x
    noise = np.random.normal(0, std)
    return deterministic_part + noise

# time axis of the forecast: starts at the number of columns in x_data and
# runs up to (not including) 50
t_prediction = np.arange(x_data.shape[1], 50)

n_predictions = 1000
prediction_list = []
for j in range(n_predictions):
    # every run starts from the last column of the data used in the fit
    state = y_data[:, -1]
    trajectory = [state]
    # one model step for every remaining point on the time axis
    for _ in range(len(t_prediction) - 1):
        state = weather_var(state, c1, M1, std1)
        trajectory.append(state)
    # stacking gives [t, var]; transpose to [var, t]
    prediction_list.append(np.array(trajectory).T)

# 3D array indexed as [n_prediction, var, t]
prediction_array = np.array(prediction_list)

# plot for mean temperature and cloud coverage

# Make sure the observed series have the same length as t_prediction: both
# slice bounds are taken from t_prediction itself rather than repeating the
# hard-coded end value.
first_t, last_t = t_prediction[0], t_prediction[-1]
observed_cloud_cover = df_bilt2000['cloud_cover'].values[first_t:last_t + 1]
observed_temp_mean = df_bilt2000['temp_mean'].values[first_t:last_t + 1]

# select one variable from the [n_prediction, var, t] array -> [n_prediction, t]
cloud_cover_predictions = prediction_array[:, variables.index('cloud_cover'), :]
temp_mean_predictions = prediction_array[:, variables.index('temp_mean'), :]

# (simulated trajectories, observed series, y-label, title, output file)
figure_specs = [
    (cloud_cover_predictions, observed_cloud_cover, 'Cloud Coverage (Okta)',
     'VAR model Cloud Coverage Over Time', "Figures/Cloud Coverage var2"),
    (temp_mean_predictions, observed_temp_mean, 'Mean Temperature (°C)',
     'VAR Model Mean Temperature Over Time', "Figures/Mean Temperature var2"),
]

# both figures share one layout; drawing them in a loop also gives the
# cloud-coverage figure the x-axis label it was missing
for predictions, observed, ylabel, title, filename in figure_specs:
    plt.figure(figsize=(8, 6))
    # thin transparent lines: one per simulated trajectory
    for prediction in predictions:
        plt.plot(t_prediction, prediction, color='blue', alpha=0.2, linewidth=0.5)
    plt.plot(t_prediction, observed, color='black', linewidth=2, label='observed')
    plt.xlabel('Days')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.savefig(filename)
    plt.show()



In [None]:
# LRM mean temperature

# Use the coefficients that were fitted for temp_mean. The bare `alfa`/`beta`
# variables left over from the fitting loop belong to the last variable in
# `variables`, so plotting with them would draw the wrong line.
temp_mean_idx = variables.index('temp_mean')
temp_mean_alfa = alfa_list[temp_mean_idx]
temp_mean_beta = beta_list[temp_mean_idx]

# lag-1 pairs: value on the previous row (x) against value on the next row (y)
temp_mean_values = df_bilt['temp_mean'].values
temp_mean_prev, temp_mean_next = temp_mean_values[:-1], temp_mean_values[1:]

plt.plot(temp_mean_prev, temp_mean_next, 'o', color='tab:blue')
plt.plot(temp_mean_prev, fit_lrm(temp_mean_prev, temp_mean_alfa, temp_mean_beta), '-', color='black')
plt.title('LRM fit for Mean Temperature')
plt.ylabel('T(t)')
plt.xlabel('T(t-1)')
plt.savefig("Figures/Mean Temperature LRM")
plt.show()

In [None]:
# LRM plot cloud coverage

# Use the coefficients that were fitted for cloud_cover. The bare `alfa`/`beta`
# variables left over from the fitting loop belong to the last variable in
# `variables`, so plotting with them would draw the wrong line.
cloud_cover_idx = variables.index('cloud_cover')
cloud_cover_alfa = alfa_list[cloud_cover_idx]
cloud_cover_beta = beta_list[cloud_cover_idx]

# lag-1 pairs: value on the previous row (x) against value on the next row (y)
cloud_cover_values = df_bilt['cloud_cover'].values
cloud_cover_prev, cloud_cover_next = cloud_cover_values[:-1], cloud_cover_values[1:]

plt.plot(cloud_cover_prev, cloud_cover_next, 'o', color='tab:blue')
plt.plot(cloud_cover_prev, fit_lrm(cloud_cover_prev, cloud_cover_alfa, cloud_cover_beta), '-', color='black')
plt.title('LRM fit for Cloud Coverage')
# the axes show cloud cover, not temperature
plt.ylabel('Cloud cover (t)')
plt.xlabel('Cloud cover (t-1)')
plt.savefig("Figures/Cloud Coverage LRM")
plt.show()