In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.optimize

In [None]:
#data 

df_bilt = pd.read_csv('Data/de_bilt_weather.csv')
df_bilt2001 = df_bilt.loc[df_bilt['year'] == 2001].copy()
df_bilt2002 = df_bilt.loc[df_bilt['year'] == 2002].copy()

df_bilt2001['days'] = df_bilt2001['days'] - min(df_bilt2001['days']) + 1
df_bilt2002['days'] = df_bilt2002['days'] - min(df_bilt2002['days']) + 1

In [None]:

#visualisation 

plt.errorbar(df_bilt2001['days'], df_bilt2001['pressure'])
plt.xlabel('days')
plt.ylabel('sea level pressure (1000 hPa)')
plt.show()

plt.errorbar(df_bilt2001['days'], df_bilt2001['global_radiation'])
plt.xlabel('days')
plt.ylabel('global_radiation (100 W/m2)')
plt.show()

plt.errorbar(df_bilt2001['days'], df_bilt2001['precipitation'])
plt.xlabel('days')
plt.ylabel('precipitation amount (10 mm)')
plt.show()

plt.errorbar(df_bilt2001['days'], df_bilt2001['sunshine'])
plt.xlabel('days')
plt.ylabel('sunshine (1 hours)')
plt.show()


In [None]:
# Pressure fitting 

# fitting function: 

def fit_lrm(x, alpha, beta):
    """Evaluate the straight-line model ``alpha + beta * x``.

    ``alpha`` is the intercept and ``beta`` the slope. ``x`` may be a scalar
    or anything that supports multiplication and addition with the
    parameters (e.g. a NumPy array, as passed by ``curve_fit``).
    """
    slope_term = beta * x
    return alpha + slope_term

# lag
# Pair every observation with its successor in row order of df_bilt:
# x is the value at row t, y is the value at row t + 1.

pressure_values = df_bilt['pressure'].to_numpy()
pressure_lag_x = pressure_values[:-1]
pressure_lag_y = pressure_values[1:]

# Least-squares fit of pressure_lag_y = alpha + beta * pressure_lag_x.
optimal_parameters_pressure, covariance_pressure = scipy.optimize.curve_fit(fit_lrm, pressure_lag_x, pressure_lag_y)

optimal_alpha_pressure, optimal_beta_pressure = optimal_parameters_pressure

print("Optimal alpha (pressure):", optimal_alpha_pressure)
print("Optimal beta (pressure):", optimal_beta_pressure)

# Residuals are observed y minus the model evaluated at the predictor x
# (evaluating the model at y, as before, does not give the fit residuals).
fitted_pressure = fit_lrm(pressure_lag_x, optimal_alpha_pressure, optimal_beta_pressure)
residuals_pressure = pressure_lag_y - fitted_pressure

# Residual standard deviation; n - 2 because two parameters were fitted.
std_pressure = np.sqrt(np.sum(residuals_pressure ** 2) / (len(pressure_lag_x) - 2))

plt.errorbar(pressure_lag_x, pressure_lag_y, fmt='o', label = 'Observed data')
plt.errorbar(pressure_lag_x, fitted_pressure, label = 'Fitted line')
plt.xlabel('Pressure (lagged x)')
plt.ylabel('Pressure (lagged y)')
plt.legend()
plt.title('Linear regression fit for lagged pressure')
plt.show()



In [None]:
# Global radiation fitting 

# fitting function: 

def fit_lrm(x, alpha, beta):
    """Straight-line model: intercept ``alpha`` plus slope ``beta`` times ``x``.

    NOTE(review): identical to the ``fit_lrm`` already defined in the
    pressure-fitting cell; this redefinition shadows it with the same body.
    """
    scaled_x = beta * x
    return alpha + scaled_x

# lag
# Pair every observation with its successor in row order of df_bilt:
# x is the value at row t, y is the value at row t + 1.

gr_values = df_bilt['global_radiation'].to_numpy()
gr_lag_x = gr_values[:-1]
gr_lag_y = gr_values[1:]

# Least-squares fit of gr_lag_y = alpha + beta * gr_lag_x.
optimal_parameters_gr, covariance_gr = scipy.optimize.curve_fit(fit_lrm, gr_lag_x, gr_lag_y)

optimal_alpha_gr, optimal_beta_gr = optimal_parameters_gr

print("Optimal alpha (global radiation):", optimal_alpha_gr)
print("Optimal beta (global radiation):", optimal_beta_gr)

# Residuals are observed y minus the model evaluated at the predictor x
# (evaluating the model at y, as before, does not give the fit residuals).
fitted_gr = fit_lrm(gr_lag_x, optimal_alpha_gr, optimal_beta_gr)
residuals_gr = gr_lag_y - fitted_gr

# Residual standard deviation; n - 2 because two parameters were fitted.
std_gr = np.sqrt(np.sum(residuals_gr ** 2) / (len(gr_lag_x) - 2))

plt.errorbar(gr_lag_x, gr_lag_y, fmt='o', label = 'Observed data')
plt.errorbar(gr_lag_x, fitted_gr, label = 'Fitted line')
plt.xlabel('Global radiation (lagged x)')
plt.ylabel('Global radiation (lagged y)')
plt.legend()
plt.title('Linear regression fit for lagged global radiation')
plt.show()

In [None]:
# Precipitation fitting 

# fitting function: 

def fit_lrm(x, alpha, beta):
    """Return ``alpha + beta * x``, a line with intercept ``alpha`` and slope ``beta``.

    NOTE(review): same body as the ``fit_lrm`` defined in the earlier
    fitting cells; this cell redefines it unchanged.
    """
    linear_part = beta * x
    return alpha + linear_part

# lag
# Pair every observation with its successor in row order of df_bilt:
# x is the value at row t, y is the value at row t + 1.

precipitation_values = df_bilt['precipitation'].to_numpy()
precipitation_lag_x = precipitation_values[:-1]
precipitation_lag_y = precipitation_values[1:]

# Least-squares fit of precipitation_lag_y = alpha + beta * precipitation_lag_x.
optimal_parameters_precipitation, covariance_precipitation = scipy.optimize.curve_fit(fit_lrm, precipitation_lag_x, precipitation_lag_y)

optimal_alpha_precipitation, optimal_beta_precipitation = optimal_parameters_precipitation

print("Optimal alpha (precipitation):", optimal_alpha_precipitation)
print("Optimal beta (precipitation):", optimal_beta_precipitation)

# Residuals are observed y minus the model evaluated at the predictor x
# (evaluating the model at y, as before, does not give the fit residuals).
fitted_precipitation = fit_lrm(precipitation_lag_x, optimal_alpha_precipitation, optimal_beta_precipitation)
residuals_precipitation = precipitation_lag_y - fitted_precipitation

# Residual standard deviation; n - 2 because two parameters were fitted.
std_precipitation = np.sqrt(np.sum(residuals_precipitation ** 2) / (len(precipitation_lag_x) - 2))

plt.errorbar(precipitation_lag_x, precipitation_lag_y, fmt='o', label = 'Observed data')
plt.errorbar(precipitation_lag_x, fitted_precipitation, label = 'Fitted line')
plt.xlabel('Precipitation (lagged x)')
plt.ylabel('Precipitation (lagged y)')
plt.legend()
plt.title('Linear regression fit for lagged precipitation')
plt.show()

In [None]:
# Sunshine fitting 

# fitting function: 

def fit_lrm(x, alpha, beta):
    """Linear response ``alpha + beta * x`` (``alpha``: intercept, ``beta``: slope).

    NOTE(review): repeats the ``fit_lrm`` defined in the earlier fitting
    cells with an identical body.
    """
    beta_times_x = beta * x
    return alpha + beta_times_x

# lag
# Pair every observation with its successor in row order of df_bilt:
# x is the value at row t, y is the value at row t + 1.

sunshine_values = df_bilt['sunshine'].to_numpy()
sunshine_lag_x = sunshine_values[:-1]
sunshine_lag_y = sunshine_values[1:]

# Least-squares fit of sunshine_lag_y = alpha + beta * sunshine_lag_x.
optimal_parameters_sunshine, covariance_sunshine = scipy.optimize.curve_fit(fit_lrm, sunshine_lag_x, sunshine_lag_y)

optimal_alpha_sunshine, optimal_beta_sunshine = optimal_parameters_sunshine

print("Optimal alpha (sunshine):", optimal_alpha_sunshine)
print("Optimal beta (sunshine):", optimal_beta_sunshine)

# Residuals are observed y minus the model evaluated at the predictor x
# (evaluating the model at y, as before, does not give the fit residuals).
fitted_sunshine = fit_lrm(sunshine_lag_x, optimal_alpha_sunshine, optimal_beta_sunshine)
residuals_sunshine = sunshine_lag_y - fitted_sunshine

# Residual standard deviation; n - 2 because two parameters were fitted.
std_sunshine = np.sqrt(np.sum(residuals_sunshine ** 2) / (len(sunshine_lag_x) - 2))

plt.errorbar(sunshine_lag_x, sunshine_lag_y, fmt='o', label = 'Observed data')
plt.errorbar(sunshine_lag_x, fitted_sunshine, label = 'Fitted line')
plt.xlabel('Sunshine (lagged x)')
plt.ylabel('Sunshine (lagged y)')
plt.legend()
plt.title('Linear regression fit for lagged sunshine')
plt.show()