In [None]:
import pandas as pd 
import numpy as np
import scipy
import matplotlib.pyplot as plt

In [None]:
# Load the De Bilt weather table (path is relative to the working directory);
# the LRM helper functions defined below are applied to its columns.

df_bilt = pd.read_csv('Data/de_bilt_weather.csv')

def fit_lrm(x, alfa, beta):
    """Deterministic linear regression model: ``alfa + beta * x``.

    Has the ``f(x, *params)`` signature that ``scipy.optimize.curve_fit``
    expects, so it can be passed directly as the model function.

    Parameters
    ----------
    x : scalar or array-like
        Predictor value(s).
    alfa : float
        Intercept.
    beta : float
        Slope.

    Returns
    -------
    Same kind as ``x``
        The noise-free model value(s).
    """
    slope_term = beta * x
    return alfa + slope_term

def lrm(x, alfa, beta, sigma):
    """Stochastic linear regression model: ``alfa + beta * x`` plus Gaussian noise.

    Parameters
    ----------
    x : scalar or array-like
        Predictor value(s).
    alfa : float
        Intercept.
    beta : float
        Slope.
    sigma : float
        Standard deviation of the zero-mean normal noise term.

    Returns
    -------
    Same kind as ``x``
        The linear prediction with one noise draw added. With a scalar
        ``sigma`` a single draw is made per call, so an array ``x`` has the
        same noise value added to every element.
    """
    linear_part = alfa + beta * x
    noise = np.random.normal(0, sigma)  # uses NumPy's global random state
    return linear_part + noise

# Train the LRM on the first 100 one-step pairs: value at day t -> value at day t + 1.

x_train = df_bilt['temp_mean'][:100]
y_train = df_bilt['temp_mean'][1:101]

params, _ = scipy.optimize.curve_fit(fit_lrm, x_train, y_train)
alfa, beta = params

# Residuals must pair x_train and y_train by position. The two Series carry
# different index labels (0..99 vs 1..100), so subtracting them directly makes
# pandas align on label: each day gets compared with its own prediction and the
# unmatched ends become NaN. Working on NumPy arrays gives the true
# one-step-ahead residuals.
residuals = y_train.to_numpy() - fit_lrm(x_train.to_numpy(), alfa, beta)
# Residual standard deviation with n - 2 degrees of freedom (two fitted parameters).
std = np.sqrt(np.sum(residuals**2) / (len(x_train) - 2))

# Simulate 1000 trajectories of 10 steps each, all starting from the observed
# value at day 100 and feeding every noisy prediction back in as the next input.
all_trajectories = []
start_value = df_bilt['temp_mean'].iloc[100]  # positional, like the slices above

for _ in range(1000):
    trajectory = [start_value]
    current_value = start_value

    for _ in range(10):
        next_value = lrm(current_value, alfa, beta, std)
        trajectory.append(next_value)
        current_value = next_value

    all_trajectories.append(trajectory)

# Point-wise mean over the simulated trajectories (one value per day).
mean_trajectory = np.mean(all_trajectories, axis=0)

# plotting

plt.figure(figsize=(10, 6))
plt.plot(range(90, 101), df_bilt['temp_mean'][90:101], color='blue', label='Actual Data (t < 100)')
plt.plot(range(100, 112), df_bilt['temp_mean'][100:112], color='green', label='Actual Data (t >= 100)', linewidth=2)

# Each trajectory holds 11 points: the observed start value at day 100 followed
# by 10 simulated days, so it spans days 100..110 on the x-axis.
for trajectory in all_trajectories:
    plt.plot(range(100, 111), trajectory, color='blue', alpha=0.1)

plt.plot(range(100, 111), mean_trajectory, color='black', linewidth=2, label='Mean Prediction')

plt.xlabel('Time (days)')
plt.ylabel('Temperature (°C)')
plt.title('Linear Regression Model: 1000 Predictions with Error')
plt.axvline(x=100, color='gray', linestyle=':', label='Training Cutoff (t=100)')
plt.xlim(90, 112)
plt.ylim(0, 15)
plt.legend()
plt.savefig("Figures/Mean Temperature LRM prediction")
plt.show()


In [None]:
# function for plotting lrm prediction for all variables

def plot_lrm_predictions(variable):
    """Fit a lag-1 linear regression to one column of ``df_bilt`` and plot simulated forecasts.

    The model ``value[t + 1] = alfa + beta * value[t] + noise`` is fitted on the
    first 100 consecutive pairs of the column. Starting from the observed value
    at position 100, 1000 trajectories of 10 steps are simulated with ``lrm``
    and drawn together with their point-wise mean and the actual data.

    Parameters
    ----------
    variable : str
        Name of the column in the module-level ``df_bilt`` DataFrame.

    Returns
    -------
    None
        The figure is saved to ``Figures/{variable}_LRM_prediction.png`` and shown.
    """
    series = df_bilt[variable]

    # Work on NumPy arrays so the lagged inputs and targets are paired by
    # position. As pandas Series they carry different index labels (0..99 vs
    # 1..100), and subtracting them would align on label, comparing each day
    # with its own prediction instead of the next day's.
    x_train = series[:100].to_numpy()
    y_train = series[1:101].to_numpy()

    params, _ = scipy.optimize.curve_fit(fit_lrm, x_train, y_train)
    alfa, beta = params

    # Residual standard deviation with n - 2 degrees of freedom (two fitted parameters).
    residuals = y_train - fit_lrm(x_train, alfa, beta)
    std = np.sqrt(np.sum(residuals**2) / (len(x_train) - 2))

    all_trajectories = []
    start_value = series.iloc[100]  # positional, like the training slices

    for _ in range(1000):
        trajectory = [start_value]
        current_value = start_value

        # Feed every noisy prediction back in as the next input.
        for _ in range(10):
            next_value = lrm(current_value, alfa, beta, std)
            trajectory.append(next_value)
            current_value = next_value

        all_trajectories.append(trajectory)

    # Point-wise mean over the simulated trajectories (one value per day).
    mean_trajectory = np.mean(all_trajectories, axis=0)

    plt.figure(figsize=(10, 6))
    plt.plot(range(90, 101), series[90:101], color='blue', label=f'Actual Data (t < 100) - {variable}')
    plt.plot(range(100, 112), series[100:112], color='green', label=f'Actual Data (t >= 100) - {variable}', linewidth=2)

    # Each trajectory holds 11 points: the observed start value at day 100
    # followed by 10 simulated days, so it spans days 100..110 on the x-axis.
    for trajectory in all_trajectories:
        plt.plot(range(100, 111), trajectory, color='blue', alpha=0.1)

    plt.plot(range(100, 111), mean_trajectory, color='black', linewidth=2, label='Mean Prediction')

    plt.xlabel('Time (days)')
    plt.ylabel(variable.replace('_', ' ').title())
    plt.title(f'Linear Regression Model: 1000 Predictions with Error - {variable.replace("_", " ").title()}')
    plt.axvline(x=100, color='gray', linestyle=':', label='Training Cutoff (t=100)')
    plt.legend()
    plt.savefig(f'Figures/{variable}_LRM_prediction.png')
    plt.show()

# Columns of df_bilt to produce an LRM forecast figure for, one figure per column.
variables = [
    'cloud_cover',
    'wind_speed',
    'wind_gust',
    'humidity',
    'pressure',
    'global_radiation',
    'precipitation',
    'sunshine',
    'temp_min',
    'temp_max',
]

for var in variables:
    plot_lrm_predictions(var)