In [607]:
import pandas as pd
import numpy as np

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides library deprecation and
# convergence warnings raised by the modelling cells below — consider
# narrowing it to specific categories once the noisy source is identified.
warnings.filterwarnings("ignore")

#### 1. Loading in our data

In [608]:
# Read the two preprocessed Feather files: the weather readings and the
# per-date image-derived measurements.
weather_df: pd.DataFrame = pd.read_feather(
    path="./training_data/processed_data/preprocessed_weather_hartbeespoort.feather"
)
images_df: pd.DataFrame = pd.read_feather(
    path="./training_data/processed_data/preprocessed_image_test.feather"
)

In [609]:
# Display the weather frame: `windspeed` and `winddir` columns indexed by `datetime`.
weather_df

Unnamed: 0_level_0,windspeed,winddir
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-04,19.1,292.4
2023-01-09,15.4,325.7
2023-01-11,23.8,69.0
2023-01-14,11.6,38.4
2023-01-16,16.3,47.8
...,...,...
2024-04-10,19.6,194.2
2024-04-13,17.2,341.4
2024-04-18,16.2,241.7
2024-04-20,16.9,292.4


In [610]:
# Display the image frame: for each `datetime`, three groups (suffixes _1.._3)
# of center_x / center_y / x_axis_length / y_axis_length / angle columns.
images_df

Unnamed: 0_level_0,center_x_1,center_y_1,x_axis_length_1,y_axis_length_1,angle_1,center_x_2,center_y_2,x_axis_length_2,y_axis_length_2,angle_2,center_x_3,center_y_3,x_axis_length_3,y_axis_length_3,angle_3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-01-04,774.568298,464.119568,291.609772,653.578308,73.510216,1146.988159,312.367767,57.650665,146.202179,48.961891,876.470886,140.222977,36.524986,106.530754,119.656212
2023-01-09,646.504028,420.321869,278.589264,730.667725,82.895424,1291.589722,435.230927,10.093279,45.888073,78.674332,281.132996,427.735809,16.663977,27.203815,149.128738
2023-01-11,863.524353,387.239929,464.276306,597.633362,20.470768,490.644684,588.939270,19.481133,33.339855,86.833237,281.148712,430.134521,16.615612,24.192135,140.445236
2023-01-14,906.893433,383.717712,466.869843,570.567200,66.743622,1298.106934,398.989075,30.712812,65.594879,103.238312,189.705276,579.077698,23.601635,54.288506,165.871780
2023-01-16,624.036072,339.108948,325.282562,731.776062,57.742481,279.872925,429.268707,16.362089,23.391392,139.758148,797.798828,427.500702,13.700137,35.471203,133.861191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-10,1324.350586,422.794281,17.715271,51.442062,68.193947,1134.241943,220.593307,11.787232,39.232216,79.759109,1073.128418,207.650436,9.741089,55.922703,114.016281
2024-04-13,752.452454,289.341919,13.216461,32.141590,62.891289,714.009460,306.258728,7.657600,39.223877,66.653549,627.675842,413.542480,11.451272,24.320898,0.623103
2024-04-18,668.551453,445.496552,32.465782,66.018997,27.909737,237.793610,557.887268,6.805774,15.332954,112.110847,360.941437,496.977661,7.062338,13.159757,43.582386
2024-04-20,768.659729,94.758255,21.042046,71.317238,135.777664,832.362061,284.696594,7.747338,49.398838,177.851105,915.509277,569.841309,6.616477,22.889042,163.148788


#### 2. Preparing our data

In [611]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Hold out 20% of the rows for validation. The weather columns are the
# features (X) and the image-derived columns are the targets (y).
# NOTE(review): the two frames are paired row-by-row by position; this assumes
# they share the same datetime index in the same order — confirm upstream.
result: list[pd.DataFrame] = train_test_split(
    weather_df,
    images_df,
    test_size=0.2,
    random_state=69,
)

# train_test_split returns [X_train, X_valid, y_train, y_valid] for two inputs.
X_train, X_valid, y_train, y_valid = result

In [612]:
def print_metrics(actual, prediction, data_type) -> None:
    """Print MSE, RMSE and MAE for a set of predictions.

    Args:
        actual: Ground-truth targets (array-like; may have several output columns).
        prediction: Model predictions with the same shape as ``actual``.
        data_type: Label prefixed to every metric line (e.g. "KNOWN" or "NEW").

    Returns:
        None. The metrics are written to stdout.
    """
    print("Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)")

    mse = mean_squared_error(actual, prediction)

    # The `squared=False` flag was deprecated in scikit-learn 1.4 and removed
    # in 1.6, so it raises TypeError on current releases. Reproduce what it
    # computed for multi-output targets: take the RMSE of each output column,
    # then average those (this is NOT the same as sqrt of the averaged MSE).
    per_output_mse = mean_squared_error(actual, prediction, multioutput="raw_values")
    rmse = float(np.sqrt(per_output_mse).mean())

    mae = mean_absolute_error(actual, prediction)

    print(f"{data_type} DATA: MSE: {mse}")
    print(f"{data_type} DATA: RMSE: {rmse}")
    print(f"{data_type} DATA: MAE: {mae}\n")

#### 3. Testing different types of regression models

##### 3.1. Decision Tree Regressor

In [613]:
from sklearn.tree import DecisionTreeRegressor

# Seed the estimator (same seed as the train/validation split) so the metrics
# below are reproducible on Restart & Run All; unseeded trees can break ties
# between equally good splits differently from run to run.
tree_model = DecisionTreeRegressor(random_state=69)

tree_model.fit(X_train, y_train)

# Error on the rows the model was fitted on ("KNOWN").
tree_train_pred: np.ndarray = tree_model.predict(X_train)
print_metrics(y_train, tree_train_pred, "KNOWN")

# Error on the held-out rows ("NEW"); kept in `tree_y_pred` for later use.
tree_y_pred: np.ndarray = tree_model.predict(X_valid)
print_metrics(y_valid, tree_y_pred, "NEW")

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
KNOWN DATA: MSE: 0.0
KNOWN DATA: RMSE: 0.0
KNOWN DATA: MAE: 0.0

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
NEW DATA: MSE: 65537.87110016512
NEW DATA: RMSE: 200.07042910674767
NEW DATA: MAE: 161.0678206647237



##### 3.2. Random Forest Regressor

In [614]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest (same seed as the train/validation split): bootstrap sampling
# and feature selection are random, so unseeded runs report different metrics.
forest_model = RandomForestRegressor(random_state=69)

forest_model.fit(X_train, y_train)

# Error on the rows the model was fitted on ("KNOWN").
forest_train_pred: np.ndarray = forest_model.predict(X_train)
print_metrics(y_train, forest_train_pred, "KNOWN")

# Error on the held-out rows ("NEW"); kept in `forest_y_pred` for later use.
forest_y_pred: np.ndarray = forest_model.predict(X_valid)
print_metrics(y_valid, forest_y_pred, "NEW")

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
KNOWN DATA: MSE: 5031.522662457353
KNOWN DATA: RMSE: 55.82412904292937
KNOWN DATA: MAE: 45.068774299693686

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
NEW DATA: MSE: 34878.29873650347
NEW DATA: RMSE: 145.39860848350293
NEW DATA: MAE: 115.955795088466



##### 3.3. Linear Regressor

In [615]:
from sklearn.linear_model import LinearRegression

# Linear baseline. `fit` returns the estimator itself, so construction and
# training can be chained.
linear_model = LinearRegression().fit(X_train, y_train)

# Error on the training rows ("KNOWN").
print_metrics(y_train, linear_model.predict(X_train), "KNOWN")

# Error on the held-out rows ("NEW"); `linear_y_pred` ends up holding these.
linear_y_pred: np.ndarray = linear_model.predict(X_valid)
print_metrics(y_valid, linear_y_pred, "NEW")

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
KNOWN DATA: MSE: 29337.332495587772
KNOWN DATA: RMSE: 134.8365669410871
KNOWN DATA: MAE: 111.04436678878454

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
NEW DATA: MSE: 26661.185391312607
NEW DATA: RMSE: 126.29722438787681
NEW DATA: MAE: 103.61787139449103



##### 3.4. Nearest Neighbour Regressor

In [616]:
from sklearn.neighbors import KNeighborsRegressor

# Nearest-neighbour regressor with library-default settings; `fit` returns
# the estimator, so it is built and trained in one expression.
neighbour_model = KNeighborsRegressor().fit(X_train, y_train)

# Error on the training rows ("KNOWN").
print_metrics(y_train, neighbour_model.predict(X_train), "KNOWN")

# Error on the held-out rows ("NEW"); `neighbour_y_pred` ends up holding these.
neighbour_y_pred: np.ndarray = neighbour_model.predict(X_valid)
print_metrics(y_valid, neighbour_y_pred, "NEW")

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
KNOWN DATA: MSE: 22235.597905527207
KNOWN DATA: RMSE: 118.01592150129993
KNOWN DATA: MAE: 94.88348599153696

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
NEW DATA: MSE: 31459.358370296555
NEW DATA: RMSE: 141.49494425290862
NEW DATA: MAE: 113.14831115779874



##### 3.5. Multi-Layer Perceptron (MLP) Regressor

In [617]:
from sklearn.neural_network import MLPRegressor

# Seed the network (same seed as the train/validation split): weight
# initialisation and batch shuffling are random, so unseeded runs report
# different metrics every time.
# NOTE(review): the inputs are not scaled and the default iteration budget is
# used; with warnings globally ignored at the top of the notebook, a
# non-convergence warning would be invisible here — confirm the fit converged.
mlp_model = MLPRegressor(random_state=69)

mlp_model.fit(X_train, y_train)

# Error on the rows the model was fitted on ("KNOWN").
mlp_train_pred: np.ndarray = mlp_model.predict(X_train)
print_metrics(y_train, mlp_train_pred, "KNOWN")

# Error on the held-out rows ("NEW"); kept in `mlp_y_pred` for later use.
mlp_y_pred: np.ndarray = mlp_model.predict(X_valid)
print_metrics(y_valid, mlp_y_pred, "NEW")

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
KNOWN DATA: MSE: 61094.81297709543
KNOWN DATA: RMSE: 191.0631354971546
KNOWN DATA: MAE: 152.06287885840848

Mean Squared Error, Root Mean Squared Error, Mean Absolute Error (perfect = 0.0)
NEW DATA: MSE: 68901.29620961491
NEW DATA: RMSE: 197.80441286382433
NEW DATA: MAE: 159.59219804155848



In [618]:
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.neuralforecast import NeuralForecastRNN

import prophet

In [619]:
# sktime NaiveForecaster configured with strategy="last". It is only
# instantiated here; no fit/predict call appears in the cells visible so far.
naive_model = NaiveForecaster(strategy="last")



In [620]:
# sktime NeuralForecastRNN with all-default arguments. It is only instantiated
# here; no fit/predict call appears in the cells visible so far.
rnn_model = NeuralForecastRNN()