# <font color="red">Forecasting with Gradient Boosting Regressor</font>

# Library Import

In [1]:
# Data manipulation and visualization
import pandas as pd
import math
import json
import multiprocessing

# Preprocessing
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

import sys
# sys.path.append('../../') # Uncomment this line if running locally
sys.path.append('/kaggle/input/weatherforecasting') # Uncomment this line if running on Kaggle
from historyManagement import *

# suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Datasets Import

In [2]:
# Load the weather dataset for visibility prediction
# weather_data = pd.read_csv('../../FinalDatasets/finalDataset.csv') # Uncomment this line if running locally
weather_data = pd.read_csv('/kaggle/input/weatherforecasting/finalDataset.csv') # Uncomment this line if running on Kaggle
weather_data['DateTime'] = pd.to_datetime(weather_data['DateTime'])
weather_data.head()

Unnamed: 0,DateTime,Year,Month,Day,Hour,Dew,Precip,PrecipProb,Windgust,WindSpeed,...,Conditions_t+3,Conditions_t+4,Conditions_t+5,Conditions_t+6,Icon_t+1,Icon_t+2,Icon_t+3,Icon_t+4,Icon_t+5,Icon_t+6
0,2023-01-02 00:00:00,2023,1,2,0,15.19,0.0,0.0,10.31,0.24,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2023-01-02 01:00:00,2023,1,2,1,14.72,0.0,0.0,9.72,0.21,...,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0
2,2023-01-02 02:00:00,2023,1,2,2,14.72,0.0,0.0,10.66,0.25,...,0.0,0.0,2.0,2.0,1.0,1.0,1.0,1.0,4.0,4.0
3,2023-01-02 03:00:00,2023,1,2,3,14.23,0.0,0.0,9.91,0.33,...,0.0,2.0,2.0,2.0,1.0,1.0,1.0,4.0,4.0,4.0
4,2023-01-02 04:00:00,2023,1,2,4,14.72,0.0,0.0,10.12,0.2,...,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,4.0,4.0


In [3]:
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14586 entries, 0 to 14585
Columns: 575 entries, DateTime to Icon_t+6
dtypes: datetime64[ns](1), float64(568), int64(6)
memory usage: 64.0 MB


In [4]:
weather_data.describe()

Unnamed: 0,DateTime,Year,Month,Day,Hour,Dew,Precip,PrecipProb,Windgust,WindSpeed,...,Conditions_t+3,Conditions_t+4,Conditions_t+5,Conditions_t+6,Icon_t+1,Icon_t+2,Icon_t+3,Icon_t+4,Icon_t+5,Icon_t+6
count,14586,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,...,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0
mean,2023-11-01 20:30:00,2023.40107,5.727684,15.753599,11.496298,20.789805,0.165143,19.505391,17.075919,8.096791,...,2.044289,2.044358,2.044426,2.044495,3.795283,3.795352,3.79542,3.795489,3.795557,3.795626
min,2023-01-02 00:00:00,2023.0,1.0,1.0,0.0,2.02,0.0,0.0,0.66,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2023-06-02 22:15:00,2023.0,3.0,8.0,5.0,17.1,0.0,0.0,9.17,2.88,...,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0
50%,2023-11-01 20:30:00,2023.0,6.0,16.0,11.0,21.85,0.0,0.0,14.66,7.67,...,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0
75%,2024-04-01 18:45:00,2024.0,8.0,23.0,17.0,24.98,0.01,6.45,23.4175,11.75,...,2.0,2.0,2.0,2.0,5.0,5.0,5.0,5.0,5.0,5.0
max,2024-08-31 17:00:00,2024.0,12.0,31.0,23.0,28.88,44.5,100.0,84.12,71.97,...,5.0,5.0,5.0,5.0,6.0,6.0,6.0,6.0,6.0,6.0
std,,0.490132,3.192277,8.786893,6.921353,4.824925,0.844688,37.410656,10.200773,6.248622,...,1.567964,1.567897,1.567829,1.567761,1.986011,1.985931,1.985852,1.985773,1.985694,1.985614


# Data Split

In [5]:
# Split the dataset by index: first 80% for training, last 20% for testing (no shuffle, maintain serial order)
split_index = int(len(weather_data) * 0.8)
weather_data_train = weather_data.iloc[:split_index]
weather_data_test = weather_data.iloc[split_index:]

# Some Reused Parameters

In [6]:
grid_param = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10, 15, 20],
    'min_samples_leaf': [1, 2, 4, 6],
    'subsample': [0.7, 0.8, 0.9],
    'max_features': ['sqrt', 'log2', None],
    'loss': ['squared_error', 'huber']
}

In [7]:
number_of_model_for_randomized_grid = 2
model_name_for_saving = "Gradient Boosting"
horizon = 6
# core_to_use = max(1, multiprocessing.cpu_count() - 2) # uncomment if you want to use your laptop while training
core_to_use = -1 # uncomment if you want to use all available cores

columns_with_t_plus = [col for col in weather_data.columns if '_t+' in col]
column_to_exclude = columns_with_t_plus + ['DateTime', 'Year']

column_to_predict = [col for col in weather_data.columns if '_t+' in col]
# Remove columns containing 'Conditions' or 'Icon' from column_to_predict
column_to_predict = [col for col in column_to_predict if 'Conditions' not in col and 'Icon' not in col]

print(column_to_exclude)
print(column_to_predict)
print("Core to use:", core_to_use)

['Temp_t+1', 'Temp_t+2', 'Temp_t+3', 'Temp_t+4', 'Temp_t+5', 'Temp_t+6', 'FeelsLike_t+1', 'FeelsLike_t+2', 'FeelsLike_t+3', 'FeelsLike_t+4', 'FeelsLike_t+5', 'FeelsLike_t+6', 'Humidity_t+1', 'Humidity_t+2', 'Humidity_t+3', 'Humidity_t+4', 'Humidity_t+5', 'Humidity_t+6', 'Visibility_t+1', 'Visibility_t+2', 'Visibility_t+3', 'Visibility_t+4', 'Visibility_t+5', 'Visibility_t+6', 'SolarEnergy_t+1', 'SolarEnergy_t+2', 'SolarEnergy_t+3', 'SolarEnergy_t+4', 'SolarEnergy_t+5', 'SolarEnergy_t+6', 'SolarRadiation_t+1', 'SolarRadiation_t+2', 'SolarRadiation_t+3', 'SolarRadiation_t+4', 'SolarRadiation_t+5', 'SolarRadiation_t+6', 'Conditions_t+1', 'Conditions_t+2', 'Conditions_t+3', 'Conditions_t+4', 'Conditions_t+5', 'Conditions_t+6', 'Icon_t+1', 'Icon_t+2', 'Icon_t+3', 'Icon_t+4', 'Icon_t+5', 'Icon_t+6', 'DateTime', 'Year']
['Temp_t+1', 'Temp_t+2', 'Temp_t+3', 'Temp_t+4', 'Temp_t+5', 'Temp_t+6', 'FeelsLike_t+1', 'FeelsLike_t+2', 'FeelsLike_t+3', 'FeelsLike_t+4', 'FeelsLike_t+5', 'FeelsLike_t+6', 

# All Features

In [8]:
X_train_raw = weather_data_train.drop(columns=column_to_exclude)
y_train = weather_data_train[column_to_predict]

In [9]:
X_test_raw = weather_data_test.drop(columns=column_to_exclude)
y_test = weather_data_test[column_to_predict]

In [10]:
print("X_train: ", len(X_train_raw))
print("y_train: ", len(y_train))
print("\nX_test: ", len(X_test_raw))
print("y_test: ", len(y_test))

X_train:  11668
y_train:  11668

X_test:  2918
y_test:  2918


## MinMax Scaler

In [11]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [12]:
X_train

array([[0.        , 0.03333333, 0.        , ..., 0.16666667, 0.16666667,
        0.16666667],
       [0.        , 0.03333333, 0.04347826, ..., 0.16666667, 0.16666667,
        0.16666667],
       [0.        , 0.03333333, 0.08695652, ..., 0.5       , 0.16666667,
        0.16666667],
       ...,
       [0.36363636, 0.03333333, 0.04347826, ..., 0.83333333, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.08695652, ..., 0.83333333, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.13043478, ..., 0.83333333, 0.83333333,
        0.83333333]])

In [13]:
X_test

array([[0.36363636, 0.03333333, 0.17391304, ..., 0.66666667, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.2173913 , ..., 0.66666667, 0.66666667,
        0.83333333],
       [0.36363636, 0.03333333, 0.26086957, ..., 0.66666667, 0.66666667,
        0.66666667],
       ...,
       [0.63636364, 1.        , 0.65217391, ..., 0.66666667, 0.66666667,
        0.66666667],
       [0.63636364, 1.        , 0.69565217, ..., 1.        , 0.66666667,
        0.66666667],
       [0.63636364, 1.        , 0.73913043, ..., 0.83333333, 1.        ,
        0.66666667]])

### Default Parameters

In [14]:
gbr_model = GradientBoostingRegressor(random_state=42)
training_model = MultiOutputRegressor(gbr_model)
training_model

In [15]:
%%time
training_model.fit(X_train, y_train)

CPU times: user 1h 3min 22s, sys: 703 ms, total: 1h 3min 22s
Wall time: 1h 3min 23s


In [16]:
y_pred = training_model.predict(X_test)
y_pred

array([[ 28.58877246,  28.90606942,  29.8791327 , ..., 385.27856433,
        619.37225553, 762.72602353],
       [ 29.0381504 ,  29.60140327,  30.45830111, ..., 608.47332157,
        794.7096399 , 897.63997678],
       [ 30.64543027,  31.29251091,  32.01119235, ..., 764.16654453,
        875.09078932, 833.38945938],
       ...,
       [ 32.92386518,  32.0943843 ,  31.22469018, ...,  20.83637753,
         11.4300263 ,   3.73853322],
       [ 31.14240146,  30.53848848,  29.51148013, ...,   3.16084742,
          8.10768073, -10.06561889],
       [ 30.39551377,  29.93813687,  29.42124934, ...,   9.1292137 ,
          5.25636933, -28.57637738]])

In [17]:
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [18]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [19]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1746.60086595712
MAE Score: 10.999891273569627
R2 Score: 0.6533680103834993
RMSE Score: 41.792354156677035


In [20]:
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.690175,0.568093,0.89073,0.830768
1,Temp_t+2,1.158717,0.737658,0.816544,1.076437
2,Temp_t+3,1.495436,0.837528,0.763236,1.22288
3,Temp_t+4,1.87576,0.958434,0.70301,1.369584
4,Temp_t+5,2.127149,1.024107,0.663141,1.458475
5,Temp_t+6,2.306059,1.071346,0.634579,1.518571
6,Temp,1.608883,0.866194,0.745207,1.268417
7,FeelsLike_t+1,3.85625,1.366738,0.845613,1.963734
8,FeelsLike_t+2,6.228925,1.770192,0.75063,2.495782
9,FeelsLike_t+3,8.252628,2.062029,0.669609,2.872739


In [21]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "MinMaxScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': json.dumps(make_serializable(training_model.get_params()))
}

save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1746.600866, R²: 0.653368
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Randomized Grid Search

## Standard Scaler

In [22]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [23]:
X_train

array([[-1.29540078, -1.55866853, -1.66051296, ..., -1.24648639,
        -1.24626261, -1.24603889],
       [-1.29540078, -1.55866853, -1.51607747, ..., -1.24648639,
        -1.24626261, -1.24603889],
       [-1.29540078, -1.55866853, -1.37164198, ..., -0.26491096,
        -1.24626261, -1.24603889],
       ...,
       [-0.15173127, -1.55866853, -1.51607747, ...,  0.71666448,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -1.37164198, ...,  0.71666448,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -1.2272065 , ...,  0.71666448,
         0.71680079,  0.71693711]])

In [24]:
X_test

array([[-0.15173127, -1.55866853, -1.08277101, ...,  0.22587676,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -0.93833552, ...,  0.22587676,
         0.22603494,  0.71693711],
       [-0.15173127, -1.55866853, -0.79390003, ...,  0.22587676,
         0.22603494,  0.22619311],
       ...,
       [ 0.70602087,  1.74193343,  0.50601936, ...,  0.22587676,
         0.22603494,  0.22619311],
       [ 0.70602087,  1.74193343,  0.65045485, ...,  1.2074522 ,
         0.22603494,  0.22619311],
       [ 0.70602087,  1.74193343,  0.79489033, ...,  0.71666448,
         1.20756664,  0.22619311]])

### Default Parameters

In [25]:
gbr_model = GradientBoostingRegressor(random_state=42)
training_model = MultiOutputRegressor(gbr_model)
training_model

In [26]:
%%time
training_model.fit(X_train, y_train)

CPU times: user 1h 3min 25s, sys: 722 ms, total: 1h 3min 26s
Wall time: 1h 3min 28s


In [27]:
y_pred = training_model.predict(X_test)
y_pred

array([[ 28.58877246,  28.90606942,  29.8791327 , ..., 385.27856433,
        619.37225553, 762.72602353],
       [ 29.0381504 ,  29.60140327,  30.45830111, ..., 608.47332157,
        794.7096399 , 897.63997678],
       [ 30.64543027,  31.29251091,  32.01119235, ..., 764.16654453,
        875.09078932, 833.38945938],
       ...,
       [ 32.92386518,  32.0943843 ,  31.22469018, ...,  20.83637753,
         11.4300263 ,   3.73853322],
       [ 31.14240146,  30.53848848,  29.51148013, ...,   3.16084742,
          8.10768073, -10.06561889],
       [ 30.39551377,  29.93813687,  29.42124934, ...,   9.1292137 ,
          5.25636933, -28.57637738]])

In [28]:
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [29]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [30]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1747.0080823532257
MAE Score: 11.002342749294431
R2 Score: 0.6533589184031183
RMSE Score: 41.7972257734078


In [31]:
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.690178,0.568101,0.89073,0.830769
1,Temp_t+2,1.158775,0.737674,0.816534,1.076464
2,Temp_t+3,1.494966,0.837317,0.76331,1.222688
3,Temp_t+4,1.875766,0.958427,0.703009,1.369586
4,Temp_t+5,2.127171,1.024152,0.663138,1.458482
5,Temp_t+6,2.306532,1.071403,0.634504,1.518727
6,Temp,1.608898,0.866179,0.745204,1.268423
7,FeelsLike_t+1,3.856304,1.366768,0.845611,1.963748
8,FeelsLike_t+2,6.229377,1.770193,0.750612,2.495872
9,FeelsLike_t+3,8.250846,2.061913,0.66968,2.872429


In [32]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "StandardScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': json.dumps(make_serializable(training_model.get_params()))
}

save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1747.008082, R²: 0.653359
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Grid Search

## Robust Scaler

In [33]:
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [34]:
X_train

array([[-0.66666667, -0.93333333, -0.91666667, ..., -0.75      ,
        -0.75      , -0.75      ],
       [-0.66666667, -0.93333333, -0.83333333, ..., -0.75      ,
        -0.75      , -0.75      ],
       [-0.66666667, -0.93333333, -0.75      , ..., -0.25      ,
        -0.75      , -0.75      ],
       ...,
       [ 0.        , -0.93333333, -0.83333333, ...,  0.25      ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.75      , ...,  0.25      ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.66666667, ...,  0.25      ,
         0.25      ,  0.25      ]])

In [35]:
X_test

array([[ 0.        , -0.93333333, -0.58333333, ...,  0.        ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.5       , ...,  0.        ,
         0.        ,  0.25      ],
       [ 0.        , -0.93333333, -0.41666667, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.5       ,  1.        ,  0.33333333, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.5       ,  1.        ,  0.41666667, ...,  0.5       ,
         0.        ,  0.        ],
       [ 0.5       ,  1.        ,  0.5       , ...,  0.25      ,
         0.5       ,  0.        ]])

### Default Parameters

In [36]:
gbr_model = GradientBoostingRegressor(random_state=42)
training_model = MultiOutputRegressor(gbr_model)
training_model

In [37]:
%%time
training_model.fit(X_train, y_train)

CPU times: user 1h 3min 19s, sys: 727 ms, total: 1h 3min 20s
Wall time: 1h 3min 21s


In [38]:
y_pred = training_model.predict(X_test)
y_pred

array([[ 28.58877246,  28.90606942,  29.8791327 , ..., 385.27856433,
        619.37225553, 762.72602353],
       [ 29.0381504 ,  29.60140327,  30.45830111, ..., 608.47332157,
        794.7096399 , 897.63997678],
       [ 30.64543027,  31.29251091,  32.01119235, ..., 764.16654453,
        875.09078932, 833.38945938],
       ...,
       [ 32.92386518,  32.0943843 ,  31.22469018, ...,  20.83637753,
         11.4300263 ,   3.73853322],
       [ 31.14240146,  30.53848848,  29.51148013, ...,   3.16084742,
          8.10768073, -10.06561889],
       [ 30.39551377,  29.93813687,  29.42124934, ...,   9.1292137 ,
          5.25636933, -28.57637738]])

In [39]:
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [40]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [41]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1747.0835634446785
MAE Score: 11.002242672128745
R2 Score: 0.6533643330544895
RMSE Score: 41.79812870745147


In [42]:
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.690202,0.56812,0.890726,0.830784
1,Temp_t+2,1.158718,0.737661,0.816543,1.076438
2,Temp_t+3,1.495532,0.837503,0.763221,1.222919
3,Temp_t+4,1.875759,0.958419,0.70301,1.369584
4,Temp_t+5,2.127172,1.024156,0.663138,1.458483
5,Temp_t+6,2.306135,1.071397,0.634567,1.518596
6,Temp,1.60892,0.866209,0.745201,1.268432
7,FeelsLike_t+1,3.854022,1.366424,0.845702,1.963166
8,FeelsLike_t+2,6.229508,1.77033,0.750607,2.495898
9,FeelsLike_t+3,8.250955,2.062008,0.669676,2.872448


In [43]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "RobustScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': json.dumps(make_serializable(training_model.get_params()))
}

save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1747.083563, R²: 0.653364
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Grid Search

# All Performance

In [44]:
show_model_history(model_type='regression', model_name=model_name_for_saving)


📊 REGRESSION Model Performance History
🔍 Filters Applied:
   • Model Name contains: 'Gradient Boosting'
   • Target: 'Overall' only
----------------------------------------------------------------------------------------------------
            Model         Scaler  Target         MSE       MAE       R2      RMSE
Gradient Boosting   MinMaxScaler Overall 1746.600866 10.999891 0.653368 41.792354
Gradient Boosting   RobustScaler Overall 1747.083563 11.002243 0.653364 41.798129
Gradient Boosting StandardScaler Overall 1747.008082 11.002343 0.653359 41.797226

📈 Total models shown: 3
🏆 Best R² Score: 0.653368
    Model: Gradient Boosting
    Scaler: MinMaxScaler
    Target: Overall
    MSE: 1746.600866


# <center><font size="50" color="red">Thank You</font></center>