# <font color="red">Forecasting with Lasso Regressor</font>

# Library Import

In [1]:
# Standard library
import json
import math
import multiprocessing
import sys
import warnings

# Data manipulation
import pandas as pd

# Model selection, preprocessing, evaluation and the estimator itself
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import Lasso as LassoR

# Make the project-local helper module importable (must run before the import below).
# sys.path.append('../../')  # use this path instead when running locally
sys.path.append('/kaggle/input/weatherforecasting')  # active: Kaggle dataset mount

# Import exactly the helpers this notebook calls rather than `import *`, so a
# reader can see where each name comes from and nothing else is shadowed.
from historyManagement import (
    create_history_df_regression,
    save_model_performance_if_better,
    show_model_history,
)

# Silence all warnings for cleaner notebook output.
# NOTE(review): this blanket filter also hides scikit-learn ConvergenceWarning /
# FitFailedWarning messages; consider narrowing it while tuning the model.
warnings.filterwarnings("ignore")

# Datasets Import

In [2]:
# Read the prepared multi-target weather dataset and parse its timestamp column.
# Local alternative to the Kaggle path below:
#   '../../FinalDatasets/finalDataset.csv'
weather_data = (
    pd.read_csv('/kaggle/input/weatherforecasting/finalDataset.csv')
    .assign(DateTime=lambda frame: pd.to_datetime(frame['DateTime']))
)
# Preview the first rows.
weather_data.head()

Unnamed: 0,DateTime,Year,Month,Day,Hour,Dew,Precip,PrecipProb,Windgust,WindSpeed,...,Conditions_t+3,Conditions_t+4,Conditions_t+5,Conditions_t+6,Icon_t+1,Icon_t+2,Icon_t+3,Icon_t+4,Icon_t+5,Icon_t+6
0,2023-01-02 00:00:00,2023,1,2,0,15.19,0.0,0.0,10.31,0.24,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2023-01-02 01:00:00,2023,1,2,1,14.72,0.0,0.0,9.72,0.21,...,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0
2,2023-01-02 02:00:00,2023,1,2,2,14.72,0.0,0.0,10.66,0.25,...,0.0,0.0,2.0,2.0,1.0,1.0,1.0,1.0,4.0,4.0
3,2023-01-02 03:00:00,2023,1,2,3,14.23,0.0,0.0,9.91,0.33,...,0.0,2.0,2.0,2.0,1.0,1.0,1.0,4.0,4.0,4.0
4,2023-01-02 04:00:00,2023,1,2,4,14.72,0.0,0.0,10.12,0.2,...,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,4.0,4.0


In [3]:
# Summarise the frame: row count, column count, dtypes and memory footprint.
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14586 entries, 0 to 14585
Columns: 575 entries, DateTime to Icon_t+6
dtypes: datetime64[ns](1), float64(568), int64(6)
memory usage: 64.0 MB


In [4]:
# Descriptive statistics (count, mean, quartiles, std, ...) for every column.
weather_data.describe()

Unnamed: 0,DateTime,Year,Month,Day,Hour,Dew,Precip,PrecipProb,Windgust,WindSpeed,...,Conditions_t+3,Conditions_t+4,Conditions_t+5,Conditions_t+6,Icon_t+1,Icon_t+2,Icon_t+3,Icon_t+4,Icon_t+5,Icon_t+6
count,14586,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,...,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0,14586.0
mean,2023-11-01 20:30:00,2023.40107,5.727684,15.753599,11.496298,20.789805,0.165143,19.505391,17.075919,8.096791,...,2.044289,2.044358,2.044426,2.044495,3.795283,3.795352,3.79542,3.795489,3.795557,3.795626
min,2023-01-02 00:00:00,2023.0,1.0,1.0,0.0,2.02,0.0,0.0,0.66,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2023-06-02 22:15:00,2023.0,3.0,8.0,5.0,17.1,0.0,0.0,9.17,2.88,...,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0
50%,2023-11-01 20:30:00,2023.0,6.0,16.0,11.0,21.85,0.0,0.0,14.66,7.67,...,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0
75%,2024-04-01 18:45:00,2024.0,8.0,23.0,17.0,24.98,0.01,6.45,23.4175,11.75,...,2.0,2.0,2.0,2.0,5.0,5.0,5.0,5.0,5.0,5.0
max,2024-08-31 17:00:00,2024.0,12.0,31.0,23.0,28.88,44.5,100.0,84.12,71.97,...,5.0,5.0,5.0,5.0,6.0,6.0,6.0,6.0,6.0,6.0
std,,0.490132,3.192277,8.786893,6.921353,4.824925,0.844688,37.410656,10.200773,6.248622,...,1.567964,1.567897,1.567829,1.567761,1.986011,1.985931,1.985852,1.985773,1.985694,1.985614


# Data Split

In [5]:
# Chronological hold-out: the earliest 80% of rows train the model and the
# remaining 20% are kept for testing. Rows are never shuffled, so the test
# period always follows the training period.
n_rows = len(weather_data.index)
split_index = int(n_rows * 0.8)
weather_data_train, weather_data_test = (
    weather_data.iloc[:split_index],
    weather_data.iloc[split_index:],
)

# Some Reused Parameters

In [6]:
# Candidate hyper-parameter values for a search over Lasso
# (RandomizedSearchCV is imported for this purpose). Each key is a Lasso
# constructor argument and each list holds the values to sample from.
grid_param = {
    # L1 penalty strength, from almost unregularised to very strong.
    'alpha': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 50.0, 100.0],
    'max_iter': [10000],
    'tol': [1e-6, 1e-5, 1e-4, 1e-3, 1e-2],
    'selection': ['cyclic', 'random'],
    'positive': [False, True],
    'fit_intercept': [True, False],
    # Lasso accepts only a bool (or a precomputed Gram matrix) here. 'auto' is
    # valid for LassoCV but is rejected by Lasso, so every sampled candidate
    # using it would fail to fit -- silently, since warnings are suppressed.
    'precompute': [False, True],
    # NOTE(review): a search clones the estimator for each candidate, so
    # warm_start presumably has no effect on the result -- confirm before tuning.
    'warm_start': [False, True]
}

In [7]:
# --- Run configuration ---
# Candidate count intended for a randomized search (not referenced by any
# later cell in this notebook).
number_of_model_for_randomized_grid = 10
# Label under which results are written to the shared history registry.
model_name_for_saving = "LassoR"
# Number of forecast steps per variable (the *_t+1 ... *_t+6 columns).
horizon = 6
# Parallelism setting; -1 is the scikit-learn/joblib convention for "all cores".
# Alternative that leaves two cores free for other work:
# core_to_use = max(1, multiprocessing.cpu_count() - 2)
core_to_use = -1

# Every future-value column (name contains '_t+') is a potential target and
# must never be used as an input feature.
columns_with_t_plus = [name for name in weather_data.columns if '_t+' in name]
column_to_exclude = [*columns_with_t_plus, 'DateTime', 'Year']

# Regression targets: the future-value columns minus the categorical
# 'Conditions' and 'Icon' ones.
column_to_predict = [
    name
    for name in columns_with_t_plus
    if not ('Conditions' in name or 'Icon' in name)
]

print(column_to_exclude)
print(column_to_predict)
print("Core to use:", core_to_use)

['Temp_t+1', 'Temp_t+2', 'Temp_t+3', 'Temp_t+4', 'Temp_t+5', 'Temp_t+6', 'FeelsLike_t+1', 'FeelsLike_t+2', 'FeelsLike_t+3', 'FeelsLike_t+4', 'FeelsLike_t+5', 'FeelsLike_t+6', 'Humidity_t+1', 'Humidity_t+2', 'Humidity_t+3', 'Humidity_t+4', 'Humidity_t+5', 'Humidity_t+6', 'Visibility_t+1', 'Visibility_t+2', 'Visibility_t+3', 'Visibility_t+4', 'Visibility_t+5', 'Visibility_t+6', 'SolarEnergy_t+1', 'SolarEnergy_t+2', 'SolarEnergy_t+3', 'SolarEnergy_t+4', 'SolarEnergy_t+5', 'SolarEnergy_t+6', 'SolarRadiation_t+1', 'SolarRadiation_t+2', 'SolarRadiation_t+3', 'SolarRadiation_t+4', 'SolarRadiation_t+5', 'SolarRadiation_t+6', 'Conditions_t+1', 'Conditions_t+2', 'Conditions_t+3', 'Conditions_t+4', 'Conditions_t+5', 'Conditions_t+6', 'Icon_t+1', 'Icon_t+2', 'Icon_t+3', 'Icon_t+4', 'Icon_t+5', 'Icon_t+6', 'DateTime', 'Year']
['Temp_t+1', 'Temp_t+2', 'Temp_t+3', 'Temp_t+4', 'Temp_t+5', 'Temp_t+6', 'FeelsLike_t+1', 'FeelsLike_t+2', 'FeelsLike_t+3', 'FeelsLike_t+4', 'FeelsLike_t+5', 'FeelsLike_t+6', 

# All Features

In [8]:
# Training inputs: everything except the future-value columns, DateTime and Year.
X_train_raw = weather_data_train.drop(column_to_exclude, axis='columns')
# Training targets: the numeric future-value columns only.
y_train = weather_data_train.loc[:, column_to_predict]

In [9]:
# Test inputs and targets, selected with the same column lists as the training split.
X_test_raw = weather_data_test.drop(column_to_exclude, axis='columns')
y_test = weather_data_test.loc[:, column_to_predict]

In [10]:
# Sanity check: feature and target frames must have matching row counts per split.
for label, frame in (
    ("X_train: ", X_train_raw),
    ("y_train: ", y_train),
    ("\nX_test: ", X_test_raw),
    ("y_test: ", y_test),
):
    print(label, len(frame))

X_train:  11668
y_train:  11668

X_test:  2918
y_test:  2918


## MinMax Scaler

In [11]:
# Learn per-feature min/max on the training rows only, then apply the same
# mapping to both splits so no test-set statistics leak into training.
scaler = MinMaxScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [12]:
# Display the MinMax-scaled training feature array (the notebook truncates it).
X_train

array([[0.        , 0.03333333, 0.        , ..., 0.16666667, 0.16666667,
        0.16666667],
       [0.        , 0.03333333, 0.04347826, ..., 0.16666667, 0.16666667,
        0.16666667],
       [0.        , 0.03333333, 0.08695652, ..., 0.5       , 0.16666667,
        0.16666667],
       ...,
       [0.36363636, 0.03333333, 0.04347826, ..., 0.83333333, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.08695652, ..., 0.83333333, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.13043478, ..., 0.83333333, 0.83333333,
        0.83333333]])

In [13]:
# Display the test feature array, scaled with the min/max learned from training.
X_test

array([[0.36363636, 0.03333333, 0.17391304, ..., 0.66666667, 0.83333333,
        0.83333333],
       [0.36363636, 0.03333333, 0.2173913 , ..., 0.66666667, 0.66666667,
        0.83333333],
       [0.36363636, 0.03333333, 0.26086957, ..., 0.66666667, 0.66666667,
        0.66666667],
       ...,
       [0.63636364, 1.        , 0.65217391, ..., 0.66666667, 0.66666667,
        0.66666667],
       [0.63636364, 1.        , 0.69565217, ..., 1.        , 0.66666667,
        0.66666667],
       [0.63636364, 1.        , 0.73913043, ..., 0.83333333, 1.        ,
        0.66666667]])

### Baseline Parameters (alpha=0.001, max_iter=10000)

In [14]:
# Fixed-parameter Lasso for the MinMax-scaled features. alpha and max_iter are
# set explicitly (they are not the library defaults); per the scikit-learn
# docs, random_state only matters when selection='random'.
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [15]:
%%time
# Fit on the MinMax-scaled training features against all target columns at
# once; the %%time cell magic reports how long this cell takes.
training_model.fit(X_train, y_train)

CPU times: user 49min 31s, sys: 2.2 s, total: 49min 33s
Wall time: 12min 36s


In [16]:
# Predict every target column for the held-out test rows and show the array.
y_pred = training_model.predict(X_test)
y_pred

array([[ 28.48908268,  28.44888813,  29.14367742, ..., 369.16552825,
        573.31270701, 796.23391956],
       [ 28.67219912,  29.42068111,  30.85097198, ..., 576.08441666,
        801.35997266, 931.12291153],
       [ 29.99731981,  31.26990565,  33.1890853 , ..., 791.36925279,
        922.6971376 , 916.4499918 ],
       ...,
       [ 32.40758542,  31.59320494,  30.73953535, ..., 136.1382418 ,
        130.12877877, 125.97502919],
       [ 31.4814357 ,  30.49753588,  29.94392116, ..., 134.71507661,
        132.93381269, 107.034647  ],
       [ 30.33080361,  29.64925162,  29.4605843 , ..., 134.85805695,
        119.2041516 , 127.73276751]])

In [17]:
# Display the ground-truth targets for visual comparison with y_pred.
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [18]:
# Overall test-set scores across all target columns. With 2-D targets these
# scikit-learn metrics average uniformly over the outputs by default, so
# large-magnitude targets (e.g. SolarRadiation) dominate MSE and MAE.
mse, mae, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_squared_error, mean_absolute_error, r2_score)
)
# RMSE here is the square root of the averaged MSE.
rmse = math.sqrt(mse)

In [19]:
# Report the aggregate scores computed in the previous cell.
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 1884.5211652598964
MAE Score: 12.539218793081568
R2 Score: 0.7014882262302587
RMSE Score: 43.41107192018986


In [20]:
# Build the metrics table with the project helper from historyManagement,
# passing the forecast horizon, and display it.
# NOTE(review): the helper's internals are not visible in this notebook; the
# output shows one row per target column plus aggregate rows such as "Temp".
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.678739,0.566602,0.892541,0.823856
1,Temp_t+2,1.110168,0.72172,0.82423,1.053645
2,Temp_t+3,1.454618,0.818268,0.769698,1.206076
3,Temp_t+4,1.817955,0.925919,0.712163,1.348316
4,Temp_t+5,2.067086,0.990398,0.672653,1.437736
5,Temp_t+6,2.249415,1.04044,0.643555,1.499805
6,Temp,1.562997,0.843891,0.752473,1.250199
7,FeelsLike_t+1,3.608769,1.311736,0.855521,1.899676
8,FeelsLike_t+2,5.774312,1.678929,0.76883,2.40298
9,FeelsLike_t+3,7.16635,1.872392,0.713097,2.677004


In [21]:
# Serialise the estimator's hyper-parameters so they can be stored as text.
fitted_params_json = json.dumps(training_model.get_params())

# Record describing this run (model label, scaler, metrics table, parameters).
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "MinMaxScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': fitted_params_json,
}

# Hand the record to the project helper that maintains the regression history.
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1884.521165, R²: 0.701488
💾 Registry updated: Model_Training_History/History_Regression.csv


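### Tuning with Grid Search

*Not yet run for the MinMax scaler: no search over `grid_param` is executed in this notebook, so only the fixed-parameter fit above is recorded.*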

## Standard Scaler

In [22]:
# Learn per-feature mean/std on the training rows only, then standardise both
# splits with those statistics. This replaces the MinMax-scaled X_train/X_test.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [23]:
# Display the standardised training feature array (the notebook truncates it).
X_train

array([[-1.29540078, -1.55866853, -1.66051296, ..., -1.24648639,
        -1.24626261, -1.24603889],
       [-1.29540078, -1.55866853, -1.51607747, ..., -1.24648639,
        -1.24626261, -1.24603889],
       [-1.29540078, -1.55866853, -1.37164198, ..., -0.26491096,
        -1.24626261, -1.24603889],
       ...,
       [-0.15173127, -1.55866853, -1.51607747, ...,  0.71666448,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -1.37164198, ...,  0.71666448,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -1.2272065 , ...,  0.71666448,
         0.71680079,  0.71693711]])

In [24]:
# Display the test feature array, standardised with the training mean/std.
X_test

array([[-0.15173127, -1.55866853, -1.08277101, ...,  0.22587676,
         0.71680079,  0.71693711],
       [-0.15173127, -1.55866853, -0.93833552, ...,  0.22587676,
         0.22603494,  0.71693711],
       [-0.15173127, -1.55866853, -0.79390003, ...,  0.22587676,
         0.22603494,  0.22619311],
       ...,
       [ 0.70602087,  1.74193343,  0.50601936, ...,  0.22587676,
         0.22603494,  0.22619311],
       [ 0.70602087,  1.74193343,  0.65045485, ...,  1.2074522 ,
         0.22603494,  0.22619311],
       [ 0.70602087,  1.74193343,  0.79489033, ...,  0.71666448,
         1.20756664,  0.22619311]])

### Baseline Parameters (alpha=0.001, max_iter=10000)

In [25]:
# Fresh Lasso with the same fixed settings as the other scaler sections, so
# the only thing that differs between runs is the feature scaling.
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [26]:
%%time
# Fit on the standardised training features against all target columns; the
# %%time cell magic reports how long this cell takes.
training_model.fit(X_train, y_train)

CPU times: user 1h 27min 20s, sys: 3.75 s, total: 1h 27min 24s
Wall time: 22min 14s


In [27]:
# Predict every target column for the standardised test rows and show the array.
y_pred = training_model.predict(X_test)
y_pred

array([[ 29.10889117,  29.03446676,  29.63399219, ..., 368.48021581,
        572.74858988, 795.21341142],
       [ 28.79704618,  29.5745894 ,  31.0331712 , ..., 574.69761012,
        799.09625756, 931.73908683],
       [ 30.05483008,  31.40458219,  33.47266724, ..., 790.76006844,
        922.66581095, 917.43455427],
       ...,
       [ 32.69598956,  32.48651319,  31.91479085, ..., 136.77755722,
        131.00643665, 126.48182678],
       [ 31.94530248,  31.40879985,  31.29398271, ..., 135.16927581,
        134.60672572, 108.11596699],
       [ 30.78626274,  30.64660608,  30.98552339, ..., 137.06272796,
        120.07438324, 129.25411879]])

In [28]:
# Display the ground-truth targets again (y_test has not been reassigned since it was first built).
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [29]:
# Overall test-set scores for the StandardScaler run, averaged uniformly over
# all target columns (the scikit-learn default for multi-output targets).
mse, mae, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_squared_error, mean_absolute_error, r2_score)
)
# Square root of the averaged MSE.
rmse = math.sqrt(mse)

In [30]:
# Report the aggregate scores for the StandardScaler run.
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 1887.5960509548559
MAE Score: 12.574724821865026
R2 Score: 0.7115509368989719
RMSE Score: 43.446473400666896


In [31]:
# Rebuild the metrics table for the StandardScaler predictions using the
# project helper from historyManagement, then display it.
# NOTE(review): helper internals are not visible here; see its output below.
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.67603,0.564283,0.89297,0.82221
1,Temp_t+2,1.108073,0.722614,0.824562,1.05265
2,Temp_t+3,1.433959,0.814496,0.772969,1.19748
3,Temp_t+4,1.813191,0.93092,0.712917,1.346548
4,Temp_t+5,2.076848,0.999917,0.671107,1.441127
5,Temp_t+6,2.266213,1.050405,0.640893,1.505395
6,Temp,1.562386,0.847106,0.75257,1.249954
7,FeelsLike_t+1,3.6565,1.32311,0.85361,1.912198
8,FeelsLike_t+2,5.842719,1.68639,0.766091,2.417172
9,FeelsLike_t+3,7.269935,1.872713,0.70895,2.696282


In [32]:
# Serialise the estimator's hyper-parameters so they can be stored as text.
fitted_params_json = json.dumps(training_model.get_params())

# Record describing the StandardScaler run.
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "StandardScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': fitted_params_json,
}

# Hand the record to the project helper that maintains the regression history.
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1887.596051, R²: 0.711551
💾 Registry updated: Model_Training_History/History_Regression.csv


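### Tuning with Grid Search

*Not yet run for the Standard scaler: no search over `grid_param` is executed in this notebook, so only the fixed-parameter fit above is recorded.*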

## Robust Scaler

In [33]:
# Fit the robust scaler on the training rows only, then transform both splits
# with it. This replaces the standardised X_train/X_test.
scaler = RobustScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [34]:
# Display the robust-scaled training feature array (the notebook truncates it).
X_train

array([[-0.66666667, -0.93333333, -0.91666667, ..., -0.75      ,
        -0.75      , -0.75      ],
       [-0.66666667, -0.93333333, -0.83333333, ..., -0.75      ,
        -0.75      , -0.75      ],
       [-0.66666667, -0.93333333, -0.75      , ..., -0.25      ,
        -0.75      , -0.75      ],
       ...,
       [ 0.        , -0.93333333, -0.83333333, ...,  0.25      ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.75      , ...,  0.25      ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.66666667, ...,  0.25      ,
         0.25      ,  0.25      ]])

In [35]:
# Display the test feature array, transformed with the scaler fitted on training data.
X_test

array([[ 0.        , -0.93333333, -0.58333333, ...,  0.        ,
         0.25      ,  0.25      ],
       [ 0.        , -0.93333333, -0.5       , ...,  0.        ,
         0.        ,  0.25      ],
       [ 0.        , -0.93333333, -0.41666667, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.5       ,  1.        ,  0.33333333, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.5       ,  1.        ,  0.41666667, ...,  0.5       ,
         0.        ,  0.        ],
       [ 0.5       ,  1.        ,  0.5       , ...,  0.25      ,
         0.5       ,  0.        ]])

### Baseline Parameters (alpha=0.001, max_iter=10000)

In [36]:
# Fresh Lasso with the same fixed settings as the other scaler sections, so
# results stay comparable across scalers.
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [37]:
%%time
# Fit on the robust-scaled training features against all target columns; the
# %%time cell magic reports how long this cell takes.
training_model.fit(X_train, y_train)

CPU times: user 1h 29min 52s, sys: 3.89 s, total: 1h 29min 56s
Wall time: 22min 59s


In [38]:
# Predict every target column for the robust-scaled test rows and show the array.
y_pred = training_model.predict(X_test)
y_pred

array([[ 28.99493959,  28.94409965,  29.54607147, ..., 368.64490002,
        573.01919259, 795.51321519],
       [ 28.72683665,  29.50003958,  30.93398066, ..., 575.12262321,
        799.62782622, 932.11707823],
       [ 30.00912432,  31.33796715,  33.41901058, ..., 790.76582528,
        922.75562912, 917.3701145 ],
       ...,
       [ 32.6116237 ,  32.35509452,  31.73986917, ..., 136.54282204,
        130.73918607, 126.11241381],
       [ 31.854808  ,  31.28919662,  31.04814722, ..., 135.24548657,
        134.21654696, 107.94412416],
       [ 30.66525573,  30.4922354 ,  30.73956417, ..., 136.56942467,
        120.14672551, 129.22515539]])

In [39]:
# Display the ground-truth targets again (y_test has not been reassigned since it was first built).
y_test

Unnamed: 0,Temp_t+1,Temp_t+2,Temp_t+3,Temp_t+4,Temp_t+5,Temp_t+6,FeelsLike_t+1,FeelsLike_t+2,FeelsLike_t+3,FeelsLike_t+4,...,SolarEnergy_t+3,SolarEnergy_t+4,SolarEnergy_t+5,SolarEnergy_t+6,SolarRadiation_t+1,SolarRadiation_t+2,SolarRadiation_t+3,SolarRadiation_t+4,SolarRadiation_t+5,SolarRadiation_t+6
11668,28.78,29.63,30.87,31.94,33.42,34.10,35.16,38.06,40.18,40.30,...,0.29,0.63,0.86,2.05,0.00,3.13,79.55,181.94,229.71,574.29
11669,29.63,30.87,31.94,33.42,34.10,35.00,38.06,40.18,40.30,39.58,...,0.63,0.86,2.05,2.76,3.13,79.55,181.94,229.71,574.29,775.42
11670,30.87,31.94,33.42,34.10,35.00,35.63,40.18,40.30,39.58,40.38,...,0.86,2.05,2.76,3.19,79.55,181.94,229.71,574.29,775.42,887.00
11671,31.94,33.42,34.10,35.00,35.63,37.05,40.30,39.58,40.38,43.03,...,2.05,2.76,3.19,3.52,181.94,229.71,574.29,775.42,887.00,976.61
11672,33.42,34.10,35.00,35.63,37.05,37.94,39.58,40.38,43.03,42.41,...,2.76,3.19,3.52,2.99,229.71,574.29,775.42,887.00,976.61,835.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14581,33.45,33.12,32.45,31.42,30.19,29.50,39.85,39.42,38.52,37.19,...,1.44,0.78,0.29,0.00,659.35,538.10,395.90,210.35,82.81,0.00
14582,33.12,32.45,31.42,30.19,29.50,29.02,39.42,38.52,37.19,35.19,...,0.78,0.29,0.00,0.00,538.10,395.90,210.35,82.81,0.00,0.00
14583,32.45,31.42,30.19,29.50,29.02,28.61,38.52,37.19,35.19,34.10,...,0.29,0.00,0.00,0.00,395.90,210.35,82.81,0.00,0.00,0.00
14584,31.42,30.19,29.50,29.02,28.61,28.21,37.19,35.19,34.10,33.60,...,0.00,0.00,0.00,0.00,210.35,82.81,0.00,0.00,0.00,0.00


In [40]:
# Overall test-set scores for the RobustScaler run, averaged uniformly over
# all target columns (the scikit-learn default for multi-output targets).
mse, mae, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_squared_error, mean_absolute_error, r2_score)
)
# Square root of the averaged MSE.
rmse = math.sqrt(mse)

In [41]:
# Report the aggregate scores for the RobustScaler run.
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 1887.6256245884392
MAE Score: 12.572260723610418
R2 Score: 0.7108845982757565
RMSE Score: 43.446813744950724


In [42]:
# Rebuild the metrics table for the RobustScaler predictions using the
# project helper from historyManagement, then display it.
# NOTE(review): helper internals are not visible here; see its output below.
performance_metrics = create_history_df_regression(y_test, y_pred, horizon)
performance_metrics

Unnamed: 0,Target,MSE,MAE,R2,RMSE
0,Temp_t+1,0.677016,0.563385,0.892814,0.82281
1,Temp_t+2,1.109399,0.722303,0.824352,1.05328
2,Temp_t+3,1.442555,0.816609,0.771608,1.201064
3,Temp_t+4,1.819125,0.931572,0.711977,1.348749
4,Temp_t+5,2.079596,0.999562,0.670672,1.44208
5,Temp_t+6,2.266459,1.048934,0.640854,1.505476
6,Temp,1.565692,0.847061,0.752046,1.251276
7,FeelsLike_t+1,3.650924,1.319577,0.853834,1.910739
8,FeelsLike_t+2,5.834025,1.68359,0.76644,2.415373
9,FeelsLike_t+3,7.265903,1.873434,0.709112,2.695534


In [43]:
# Serialise the estimator's hyper-parameters so they can be stored as text.
fitted_params_json = json.dumps(training_model.get_params())

# Record describing the RobustScaler run.
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "RobustScaler",
    'performance_metrics_df': performance_metrics,
    'parameters': fitted_params_json,
}

# Hand the record to the project helper that maintains the regression history.
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 1887.625625, R²: 0.710885
💾 Registry updated: Model_Training_History/History_Regression.csv


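### Tuning with Grid Search

*Not yet run for the Robust scaler: no search over `grid_param` is executed in this notebook, so only the fixed-parameter fit above is recorded.*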

# All Performance

In [44]:
# Print the stored regression history, filtered to this notebook's model name,
# using the project helper from historyManagement.
show_model_history('regression', model_name=model_name_for_saving)


📊 REGRESSION Model Performance History
🔍 Filters Applied:
   • Model Name contains: 'LassoR'
   • Target: 'Overall' only
----------------------------------------------------------------------------------------------------
 Model         Scaler  Target         MSE       MAE       R2      RMSE
LassoR StandardScaler Overall 1887.596051 12.574725 0.711551 43.446473
LassoR   RobustScaler Overall 1887.625625 12.572261 0.710885 43.446814
LassoR   MinMaxScaler Overall 1884.521165 12.539219 0.701488 43.411072

📈 Total models shown: 3
🏆 Best R² Score: 0.711551
    Model: LassoR
    Scaler: StandardScaler
    Target: Overall
    MSE: 1887.596051


# <center><font size="50" color="red">Thank You</font></center>