# <font color="red">Solar Energy Prediction with Lasso Regressor</font>

# Library Import

In [1]:
# Data manipulation and visualization
import pandas as pd
import math
import json

# Preprocessing
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import Lasso as LassoR

import sys
# sys.path.append('../../') # Uncomment this line if running locally
sys.path.append('/kaggle/input/weatherdata') # Uncomment this line if running on Kaggle
from historyManagement import *

# suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Datasets Import

In [2]:
# Load the weather dataset used for SolarEnergy prediction.
# Local alternative: pd.read_csv('../../FinalDatasets/finalDataset.csv')
weather_data = (
    pd.read_csv('/kaggle/input/weatherdata/finalDataset.csv')  # Kaggle input path (active)
    # Parse the DateTime strings into datetime values, keeping the column in place
    .assign(DateTime=lambda frame: pd.to_datetime(frame['DateTime']))
)
weather_data.head()

Unnamed: 0,DateTime,Year,Month,Day,Hour,DaySegments,DaySegments_Afternoon,DaySegments_Early Morning,DaySegments_Evening,DaySegments_Late Night,...,WindDir,SeaLevelPressure,CloudCover,UVIndex,SevereRisk,Visibility,SolarRadiation,SolarEnergy,Conditions,Icon
0,2023-01-01 00:00:00,2023,1,1,0,Late Night,0,0,0,1,...,0.49,1018.68,0.03,0.0,10.0,2.76,0.0,0.0,Clear,clear-night
1,2023-01-01 01:00:00,2023,1,1,1,Late Night,0,0,0,1,...,0.54,1018.03,0.11,0.0,10.0,1.75,0.0,0.0,Clear,clear-night
2,2023-01-01 02:00:00,2023,1,1,2,Late Night,0,0,0,1,...,30.51,1017.56,0.03,0.0,10.0,1.75,0.0,0.0,Clear,clear-night
3,2023-01-01 03:00:00,2023,1,1,3,Late Night,0,0,0,1,...,49.23,1018.05,0.0,0.0,10.0,2.28,0.0,0.0,Clear,clear-night
4,2023-01-01 04:00:00,2023,1,1,4,Late Night,0,0,0,1,...,49.9,1018.0,86.17,0.0,10.0,1.27,0.0,0.0,Partially cloudy,fog


In [3]:
# Column names, non-null counts and dtypes of the loaded frame
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14616 entries, 0 to 14615
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   DateTime                   14616 non-null  datetime64[ns]
 1   Year                       14616 non-null  int64         
 2   Month                      14616 non-null  int64         
 3   Day                        14616 non-null  int64         
 4   Hour                       14616 non-null  int64         
 5   DaySegments                14616 non-null  object        
 6   DaySegments_Afternoon      14616 non-null  int64         
 7   DaySegments_Early Morning  14616 non-null  int64         
 8   DaySegments_Evening        14616 non-null  int64         
 9   DaySegments_Late Night     14616 non-null  int64         
 10  DaySegments_Midday         14616 non-null  int64         
 11  DaySegments_Morning        14616 non-null  int64         
 12  DayS

In [4]:
# Summary statistics (count, mean, min, quartiles, max, std) of the loaded frame
weather_data.describe()

Unnamed: 0,DateTime,Year,Month,Day,Hour,DaySegments_Afternoon,DaySegments_Early Morning,DaySegments_Evening,DaySegments_Late Night,DaySegments_Midday,...,Windgust,WindSpeed,WindDir,SeaLevelPressure,CloudCover,UVIndex,SevereRisk,Visibility,SolarRadiation,SolarEnergy
count,14616,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,...,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0
mean,2023-11-01 11:30:00,2023.400657,5.720854,15.735632,11.5,0.131294,0.06055,0.098043,0.249726,0.116585,...,17.06856,8.090783,159.739178,1008.265012,53.579923,2.263555,16.504787,4.368134,227.640683,0.819217
min,2023-01-01 00:00:00,2023.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.66,0.0,0.0,982.42,0.0,0.0,3.0,0.0,0.0,0.0
25%,2023-06-02 05:45:00,2023.0,3.0,8.0,5.75,0.0,0.0,0.0,0.0,0.0,...,9.17,2.87,80.965,1003.2775,26.67,0.0,10.0,3.99,0.0,0.0
50%,2023-11-01 11:30:00,2023.0,6.0,16.0,11.5,0.0,0.0,0.0,0.0,0.0,...,14.645,7.67,163.79,1007.97,51.54,0.0,10.0,4.65,10.665,0.01
75%,2024-04-01 17:15:00,2024.0,8.0,23.0,17.25,0.0,0.0,0.0,0.0,0.0,...,23.41,11.75,241.4025,1013.4,87.79,4.74,12.58,4.65,459.745,1.66
max,2024-08-31 23:00:00,2024.0,12.0,31.0,23.0,1.0,1.0,1.0,1.0,1.0,...,84.12,71.97,360.0,1022.06,100.0,10.0,97.74,24.1,1026.65,3.7
std,,0.490048,3.195073,8.80361,6.922423,0.337734,0.238511,0.297383,0.432869,0.320936,...,10.195911,6.250203,104.422027,6.010683,35.166637,3.083074,15.031827,1.376994,307.219387,1.106444


# Data Split

In [5]:
# Single 80/20 shuffle split, stratified on DaySegments so both partitions
# keep the same class proportions.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
# n_splits=1, so the generator yields exactly one (train, test) index pair
train_idx, test_idx = next(sss.split(weather_data, weather_data['DaySegments']))
weather_data_train = weather_data.iloc[train_idx]
weather_data_test = weather_data.iloc[test_idx]

In [6]:
# Tabulate how many rows fall into each DaySegments class
class_counts = (
    weather_data['DaySegments']
    .value_counts()
    .rename_axis('DaySegments')
    .reset_index(name='Count')
)

print("Distribution of data based on DaySegments:")
print(class_counts)

Distribution of data based on DaySegments:
     DaySegments  Count
0     Late Night   3650
1        Morning   3504
2      Afternoon   1919
3         Midday   1704
4          Night   1521
5        Evening   1433
6  Early Morning    885


In [7]:
# Print the DaySegments class balance of both partitions of the split
for heading, partition in (
    ("Distribution of DaySegments in Training Set:", weather_data_train),
    ("\nDistribution of DaySegments in Test Set:", weather_data_test),
):
    print(heading)
    print(partition['DaySegments'].value_counts())

Distribution of DaySegments in Training Set:
DaySegments
Late Night       2920
Morning          2803
Afternoon        1535
Midday           1363
Night            1217
Evening          1146
Early Morning     708
Name: count, dtype: int64

Distribution of DaySegments in Test Set:
DaySegments
Late Night       730
Morning          701
Afternoon        384
Midday           341
Night            304
Evening          287
Early Morning    177
Name: count, dtype: int64


# Some Reused Parameters

In [8]:
# Hyper-parameter grid shared by every GridSearchCV run in this notebook.
grid_param = {
    # Regularisation strength, swept over seven orders of magnitude
    'alpha': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 50.0, 100.0],
    'max_iter': [1000, 2000, 3000, 5000, 10000, 15000],
    'tol': [1e-6, 1e-5, 1e-4, 1e-3, 1e-2],
    'selection': ['cyclic', 'random'],
    'positive': [False, True],
    'fit_intercept': [True, False],
    # Lasso accepts only a bool (or a precomputed Gram matrix) here. The former
    # 'auto' entry is not a valid value, so those candidates failed to fit and
    # the failure warning was hidden by the global warnings filter.
    'precompute': [False, True],
    # Each grid candidate is fitted on a fresh clone, so warm_start=True has no
    # previous solution to reuse and only duplicated every candidate. The key is
    # kept (single value) so best_params_ still reports it.
    'warm_start': [False]
}

In [9]:
# Identifiers stored with every run passed to save_model_performance_if_better
model_name_for_saving, target_name_for_saving = "LassoR", "SolarEnergy"
# Candidate budget for a randomized search (not referenced by the cells reviewed here)
number_of_model_for_randomized_grid = 500

# All Features

In [10]:
# Columns withheld from the feature matrix (reason per column as given by the notebook author)
excluded_columns = [
    'DateTime',        # model cannot use DateTime as a feature
    'Year',            # considered to have no effect on weather data
    'Season',          # categorical, not numerical
    'DaySegments',     # categorical, not numerical
    'Visibility',      # marked as a target variable
    'SolarRadiation',  # marked as a target variable
    'SolarEnergy',     # the target predicted in this notebook
    'Conditions',      # marked as a target variable
    'Icon',            # marked as a target variable
]
# NOTE(review): UVIndex stays in the feature set although the correlation cell
# reports ~0.998 with SolarEnergy - confirm it is available at prediction time.
X_train_raw = weather_data_train.drop(columns=excluded_columns)
y_train = weather_data_train['SolarEnergy']

In [11]:
# Select exactly the columns used for training instead of repeating the
# hard-coded drop list, so the train and test feature sets (and their column
# order) cannot drift apart when the list is edited in one place only.
X_test_raw = weather_data_test.loc[:, X_train_raw.columns]
y_test = weather_data_test['SolarEnergy']

In [12]:
# Feature names as a plain list, plus a comma-joined form for the run registry
feature_columns = X_train_raw.columns.tolist()
feature_columns_for_saving = ','.join(feature_columns)

In [13]:
# Sanity check: features and targets must have matching row counts per partition
for label, rows in (
    ("X_train: ", X_train_raw),
    ("y_train: ", y_train),
    ("\nX_test: ", X_test_raw),
    ("y_test: ", y_test),
):
    print(label, len(rows))

X_train:  11692
y_train:  11692

X_test:  2924
y_test:  2924


## MinMax Scaler

In [14]:
# Fit the scaler on the training features only, then apply it to the test features
scaler = MinMaxScaler()
X_train, X_test = scaler.fit_transform(X_train_raw), scaler.transform(X_test_raw)

In [15]:
# Display the MinMax-scaled training feature array
X_train

array([[0.72727273, 0.83333333, 0.43478261, ..., 0.4922    , 0.697     ,
        0.31560059],
       [0.90909091, 0.26666667, 0.69565217, ..., 0.        , 0.303     ,
        0.07388643],
       [0.09090909, 0.43333333, 0.        , ..., 0.        , 0.        ,
        0.07388643],
       ...,
       [0.63636364, 0.86666667, 0.43478261, ..., 0.8756    , 0.306     ,
        0.09436352],
       [0.        , 0.4       , 0.91304348, ..., 0.        , 0.        ,
        0.07388643],
       [0.72727273, 0.26666667, 0.34782609, ..., 0.8925    , 0.29      ,
        0.27812962]])

In [16]:
# Display the MinMax-scaled test feature array
X_test

array([[0.63636364, 0.06666667, 0.95652174, ..., 0.894     , 0.        ,
        0.07388643],
       [0.45454545, 0.26666667, 0.65217391, ..., 0.9677    , 0.074     ,
        0.07388643],
       [0.63636364, 0.03333333, 0.2173913 , ..., 0.9935    , 0.        ,
        0.07388643],
       ...,
       [0.09090909, 0.2       , 0.73913043, ..., 0.0013    , 0.203     ,
        0.07388643],
       [0.27272727, 0.13333333, 0.86956522, ..., 0.        , 0.        ,
        0.25575259],
       [0.45454545, 0.73333333, 0.95652174, ..., 0.5161    , 0.        ,
        0.07388643]])

### Baseline Parameters (fixed, untuned)

In [17]:
# Baseline Lasso with hand-picked settings (no tuning); seeded for repeatability
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [18]:
%%time
# Fit the baseline Lasso on the scaled training features (timed by the cell magic)
training_model.fit(X_train, y_train)

CPU times: user 30.4 ms, sys: 3.2 ms, total: 33.6 ms
Wall time: 21.7 ms


In [19]:
# Predict SolarEnergy for the scaled test features and display the predictions
y_pred = training_model.predict(X_test)
y_pred

array([0.0095616 , 0.27627364, 0.01336806, ..., 0.73458202, 0.0095616 ,
       0.0095616 ])

In [20]:
# Display the true SolarEnergy values of the test split for comparison with y_pred
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [21]:
# Test-set error metrics of the baseline model; RMSE is derived from MSE
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse = math.sqrt(mse)

In [22]:
# Report the baseline metrics, one per line
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 0.004488285401968958
MAE Score: 0.04476146048531177
R2 Score: 0.9963162363065593
RMSE Score: 0.06699466696662472


In [23]:
# Describe this run (baseline model, MinMaxScaler features) and hand it to the
# history helper imported from historyManagement.
run_identity = {
    'model_name': model_name_for_saving,
    'scaler_name': "MinMaxScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
}
run_metrics = {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}
regression_params = {
    **run_identity,
    **run_metrics,
    # Hyper-parameters of the fitted estimator, stored as a JSON string
    'parameters': json.dumps(training_model.get_params()),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004488, R²: 0.996316
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Grid Search

In [24]:
# Exhaustive 5-fold search over grid_param, ranked by negative MSE
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # parallel processing; the machine will be busy while this runs
    verbose=1,
)

In [None]:
%%time
# Run the full grid search on the scaled training data (long-running; timed by the cell magic)
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [26]:
# The search scores with negative MSE, so flip the sign to report MSE and RMSE
best_cv_score = grid_search.best_score_
print("TUNING RESULTS", "="*50, sep="\n")
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_cv_score)
print("Best Cross-Validation Score (MSE):", -best_cv_score)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_cv_score))

TUNING RESULTS
Best Parameters: {'alpha': 1e-05, 'fit_intercept': True, 'max_iter': 2000, 'positive': True, 'precompute': True, 'selection': 'cyclic', 'tol': 1e-06, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004306512969330758
Best Cross-Validation Score (MSE): 0.004306512969330758
Best Cross-Validation Score (RMSE): 0.06562402737816964


In [27]:
# Take the best estimator found by the search and predict the scaled test features
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [28]:
# Test-set error metrics of the tuned model; RMSE is derived from MSE
mse_grid_search, mae_grid_search, r2_grid_search = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse_grid_search = math.sqrt(mse_grid_search)

In [29]:
# Report the tuned-model metrics, one per line
for label, value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(label, value)

MSE Score: 0.004413316074618187
MAE Score: 0.04324760348517494
R2 Score: 0.9963777674396052
RMSE Score: 0.06643279366862564


In [30]:
# Search outcome stored with the run: winning grid point, its CV score and
# the complete parameter set of the best estimator.
tuning_summary = {
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params()
}
run_metrics = {
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
}
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "MinMaxScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    **run_metrics,
    'parameters': json.dumps(tuning_summary),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004413, R²: 0.996378
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


## Standard Scaler

In [31]:
# Fit the scaler on the training features only, then apply it to the test features
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train_raw), scaler.transform(X_test_raw)

In [32]:
# Display the standardised training feature array
X_train

array([[ 1.01884116,  1.17229173, -0.21938027, ..., -0.12088825,
         1.52387472,  1.09644529],
       [ 1.64376461, -0.75947244,  0.64624572, ..., -1.52028666,
         0.24645772, -0.43343649],
       [-1.16839092, -0.1913065 , -1.66209024, ..., -1.52028666,
        -0.73592134, -0.43343649],
       ...,
       [ 0.70637943,  1.28592492, -0.21938027, ...,  0.96917545,
         0.25618424, -0.30383078],
       [-1.48085264, -0.30493969,  1.36760071, ..., -1.52028666,
        -0.73592134, -0.43343649],
       [ 1.01884116, -0.75947244, -0.50792226, ...,  1.01722469,
         0.20430944,  0.85928021]])

In [33]:
# Display the standardised test feature array
X_test

array([[ 0.70637943, -1.44127155,  1.51187171, ...,  1.02148941,
        -0.73592134, -0.43343649],
       [ 0.08145598, -0.75947244,  0.50197472, ...,  1.23102956,
        -0.49600038, -0.43343649],
       [ 0.70637943, -1.55490474, -0.94073525, ...,  1.30438283,
        -0.73592134, -0.43343649],
       ...,
       [-1.16839092, -0.98673881,  0.79051672, ..., -1.51659057,
        -0.07775979, -0.43343649],
       [-0.54346747, -1.21400518,  1.22332971, ..., -1.52028666,
        -0.73592134,  0.71764924],
       [ 0.08145598,  0.83139217,  1.51187171, ..., -0.05293696,
        -0.73592134, -0.43343649]])

### Baseline Parameters (fixed, untuned)

In [34]:
# Baseline Lasso with hand-picked settings (no tuning); seeded for repeatability
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [35]:
%%time
# Fit the baseline Lasso on the scaled training features (timed by the cell magic)
training_model.fit(X_train, y_train)

CPU times: user 28.3 ms, sys: 0 ns, total: 28.3 ms
Wall time: 11.6 ms


In [36]:
# Predict SolarEnergy for the scaled test features and display the predictions
y_pred = training_model.predict(X_test)
y_pred

array([0.00247851, 0.27406501, 0.00369553, ..., 0.74374264, 0.00578442,
       0.00278918])

In [37]:
# Display the true SolarEnergy values of the test split for comparison with y_pred
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [38]:
# Test-set error metrics of the baseline model; RMSE is derived from MSE
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse = math.sqrt(mse)

In [39]:
# Report the baseline metrics, one per line
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 0.004426547062889539
MAE Score: 0.04306790881285143
R2 Score: 0.9963669080958119
RMSE Score: 0.06653230089880809


In [40]:
# Describe this run (baseline model, StandardScaler features) and hand it to the
# history helper imported from historyManagement.
run_identity = {
    'model_name': model_name_for_saving,
    'scaler_name': "StandardScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
}
run_metrics = {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}
regression_params = {
    **run_identity,
    **run_metrics,
    # Hyper-parameters of the fitted estimator, stored as a JSON string
    'parameters': json.dumps(training_model.get_params()),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004427, R²: 0.996367
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Grid Search

In [41]:
# Exhaustive 5-fold search over grid_param, ranked by negative MSE
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # parallel processing; the machine will be busy while this runs
    verbose=1,
)

In [None]:
%%time
# Run the full grid search on the scaled training data (long-running; timed by the cell magic)
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [43]:
# The search scores with negative MSE, so flip the sign to report MSE and RMSE
best_cv_score = grid_search.best_score_
print("TUNING RESULTS", "="*50, sep="\n")
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_cv_score)
print("Best Cross-Validation Score (MSE):", -best_cv_score)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_cv_score))

TUNING RESULTS
Best Parameters: {'alpha': 0.0001, 'fit_intercept': True, 'max_iter': 1000, 'positive': True, 'precompute': True, 'selection': 'cyclic', 'tol': 1e-06, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004307154409673224
Best Cross-Validation Score (MSE): 0.004307154409673224
Best Cross-Validation Score (RMSE): 0.06562891443314618


In [44]:
# Take the best estimator found by the search and predict the scaled test features
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [45]:
# Test-set error metrics of the tuned model; RMSE is derived from MSE
mse_grid_search, mae_grid_search, r2_grid_search = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse_grid_search = math.sqrt(mse_grid_search)

In [46]:
# Report the tuned-model metrics, one per line
for label, value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(label, value)

MSE Score: 0.004413161518530677
MAE Score: 0.04320329304747381
R2 Score: 0.9963778942916328
RMSE Score: 0.06643163040698818


In [47]:
# Search outcome stored with the run: winning grid point, its CV score and
# the complete parameter set of the best estimator.
tuning_summary = {
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params()
}
run_metrics = {
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
}
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "StandardScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    **run_metrics,
    'parameters': json.dumps(tuning_summary),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004413, R²: 0.996378
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


## Robust Scaler

In [48]:
# Fit the scaler on the training features only, then apply it to the test features
scaler = RobustScaler()
X_train, X_test = scaler.fit_transform(X_train_raw), scaler.transform(X_test_raw)

In [49]:
# Display the RobustScaler-transformed training feature array
X_train

array([[ 0.6       ,  0.66666667, -0.16666667, ..., -0.03738547,
         1.44008264,  8.87596899],
       [ 1.        , -0.46666667,  0.33333333, ..., -0.84268652,
         0.62603306,  0.        ],
       [-0.8       , -0.13333333, -1.        , ..., -0.84268652,
         0.        ,  0.        ],
       ...,
       [ 0.4       ,  0.73333333, -0.16666667, ...,  0.5899051 ,
         0.6322314 ,  0.75193798],
       [-1.        , -0.2       ,  0.75      , ..., -0.84268652,
         0.        ,  0.        ],
       [ 0.6       , -0.46666667, -0.33333333, ...,  0.61755563,
         0.59917355,  7.5       ]])

In [50]:
# Display the RobustScaler-transformed test feature array
X_test

array([[ 4.00000000e-01, -8.66666667e-01,  8.33333333e-01, ...,
         6.20009817e-01,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00, -4.66666667e-01,  2.50000000e-01, ...,
         7.40592277e-01,  1.52892562e-01,  0.00000000e+00],
       [ 4.00000000e-01, -9.33333333e-01, -5.83333333e-01, ...,
         7.82804319e-01,  0.00000000e+00,  0.00000000e+00],
       ...,
       [-8.00000000e-01, -6.00000000e-01,  4.16666667e-01, ...,
        -8.40559555e-01,  4.19421488e-01,  0.00000000e+00],
       [-4.00000000e-01, -7.33333333e-01,  6.66666667e-01, ...,
        -8.42686518e-01,  0.00000000e+00,  6.67829457e+00],
       [ 0.00000000e+00,  4.66666667e-01,  8.33333333e-01, ...,
         1.71793194e-03,  0.00000000e+00,  0.00000000e+00]])

### Baseline Parameters (fixed, untuned)

In [51]:
# Baseline Lasso with hand-picked settings (no tuning); seeded for repeatability
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [52]:
%%time
# Fit the baseline Lasso on the scaled training features (timed by the cell magic)
training_model.fit(X_train, y_train)

CPU times: user 27.8 ms, sys: 763 µs, total: 28.5 ms
Wall time: 10.8 ms


In [53]:
# Predict SolarEnergy for the scaled test features and display the predictions
y_pred = training_model.predict(X_test)
y_pred

array([0.00991474, 0.27015121, 0.00894535, ..., 0.73888469, 0.0118139 ,
       0.01010325])

In [54]:
# Display the true SolarEnergy values of the test split for comparison with y_pred
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [55]:
# Test-set error metrics of the baseline model; RMSE is derived from MSE
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse = math.sqrt(mse)

In [56]:
# Report the baseline metrics, one per line
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 0.004465635576647832
MAE Score: 0.044245535301014015
R2 Score: 0.9963348261681007
RMSE Score: 0.06682541115958683


In [57]:
# Describe this run (baseline model, RobustScaler features) and hand it to the
# history helper imported from historyManagement.
run_identity = {
    'model_name': model_name_for_saving,
    'scaler_name': "RobustScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
}
run_metrics = {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}
regression_params = {
    **run_identity,
    **run_metrics,
    # Hyper-parameters of the fitted estimator, stored as a JSON string
    'parameters': json.dumps(training_model.get_params()),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004466, R²: 0.996335
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


### Tuning with Grid Search

In [58]:
# Exhaustive 5-fold search over grid_param, ranked by negative MSE
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # parallel processing; the machine will be busy while this runs
    verbose=1,
)

In [None]:
%%time
# Run the full grid search on the scaled training data (long-running; timed by the cell magic)
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [60]:
# The search scores with negative MSE, so flip the sign to report MSE and RMSE
best_cv_score = grid_search.best_score_
print("TUNING RESULTS", "="*50, sep="\n")
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_cv_score)
print("Best Cross-Validation Score (MSE):", -best_cv_score)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_cv_score))

TUNING RESULTS
Best Parameters: {'alpha': 1e-05, 'fit_intercept': True, 'max_iter': 2000, 'positive': True, 'precompute': True, 'selection': 'cyclic', 'tol': 1e-06, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004307384736957252
Best Cross-Validation Score (MSE): 0.004307384736957252
Best Cross-Validation Score (RMSE): 0.06563066917956309


In [61]:
# Take the best estimator found by the search and predict the scaled test features
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [62]:
# Test-set error metrics of the tuned model; RMSE is derived from MSE
mse_grid_search, mae_grid_search, r2_grid_search = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse_grid_search = math.sqrt(mse_grid_search)

In [63]:
# Report the tuned-model metrics, one per line
for label, value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(label, value)

MSE Score: 0.004412190999895848
MAE Score: 0.04329480765859905
R2 Score: 0.9963786908455483
RMSE Score: 0.0664243253627453


In [64]:
# Search outcome stored with the run: winning grid point, its CV score and
# the complete parameter set of the best estimator.
tuning_summary = {
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params()
}
run_metrics = {
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
}
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "RobustScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    **run_metrics,
    'parameters': json.dumps(tuning_summary),
}
save_model_performance_if_better('regression', regression_params)

✨ New regression model added:
   MSE: 0.004412, R²: 0.996379
   Features: 27
💾 Registry updated: Model_Training_History/History_Regression.csv


# Selected Features

In [65]:
# Correlation of every float/int column with SolarEnergy, strongest positive first.
# NOTE(review): computed on the full dataset, test rows included - consider
# weather_data_train if this ranking drives feature selection.
numeric_frame = weather_data.select_dtypes(include=[float, int])
numeric_cols = numeric_frame.columns
solar_energy_corr = numeric_frame.corr()['SolarEnergy']
correlations = solar_energy_corr.sort_values(ascending=False)
print(correlations)

SolarEnergy                  1.000000
SolarRadiation               0.999867
UVIndex                      0.998178
DaySegments_Midday           0.594829
Temp                         0.400703
FeelsLike                    0.344667
DaySegments_Morning          0.243917
WindSpeed                    0.216874
DaySegments_Afternoon        0.216045
WindDir                      0.181173
SevereRisk                   0.125741
Hour                         0.095805
Season_Summer                0.093686
Visibility                   0.054855
SeaLevelPressure             0.026544
Year                         0.022062
Dew                          0.014013
Day                         -0.001489
PrecipProb                  -0.013347
Season_Rainy                -0.032901
Season_Autumn               -0.035246
Season_Winter               -0.035617
Precip                      -0.039120
CloudCover                  -0.055435
Month                       -0.061170
Windgust                    -0.100191
DaySegments_

In [66]:
# Re-list the available columns and dtypes before choosing which features to drop
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14616 entries, 0 to 14615
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   DateTime                   14616 non-null  datetime64[ns]
 1   Year                       14616 non-null  int64         
 2   Month                      14616 non-null  int64         
 3   Day                        14616 non-null  int64         
 4   Hour                       14616 non-null  int64         
 5   DaySegments                14616 non-null  object        
 6   DaySegments_Afternoon      14616 non-null  int64         
 7   DaySegments_Early Morning  14616 non-null  int64         
 8   DaySegments_Evening        14616 non-null  int64         
 9   DaySegments_Late Night     14616 non-null  int64         
 10  DaySegments_Midday         14616 non-null  int64         
 11  DaySegments_Morning        14616 non-null  int64         
 12  DayS

## Run Again with the Reduced Feature Set

In [67]:
# Columns withheld from the reduced feature matrix (reason per column as given by the notebook author)
excluded_columns = [
    'DateTime',        # model cannot use DateTime as a feature
    'Year',            # considered to have no effect on weather data
    'Season',          # categorical, not numerical
    'DaySegments',     # categorical, not numerical
    'Visibility',      # marked as a target variable
    'SolarRadiation',  # marked as a target variable
    'SolarEnergy',     # the target predicted in this notebook
    'Conditions',      # marked as a target variable
    'Icon',            # marked as a target variable
    'Day',             # same day from different months acts differently
    'WindDir',         # judged to have very low correlation with SolarEnergy
    'Precip',          # judged to have very low correlation with SolarEnergy
]
X_train_raw = weather_data_train.drop(columns=excluded_columns)
y_train = weather_data_train['SolarEnergy']

In [68]:
# Select exactly the columns used for training instead of repeating the
# hard-coded drop list, so the train and test feature sets (and their column
# order) cannot drift apart when the list is edited in one place only.
X_test_raw = weather_data_test.loc[:, X_train_raw.columns]
y_test = weather_data_test['SolarEnergy']

In [69]:
# Feature names as a plain list, plus a comma-joined form for the run registry
feature_columns = X_train_raw.columns.tolist()
feature_columns_for_saving = ','.join(feature_columns)

In [70]:
# Sanity check: features and targets must have matching row counts per partition
for label, rows in (
    ("X_train: ", X_train_raw),
    ("y_train: ", y_train),
    ("\nX_test: ", X_test_raw),
    ("y_test: ", y_test),
):
    print(label, len(rows))

X_train:  11692
y_train:  11692

X_test:  2924
y_test:  2924


### MinMax Scaler

In [71]:
# Fit the scaler on the training features only, then apply it to the test features
scaler = MinMaxScaler()
X_train, X_test = scaler.fit_transform(X_train_raw), scaler.transform(X_test_raw)

In [72]:
# Display the MinMax-scaled training array of the reduced feature set
X_train

array([[0.72727273, 0.43478261, 0.        , ..., 0.4922    , 0.697     ,
        0.31560059],
       [0.90909091, 0.69565217, 1.        , ..., 0.        , 0.303     ,
        0.07388643],
       [0.09090909, 0.        , 0.        , ..., 0.        , 0.        ,
        0.07388643],
       ...,
       [0.63636364, 0.43478261, 0.        , ..., 0.8756    , 0.306     ,
        0.09436352],
       [0.        , 0.91304348, 0.        , ..., 0.        , 0.        ,
        0.07388643],
       [0.72727273, 0.34782609, 0.        , ..., 0.8925    , 0.29      ,
        0.27812962]])

In [73]:
# Display the MinMax-scaled test array of the reduced feature set
X_test

array([[0.63636364, 0.95652174, 0.        , ..., 0.894     , 0.        ,
        0.07388643],
       [0.45454545, 0.65217391, 1.        , ..., 0.9677    , 0.074     ,
        0.07388643],
       [0.63636364, 0.2173913 , 0.        , ..., 0.9935    , 0.        ,
        0.07388643],
       ...,
       [0.09090909, 0.73913043, 1.        , ..., 0.0013    , 0.203     ,
        0.07388643],
       [0.27272727, 0.86956522, 0.        , ..., 0.        , 0.        ,
        0.25575259],
       [0.45454545, 0.95652174, 0.        , ..., 0.5161    , 0.        ,
        0.07388643]])

#### Baseline Parameters (fixed, untuned)

In [74]:
# Baseline Lasso with hand-picked settings (no tuning); seeded for repeatability
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [75]:
%%time
# Fit the baseline Lasso on the scaled training features (timed by the cell magic)
training_model.fit(X_train, y_train)

CPU times: user 15.8 ms, sys: 0 ns, total: 15.8 ms
Wall time: 7.08 ms


In [76]:
# Predict SolarEnergy for the scaled test features and display the predictions
y_pred = training_model.predict(X_test)
y_pred

array([0.00956023, 0.27627211, 0.01336701, ..., 0.73457965, 0.00956023,
       0.00956023])

In [77]:
# Display the true SolarEnergy values of the test split for comparison with y_pred
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [78]:
# Test-set error metrics of the baseline model; RMSE is derived from MSE
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
rmse = math.sqrt(mse)

In [79]:
# Report the baseline metrics, one per line
for label, value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(label, value)

MSE Score: 0.004488281850305023
MAE Score: 0.04476137893734301
R2 Score: 0.9963162392215902
RMSE Score: 0.06699464045955485


In [80]:
# Describe this run (baseline model, MinMaxScaler, reduced feature set) and hand
# it to the history helper imported from historyManagement.
run_identity = {
    'model_name': model_name_for_saving,
    'scaler_name': "MinMaxScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
}
run_metrics = {'mse': mse, 'mae': mae, 'rmse': rmse, 'r2': r2}
regression_params = {
    **run_identity,
    **run_metrics,
    # Hyper-parameters of the fitted estimator, stored as a JSON string
    'parameters': json.dumps(training_model.get_params()),
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with better performance!
   MSE: 0.004488 → 0.004488 (lower is better)
   R²:  0.996316 → 0.996316 (higher is better)
   Features: 27 → 24
💾 Registry updated: Model_Training_History/History_Regression.csv


#### Tuning with Grid Search

In [81]:
# Exhaustive 5-fold search over `grid_param`, ranked by negative MSE
# (scikit-learn maximises scores, so values closer to zero are better).
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # run in parallel on all CPU cores; the machine may be hard to use while this runs
    verbose=1,
)

In [None]:
%%time
# Run the cross-validated search on the scaled training split; %%time reports the total cost.
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [83]:
# Summarise the search: winning parameters plus the best cross-validation score,
# shown as the raw negative MSE, as MSE, and as RMSE.
best_neg_mse = grid_search.best_score_
print("TUNING RESULTS")
print("=" * 50)
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_neg_mse)
print("Best Cross-Validation Score (MSE):", -best_neg_mse)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_neg_mse))

TUNING RESULTS
Best Parameters: {'alpha': 1e-05, 'fit_intercept': True, 'max_iter': 1000, 'positive': True, 'precompute': True, 'selection': 'random', 'tol': 0.0001, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004305503520428994
Best Cross-Validation Score (MSE): 0.004305503520428994
Best Cross-Validation Score (RMSE): 0.06561633577417283


In [84]:
# Take the best estimator found by the grid search and predict on the scaled test set.
# NOTE: this overwrites `y_pred`, which previously held the baseline model's predictions.
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [85]:
# Test-set metrics for the tuned estimator (`y_pred` holds its predictions at this point).
# Kept in *_grid_search names so the baseline metrics stay available.
r2_grid_search = r2_score(y_true=y_test, y_pred=y_pred)
mae_grid_search = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse_grid_search = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse_grid_search = math.sqrt(mse_grid_search)

In [86]:
# Report the tuned model's test-set metrics, one per line.
for metric_label, metric_value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(metric_label, metric_value)

MSE Score: 0.004414310249123902
MAE Score: 0.04322898992449584
R2 Score: 0.9963769514701155
RMSE Score: 0.06644027580559779


In [87]:
# Describe the tuned MinMaxScaler run. The stored parameters bundle the winning
# grid point, its cross-validation score, and the best estimator's full parameter set.
tuned_parameters_json = json.dumps({
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params(),
})
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "MinMaxScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
    'parameters': tuned_parameters_json,
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with same performance but fewer features!
   MSE: 0.004413 ≈ 0.004414 (similar)
   R²:  0.996378 ≈ 0.996377 (similar)
   Features: 27 → 24 (🎯 3 fewer features!)
💾 Registry updated: Model_Training_History/History_Regression.csv


### Standard Scaler

In [88]:
# Learn the standardisation statistics from the training split only, then apply
# the same fitted transform to both splits so the test data never influences the scaler.
# NOTE: this rebinds X_train / X_test, replacing the previous section's scaled arrays.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [89]:
# Preview the StandardScaler-transformed training features (NumPy abbreviates the repr of large arrays).
X_train

array([[ 1.01884116, -0.21938027, -0.38875095, ..., -0.12088825,
         1.52387472,  1.09644529],
       [ 1.64376461,  0.64624572,  2.57234098, ..., -1.52028666,
         0.24645772, -0.43343649],
       [-1.16839092, -1.66209024, -0.38875095, ..., -1.52028666,
        -0.73592134, -0.43343649],
       ...,
       [ 0.70637943, -0.21938027, -0.38875095, ...,  0.96917545,
         0.25618424, -0.30383078],
       [-1.48085264,  1.36760071, -0.38875095, ..., -1.52028666,
        -0.73592134, -0.43343649],
       [ 1.01884116, -0.50792226, -0.38875095, ...,  1.01722469,
         0.20430944,  0.85928021]])

In [90]:
# Preview the StandardScaler-transformed test features (NumPy abbreviates the repr of large arrays).
X_test

array([[ 0.70637943,  1.51187171, -0.38875095, ...,  1.02148941,
        -0.73592134, -0.43343649],
       [ 0.08145598,  0.50197472,  2.57234098, ...,  1.23102956,
        -0.49600038, -0.43343649],
       [ 0.70637943, -0.94073525, -0.38875095, ...,  1.30438283,
        -0.73592134, -0.43343649],
       ...,
       [-1.16839092,  0.79051672,  2.57234098, ..., -1.51659057,
        -0.07775979, -0.43343649],
       [-0.54346747,  1.22332971, -0.38875095, ..., -1.52028666,
        -0.73592134,  0.71764924],
       [ 0.08145598,  1.51187171, -0.38875095, ..., -0.05293696,
        -0.73592134, -0.43343649]])

#### Baseline Parameters (untuned)

In [91]:
# Untuned baseline Lasso for the StandardScaler features: regularisation strength
# and iteration cap are set explicitly rather than left to the estimator's defaults.
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [92]:
%%time
# Fit the baseline model on the StandardScaler-transformed training split; %%time reports the cost.
training_model.fit(X_train, y_train)

CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 9.19 ms


In [93]:
# Predict the target for the StandardScaler-transformed test features with the
# fitted baseline model, then echo the array as a quick sanity check.
y_pred = training_model.predict(X_test)
y_pred

array([0.00263847, 0.27358283, 0.00365318, ..., 0.74359454, 0.00497524,
       0.0028812 ])

In [94]:
# Ground-truth SolarEnergy values for the test rows, shown for comparison with the predictions above.
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [95]:
# Score the StandardScaler baseline predictions against the held-out targets.
# RMSE is derived from MSE so the two always stay consistent.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = math.sqrt(mse)

In [96]:
# Report the StandardScaler baseline's test-set metrics, one per line.
for metric_label, metric_value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(metric_label, metric_value)

MSE Score: 0.0044273424004779745
MAE Score: 0.04307012838803158
R2 Score: 0.9963662553218748
RMSE Score: 0.06653827770898474


In [97]:
# Describe the baseline StandardScaler run and hand it to the history helper
# (presumably provided by the `historyManagement` star import), which reports
# whether the registry CSV was updated.
baseline_parameters_json = json.dumps(training_model.get_params())
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "StandardScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse,
    'mae': mae,
    'rmse': rmse,
    'r2': r2,
    'parameters': baseline_parameters_json,
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with same performance but fewer features!
   MSE: 0.004427 ≈ 0.004427 (similar)
   R²:  0.996367 ≈ 0.996366 (similar)
   Features: 27 → 24 (🎯 3 fewer features!)
💾 Registry updated: Model_Training_History/History_Regression.csv


#### Tuning with Grid Search

In [98]:
# Exhaustive 5-fold search over `grid_param` on the StandardScaler features,
# ranked by negative MSE (scikit-learn maximises scores, so closer to zero is better).
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # run in parallel on all CPU cores; the machine may be hard to use while this runs
    verbose=1,
)

In [None]:
%%time
# Run the cross-validated search on the StandardScaler-transformed training split; %%time reports the total cost.
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [100]:
# Summarise the search: winning parameters plus the best cross-validation score,
# shown as the raw negative MSE, as MSE, and as RMSE.
best_neg_mse = grid_search.best_score_
print("TUNING RESULTS")
print("=" * 50)
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_neg_mse)
print("Best Cross-Validation Score (MSE):", -best_neg_mse)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_neg_mse))

TUNING RESULTS
Best Parameters: {'alpha': 5e-05, 'fit_intercept': True, 'max_iter': 1000, 'positive': True, 'precompute': True, 'selection': 'random', 'tol': 0.0001, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004305488466257397
Best Cross-Validation Score (MSE): 0.004305488466257397
Best Cross-Validation Score (RMSE): 0.06561622106047708


In [101]:
# Take the best estimator found by the grid search and predict on the StandardScaler-transformed test set.
# NOTE: this overwrites `y_pred`, which previously held the baseline model's predictions.
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [102]:
# Test-set metrics for the tuned estimator (`y_pred` holds its predictions at this point).
# Kept in *_grid_search names so the baseline metrics stay available.
r2_grid_search = r2_score(y_true=y_test, y_pred=y_pred)
mae_grid_search = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse_grid_search = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse_grid_search = math.sqrt(mse_grid_search)

In [103]:
# Report the tuned StandardScaler model's test-set metrics, one per line.
for metric_label, metric_value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(metric_label, metric_value)

MSE Score: 0.004414163253500879
MAE Score: 0.043234788013064046
R2 Score: 0.9963770721168861
RMSE Score: 0.06643916957263146


In [104]:
# Describe the tuned StandardScaler run. The stored parameters bundle the winning
# grid point, its cross-validation score, and the best estimator's full parameter set.
tuned_parameters_json = json.dumps({
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params(),
})
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "StandardScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
    'parameters': tuned_parameters_json,
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with same performance but fewer features!
   MSE: 0.004413 ≈ 0.004414 (similar)
   R²:  0.996378 ≈ 0.996377 (similar)
   Features: 27 → 24 (🎯 3 fewer features!)
💾 Registry updated: Model_Training_History/History_Regression.csv


### Robust Scaler

In [105]:
# Learn the robust-scaling statistics from the training split only, then apply
# the same fitted transform to both splits so the test data never influences the scaler.
# NOTE: this rebinds X_train / X_test, replacing the previous section's scaled arrays.
scaler = RobustScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [106]:
# Preview the RobustScaler-transformed training features (NumPy abbreviates the repr of large arrays).
X_train

array([[ 0.6       , -0.16666667,  0.        , ..., -0.03738547,
         1.44008264,  8.87596899],
       [ 1.        ,  0.33333333,  1.        , ..., -0.84268652,
         0.62603306,  0.        ],
       [-0.8       , -1.        ,  0.        , ..., -0.84268652,
         0.        ,  0.        ],
       ...,
       [ 0.4       , -0.16666667,  0.        , ...,  0.5899051 ,
         0.6322314 ,  0.75193798],
       [-1.        ,  0.75      ,  0.        , ..., -0.84268652,
         0.        ,  0.        ],
       [ 0.6       , -0.33333333,  0.        , ...,  0.61755563,
         0.59917355,  7.5       ]])

In [107]:
# Preview the RobustScaler-transformed test features (NumPy abbreviates the repr of large arrays).
X_test

array([[ 4.00000000e-01,  8.33333333e-01,  0.00000000e+00, ...,
         6.20009817e-01,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  2.50000000e-01,  1.00000000e+00, ...,
         7.40592277e-01,  1.52892562e-01,  0.00000000e+00],
       [ 4.00000000e-01, -5.83333333e-01,  0.00000000e+00, ...,
         7.82804319e-01,  0.00000000e+00,  0.00000000e+00],
       ...,
       [-8.00000000e-01,  4.16666667e-01,  1.00000000e+00, ...,
        -8.40559555e-01,  4.19421488e-01,  0.00000000e+00],
       [-4.00000000e-01,  6.66666667e-01,  0.00000000e+00, ...,
        -8.42686518e-01,  0.00000000e+00,  6.67829457e+00],
       [ 0.00000000e+00,  8.33333333e-01,  0.00000000e+00, ...,
         1.71793194e-03,  0.00000000e+00,  0.00000000e+00]])

#### Baseline Parameters (untuned)

In [108]:
# Untuned baseline Lasso for the RobustScaler features: regularisation strength
# and iteration cap are set explicitly rather than left to the estimator's defaults.
training_model = LassoR(
    alpha=0.001,
    max_iter=10000,
    random_state=42,
)
training_model

In [109]:
%%time
# Fit the baseline model on the RobustScaler-transformed training split; %%time reports the cost.
training_model.fit(X_train, y_train)

CPU times: user 21.9 ms, sys: 718 µs, total: 22.6 ms
Wall time: 8.83 ms


In [110]:
# Predict the target for the RobustScaler-transformed test features with the
# fitted baseline model, then echo the array as a quick sanity check.
y_pred = training_model.predict(X_test)
y_pred

array([0.00985018, 0.27044615, 0.00893683, ..., 0.73899057, 0.01160812,
       0.01002868])

In [111]:
# Ground-truth SolarEnergy values for the test rows, shown for comparison with the predictions above.
y_test

5158     0.00
3831     0.36
5117     0.00
6326     2.70
11359    0.49
         ... 
10290    0.30
13995    0.00
9665     0.90
2276     0.00
4174     0.00
Name: SolarEnergy, Length: 2924, dtype: float64

In [112]:
# Score the RobustScaler baseline predictions against the held-out targets.
# RMSE is derived from MSE so the two always stay consistent.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = math.sqrt(mse)

In [113]:
# Report the RobustScaler baseline's test-set metrics, one per line.
for metric_label, metric_value in (
    ("MSE Score:", mse),
    ("MAE Score:", mae),
    ("R2 Score:", r2),
    ("RMSE Score:", rmse),
):
    print(metric_label, metric_value)

MSE Score: 0.004465067797004889
MAE Score: 0.04424113948111088
R2 Score: 0.996335292173679
RMSE Score: 0.06682116279297218


In [114]:
# Describe the baseline RobustScaler run and hand it to the history helper
# (presumably provided by the `historyManagement` star import), which reports
# whether the registry CSV was updated.
baseline_parameters_json = json.dumps(training_model.get_params())
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "RobustScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse,
    'mae': mae,
    'rmse': rmse,
    'r2': r2,
    'parameters': baseline_parameters_json,
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with better performance!
   MSE: 0.004466 → 0.004465 (lower is better)
   R²:  0.996335 → 0.996335 (higher is better)
   Features: 27 → 24
💾 Registry updated: Model_Training_History/History_Regression.csv


#### Tuning with Grid Search

In [115]:
# Exhaustive 5-fold search over `grid_param` on the RobustScaler features,
# ranked by negative MSE (scikit-learn maximises scores, so closer to zero is better).
grid_search = GridSearchCV(
    estimator=LassoR(random_state=42),
    param_grid=grid_param,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # run in parallel on all CPU cores; the machine may be hard to use while this runs
    verbose=1,
)

In [None]:
%%time
# Run the cross-validated search on the RobustScaler-transformed training split; %%time reports the total cost.
print("Starting hyperparameter tuning...")
grid_search.fit(X_train, y_train)

In [117]:
# Summarise the search: winning parameters plus the best cross-validation score,
# shown as the raw negative MSE, as MSE, and as RMSE.
best_neg_mse = grid_search.best_score_
print("TUNING RESULTS")
print("=" * 50)
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score (Negative MSE):", best_neg_mse)
print("Best Cross-Validation Score (MSE):", -best_neg_mse)
print("Best Cross-Validation Score (RMSE):", math.sqrt(-best_neg_mse))

TUNING RESULTS
Best Parameters: {'alpha': 1e-05, 'fit_intercept': True, 'max_iter': 1000, 'positive': True, 'precompute': True, 'selection': 'cyclic', 'tol': 1e-06, 'warm_start': False}
Best Cross-Validation Score (Negative MSE): -0.004305662575369227
Best Cross-Validation Score (MSE): 0.004305662575369227
Best Cross-Validation Score (RMSE): 0.06561754777015998


In [118]:
# Take the best estimator found by the grid search and predict on the RobustScaler-transformed test set.
# NOTE: this overwrites `y_pred`, which previously held the baseline model's predictions.
grid_search_best_model = grid_search.best_estimator_
y_pred = grid_search_best_model.predict(X_test)

In [119]:
# Test-set metrics for the tuned estimator (`y_pred` holds its predictions at this point).
# Kept in *_grid_search names so the baseline metrics stay available.
r2_grid_search = r2_score(y_true=y_test, y_pred=y_pred)
mae_grid_search = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse_grid_search = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse_grid_search = math.sqrt(mse_grid_search)

In [120]:
# Report the tuned RobustScaler model's test-set metrics, one per line.
for metric_label, metric_value in (
    ("MSE Score:", mse_grid_search),
    ("MAE Score:", mae_grid_search),
    ("R2 Score:", r2_grid_search),
    ("RMSE Score:", rmse_grid_search),
):
    print(metric_label, metric_value)

MSE Score: 0.004413196428927047
MAE Score: 0.04327423890824944
R2 Score: 0.9963778656388982
RMSE Score: 0.06643189316079323


In [121]:
# Describe the tuned RobustScaler run. The stored parameters bundle the winning
# grid point, its cross-validation score, and the best estimator's full parameter set.
tuned_parameters_json = json.dumps({
    "best_params": grid_search.best_params_,
    "cv_score": grid_search.best_score_,
    "all_params": grid_search_best_model.get_params(),
})
regression_params = {
    'model_name': model_name_for_saving + " Tuned",
    'scaler_name': "RobustScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse_grid_search,
    'mae': mae_grid_search,
    'rmse': rmse_grid_search,
    'r2': r2_grid_search,
    'parameters': tuned_parameters_json,
}
save_model_performance_if_better('regression', regression_params)

✅ Regression model improved with same performance but fewer features!
   MSE: 0.004412 ≈ 0.004413 (similar)
   R²:  0.996379 ≈ 0.996378 (similar)
   Features: 27 → 24 (🎯 3 fewer features!)
💾 Registry updated: Model_Training_History/History_Regression.csv


# All Performance

In [122]:
# Print the recorded regression runs for this model family and target so the
# scaler / tuning variants can be compared side by side.
show_model_history(
    'regression',
    model_name=model_name_for_saving,
    target_column=target_name_for_saving,
)


📊 REGRESSION Model Performance History
       Model         Scaler      Target  Features      MSE      MAE     RMSE       R²
LassoR Tuned   RobustScaler SolarEnergy        24 0.004413 0.043274 0.066432 0.996378
LassoR Tuned StandardScaler SolarEnergy        24 0.004414 0.043235 0.066439 0.996377
LassoR Tuned   MinMaxScaler SolarEnergy        24 0.004414 0.043229 0.066440 0.996377
      LassoR StandardScaler SolarEnergy        24 0.004427 0.043070 0.066538 0.996366
      LassoR   RobustScaler SolarEnergy        24 0.004465 0.044241 0.066821 0.996335
      LassoR   MinMaxScaler SolarEnergy        24 0.004488 0.044761 0.066995 0.996316

📈 Total models shown: 6
🏆 Best R² Score: 0.996378 (LassoR Tuned + RobustScaler for SolarEnergy)


# <center><font size="50" color="red">Thank You</font></center>