# <font color="red">Visibility Prediction with Ridge Regressor with Polynomial Degree 3 Features</font>

# Library Import

In [1]:
# Data manipulation and visualization
import pandas as pd
import math
import json

# Preprocessing
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge as RidgeR

import sys
sys.path.append('../../') # Uncomment this line if running locally
# sys.path.append('/kaggle/input/weatherdata') # Uncomment this line if running on Kaggle
from historyManagement import *

# suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Datasets Import

In [2]:
# Load the weather dataset for visibility prediction
weather_data = pd.read_csv('../../FinalDatasets/finalDataset.csv') # Uncomment this line if running locally
# weather_data = pd.read_csv('/kaggle/input/weatherdata/finalDataset.csv') # Uncomment this line if running on Kaggle
weather_data['DateTime'] = pd.to_datetime(weather_data['DateTime'])
weather_data.head()

Unnamed: 0,DateTime,Year,Month,Day,Hour,DaySegments,DaySegments_Afternoon,DaySegments_Early Morning,DaySegments_Evening,DaySegments_Late Night,...,WindDir,SeaLevelPressure,CloudCover,UVIndex,SevereRisk,Visibility,SolarRadiation,SolarEnergy,Conditions,Icon
0,2023-01-01 00:00:00,2023,1,1,0,Late Night,0,0,0,1,...,0.49,1018.68,0.03,0.0,10.0,2.76,0.0,0.0,Clear,clear-night
1,2023-01-01 01:00:00,2023,1,1,1,Late Night,0,0,0,1,...,0.54,1018.03,0.11,0.0,10.0,1.75,0.0,0.0,Clear,clear-night
2,2023-01-01 02:00:00,2023,1,1,2,Late Night,0,0,0,1,...,30.51,1017.56,0.03,0.0,10.0,1.75,0.0,0.0,Clear,clear-night
3,2023-01-01 03:00:00,2023,1,1,3,Late Night,0,0,0,1,...,49.23,1018.05,0.0,0.0,10.0,2.28,0.0,0.0,Clear,clear-night
4,2023-01-01 04:00:00,2023,1,1,4,Late Night,0,0,0,1,...,49.9,1018.0,86.17,0.0,10.0,1.27,0.0,0.0,Partially cloudy,fog


In [3]:
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14616 entries, 0 to 14615
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   DateTime                   14616 non-null  datetime64[ns]
 1   Year                       14616 non-null  int64         
 2   Month                      14616 non-null  int64         
 3   Day                        14616 non-null  int64         
 4   Hour                       14616 non-null  int64         
 5   DaySegments                14616 non-null  object        
 6   DaySegments_Afternoon      14616 non-null  int64         
 7   DaySegments_Early Morning  14616 non-null  int64         
 8   DaySegments_Evening        14616 non-null  int64         
 9   DaySegments_Late Night     14616 non-null  int64         
 10  DaySegments_Midday         14616 non-null  int64         
 11  DaySegments_Morning        14616 non-null  int64         
 12  DayS

In [4]:
weather_data.describe()

Unnamed: 0,DateTime,Year,Month,Day,Hour,DaySegments_Afternoon,DaySegments_Early Morning,DaySegments_Evening,DaySegments_Late Night,DaySegments_Midday,...,Windgust,WindSpeed,WindDir,SeaLevelPressure,CloudCover,UVIndex,SevereRisk,Visibility,SolarRadiation,SolarEnergy
count,14616,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,...,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0,14616.0
mean,2023-11-01 11:30:00,2023.400657,5.720854,15.735632,11.5,0.131294,0.06055,0.098043,0.249726,0.116585,...,17.06856,8.090783,159.739178,1008.265012,53.579923,2.263555,16.504787,4.368134,227.640683,0.819217
min,2023-01-01 00:00:00,2023.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.66,0.0,0.0,982.42,0.0,0.0,3.0,0.0,0.0,0.0
25%,2023-06-02 05:45:00,2023.0,3.0,8.0,5.75,0.0,0.0,0.0,0.0,0.0,...,9.17,2.87,80.965,1003.2775,26.67,0.0,10.0,3.99,0.0,0.0
50%,2023-11-01 11:30:00,2023.0,6.0,16.0,11.5,0.0,0.0,0.0,0.0,0.0,...,14.645,7.67,163.79,1007.97,51.54,0.0,10.0,4.65,10.665,0.01
75%,2024-04-01 17:15:00,2024.0,8.0,23.0,17.25,0.0,0.0,0.0,0.0,0.0,...,23.41,11.75,241.4025,1013.4,87.79,4.74,12.58,4.65,459.745,1.66
max,2024-08-31 23:00:00,2024.0,12.0,31.0,23.0,1.0,1.0,1.0,1.0,1.0,...,84.12,71.97,360.0,1022.06,100.0,10.0,97.74,24.1,1026.65,3.7
std,,0.490048,3.195073,8.80361,6.922423,0.337734,0.238511,0.297383,0.432869,0.320936,...,10.195911,6.250203,104.422027,6.010683,35.166637,3.083074,15.031827,1.376994,307.219387,1.106444


# Data Split

In [5]:
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
for train_idx, test_idx in sss.split(weather_data, weather_data['DaySegments']):
    weather_data_train = weather_data.iloc[train_idx]
    weather_data_test = weather_data.iloc[test_idx]

In [6]:
# Show the count of each unique class in DaySegments
class_counts = weather_data['DaySegments'].value_counts().reset_index()
class_counts.columns = ['DaySegments', 'Count']

print("Distribution of data based on DaySegments:")
print(class_counts)

Distribution of data based on DaySegments:
     DaySegments  Count
0     Late Night   3650
1        Morning   3504
2      Afternoon   1919
3         Midday   1704
4          Night   1521
5        Evening   1433
6  Early Morning    885


In [7]:
print("Distribution of DaySegments in Training Set:")
print(weather_data_train['DaySegments'].value_counts())

print("\nDistribution of DaySegments in Test Set:")
print(weather_data_test['DaySegments'].value_counts())

Distribution of DaySegments in Training Set:
DaySegments
Late Night       2920
Morning          2803
Afternoon        1535
Midday           1363
Night            1217
Evening          1146
Early Morning     708
Name: count, dtype: int64

Distribution of DaySegments in Test Set:
DaySegments
Late Night       730
Morning          701
Afternoon        384
Midday           341
Night            304
Evening          287
Early Morning    177
Name: count, dtype: int64


# Some Reused Variables

In [8]:
model_name_for_saving = "RidgeR Poly 3"
target_name_for_saving = "Visibility"
poly_degree = 3

# MinMax Scaler

## Best Features

In [9]:
X_train_raw = weather_data_train.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_train = weather_data_train['Visibility']

In [10]:
X_test_raw = weather_data_test.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_test = weather_data_test['Visibility']

In [11]:
feature_columns = [col for col in X_train_raw.columns]
feature_columns_for_saving = ','.join(feature_columns)

In [12]:
print("X_train: ", len(X_train_raw))
print("y_train: ", len(y_train))
print("\nX_test: ", len(X_test_raw))
print("y_test: ", len(y_test))

X_train:  11692
y_train:  11692

X_test:  2924
y_test:  2924


## Scaler

In [13]:
scaler = MinMaxScaler()

# STEP 1: Create polynomial features FIRST
poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly.fit_transform(X_train_raw)
X_test_poly = poly.transform(X_test_raw)

print(f"Polynomial features created: {X_train_raw.shape[1]} ‚Üí {X_train_poly.shape[1]}")

# STEP 2: Apply scaler to polynomial features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_poly)
X_test = scaler.transform(X_test_poly)

print(f"Features scaled: {X_train.shape[1]} features")

Polynomial features created: 27 ‚Üí 4059
Features scaled: 4059 features


In [14]:
X_train

array([[0.72727273, 0.83333333, 0.43478261, ..., 0.21924713, 0.13626803,
        0.03811131],
       [0.90909091, 0.26666667, 0.69565217, ..., 0.01259383, 0.00547283,
        0.0010421 ],
       [0.09090909, 0.43333333, 0.        , ..., 0.        , 0.        ,
        0.0010421 ],
       ...,
       [0.63636364, 0.86666667, 0.43478261, ..., 0.01533627, 0.00787951,
        0.00179417],
       [0.        , 0.4       , 0.91304348, ..., 0.        , 0.        ,
        0.0010421 ],
       [0.72727273, 0.26666667, 0.34782609, ..., 0.03385919, 0.04512151,
        0.02704929]], shape=(11692, 4059))

In [15]:
X_test

array([[6.36363636e-01, 6.66666667e-02, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       [4.54545455e-01, 2.66666667e-01, 6.52173913e-01, ...,
        7.51165981e-04, 1.33659893e-03, 1.04209760e-03],
       [6.36363636e-01, 3.33333333e-02, 2.17391304e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       ...,
       [9.09090909e-02, 2.00000000e-01, 7.39130435e-01, ...,
        5.65281207e-03, 3.66661598e-03, 1.04209760e-03],
       [2.72727273e-01, 1.33333333e-01, 8.69565217e-01, ...,
        0.00000000e+00, 0.00000000e+00, 2.15952044e-02],
       [4.54545455e-01, 7.33333333e-01, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03]],
      shape=(2924, 4059))

## Model with Best Parameters

In [16]:
training_model = RidgeR(alpha= 1.0,
                        fit_intercept=True,
                        copy_X=True,
                        max_iter=10000,
                        tol=0.0001,
                        solver='auto',
                        positive=False,
                        random_state=42)
training_model

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,42


## Training

In [17]:
%%time
training_model.fit(X_train, y_train)

CPU times: total: 10.7 s
Wall time: 2.62 s


0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,42


In [18]:
y_pred = training_model.predict(X_test)
y_pred

array([5.16213441, 3.87890196, 3.63577377, ..., 4.24249914, 4.67259568,
       4.44824353], shape=(2924,))

In [19]:
y_test

5158     4.65
3831     3.70
5117     4.19
6326     3.88
11359    3.68
         ... 
10290    5.17
13995    4.48
9665     3.91
2276     4.00
4174     4.65
Name: Visibility, Length: 2924, dtype: float64

In [20]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [21]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1.1501444459427568
MAE Score: 0.49556429796817303
R2 Score: 0.4105964988989942
RMSE Score: 1.0724478756297469


In [22]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "MinMaxScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse,
    'mae': mae,
    'rmse': rmse,
    'r2': r2,
    'parameters': json.dumps(training_model.get_params())
}
save_model_performance_if_better('regression', regression_params)

‚ú® New regression model added:
   MSE: 1.150144, R¬≤: 0.410596
   Features: 27
üíæ Registry updated: Model_Training_History/History_Regression.csv


# Standard Scaler

## Best Features

In [23]:
X_train_raw = weather_data_train.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_train = weather_data_train['Visibility']

In [24]:
X_test_raw = weather_data_test.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_test = weather_data_test['Visibility']

In [25]:
feature_columns = [col for col in X_train_raw.columns]
feature_columns_for_saving = ','.join(feature_columns)

In [26]:
print("X_train: ", len(X_train_raw))
print("y_train: ", len(y_train))
print("\nX_test: ", len(X_test_raw))
print("y_test: ", len(y_test))

X_train:  11692
y_train:  11692

X_test:  2924
y_test:  2924


## Scaler

In [27]:
scaler = StandardScaler()

# STEP 1: Create polynomial features FIRST
poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly.fit_transform(X_train_raw)
X_test_poly = poly.transform(X_test_raw)

print(f"Polynomial features created: {X_train_raw.shape[1]} ‚Üí {X_train_poly.shape[1]}")

# STEP 2: Apply scaler to polynomial features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_poly)
X_test = scaler.transform(X_test_poly)

print(f"Features scaled: {X_train.shape[1]} features")

Polynomial features created: 27 ‚Üí 4059
Features scaled: 4059 features


In [28]:
X_train

array([[0.72727273, 0.83333333, 0.43478261, ..., 0.21924713, 0.13626803,
        0.03811131],
       [0.90909091, 0.26666667, 0.69565217, ..., 0.01259383, 0.00547283,
        0.0010421 ],
       [0.09090909, 0.43333333, 0.        , ..., 0.        , 0.        ,
        0.0010421 ],
       ...,
       [0.63636364, 0.86666667, 0.43478261, ..., 0.01533627, 0.00787951,
        0.00179417],
       [0.        , 0.4       , 0.91304348, ..., 0.        , 0.        ,
        0.0010421 ],
       [0.72727273, 0.26666667, 0.34782609, ..., 0.03385919, 0.04512151,
        0.02704929]], shape=(11692, 4059))

In [29]:
X_test

array([[6.36363636e-01, 6.66666667e-02, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       [4.54545455e-01, 2.66666667e-01, 6.52173913e-01, ...,
        7.51165981e-04, 1.33659893e-03, 1.04209760e-03],
       [6.36363636e-01, 3.33333333e-02, 2.17391304e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       ...,
       [9.09090909e-02, 2.00000000e-01, 7.39130435e-01, ...,
        5.65281207e-03, 3.66661598e-03, 1.04209760e-03],
       [2.72727273e-01, 1.33333333e-01, 8.69565217e-01, ...,
        0.00000000e+00, 0.00000000e+00, 2.15952044e-02],
       [4.54545455e-01, 7.33333333e-01, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03]],
      shape=(2924, 4059))

## Model with Best Parameters

In [30]:
training_model = RidgeR(alpha= 2.0,
                        fit_intercept=True,
                        copy_X=True,
                        max_iter=10000,
                        tol=0.001,
                        solver='sparse_cg',
                        positive=False,
                        random_state=42)
training_model

0,1,2
,alpha,2.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.001
,solver,'sparse_cg'
,positive,False
,random_state,42


## Training

In [31]:
%%time
training_model.fit(X_train, y_train)

CPU times: total: 34.5 s
Wall time: 6.98 s


0,1,2
,alpha,2.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.001
,solver,'sparse_cg'
,positive,False
,random_state,42


In [32]:
y_pred = training_model.predict(X_test)
y_pred

array([5.13195169, 3.78420073, 3.62603137, ..., 4.31313572, 4.84185741,
       4.54127139], shape=(2924,))

In [33]:
y_test

5158     4.65
3831     3.70
5117     4.19
6326     3.88
11359    3.68
         ... 
10290    5.17
13995    4.48
9665     3.91
2276     4.00
4174     4.65
Name: Visibility, Length: 2924, dtype: float64

In [34]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [35]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1.1590219198904317
MAE Score: 0.48805977837932163
R2 Score: 0.4060471449077191
RMSE Score: 1.0765788033815415


In [36]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "StandardScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse,
    'mae': mae,
    'rmse': rmse,
    'r2': r2,
    'parameters': json.dumps(training_model.get_params())
}
save_model_performance_if_better('regression', regression_params)

‚ú® New regression model added:
   MSE: 1.159022, R¬≤: 0.406047
   Features: 27
üíæ Registry updated: Model_Training_History/History_Regression.csv


# Robust Scaler

## Best Features

In [37]:
X_train_raw = weather_data_train.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_train = weather_data_train['Visibility']

In [38]:
X_test_raw = weather_data_test.drop(columns=['DateTime', # Model cannot use DateTime as a feature
                                                'Year', # No Effect on weather data
                                                'Season', # Season is categorical, not numerical
                                                'DaySegments', # DaySegments is categorical, not numerical
                                                'Visibility',  # Target variable
                                                'SolarRadiation', # Target variable
                                                'SolarEnergy', # Target variable
                                                'Conditions', # Target variable
                                                'Icon']) # Target variable
y_test = weather_data_test['Visibility']

In [39]:
feature_columns = [col for col in X_train_raw.columns]
feature_columns_for_saving = ','.join(feature_columns)

In [40]:
print("X_train: ", len(X_train_raw))
print("y_train: ", len(y_train))
print("\nX_test: ", len(X_test_raw))
print("y_test: ", len(y_test))

X_train:  11692
y_train:  11692

X_test:  2924
y_test:  2924


## Scaler

In [41]:
scaler = RobustScaler()
# STEP 1: Create polynomial features FIRST
poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly.fit_transform(X_train_raw)
X_test_poly = poly.transform(X_test_raw)

print(f"Polynomial features created: {X_train_raw.shape[1]} ‚Üí {X_train_poly.shape[1]}")

# STEP 2: Apply scaler to polynomial features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_poly)
X_test = scaler.transform(X_test_poly)

print(f"Features scaled: {X_train.shape[1]} features")

Polynomial features created: 27 ‚Üí 4059
Features scaled: 4059 features


In [42]:
X_train

array([[0.72727273, 0.83333333, 0.43478261, ..., 0.21924713, 0.13626803,
        0.03811131],
       [0.90909091, 0.26666667, 0.69565217, ..., 0.01259383, 0.00547283,
        0.0010421 ],
       [0.09090909, 0.43333333, 0.        , ..., 0.        , 0.        ,
        0.0010421 ],
       ...,
       [0.63636364, 0.86666667, 0.43478261, ..., 0.01533627, 0.00787951,
        0.00179417],
       [0.        , 0.4       , 0.91304348, ..., 0.        , 0.        ,
        0.0010421 ],
       [0.72727273, 0.26666667, 0.34782609, ..., 0.03385919, 0.04512151,
        0.02704929]], shape=(11692, 4059))

In [43]:
X_test

array([[6.36363636e-01, 6.66666667e-02, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       [4.54545455e-01, 2.66666667e-01, 6.52173913e-01, ...,
        7.51165981e-04, 1.33659893e-03, 1.04209760e-03],
       [6.36363636e-01, 3.33333333e-02, 2.17391304e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03],
       ...,
       [9.09090909e-02, 2.00000000e-01, 7.39130435e-01, ...,
        5.65281207e-03, 3.66661598e-03, 1.04209760e-03],
       [2.72727273e-01, 1.33333333e-01, 8.69565217e-01, ...,
        0.00000000e+00, 0.00000000e+00, 2.15952044e-02],
       [4.54545455e-01, 7.33333333e-01, 9.56521739e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.04209760e-03]],
      shape=(2924, 4059))

## Model with Best Parameters

In [44]:
training_model = RidgeR(alpha= 1.0,
                        fit_intercept=True,
                        copy_X=True,
                        max_iter=10000,
                        tol=0.0001,
                        solver='auto',
                        positive=False,
                        random_state=42)
training_model

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,42


## Training

In [45]:
%%time
training_model.fit(X_train, y_train)

CPU times: total: 10.8 s
Wall time: 2.66 s


0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,10000
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,42


In [46]:
y_pred = training_model.predict(X_test)
y_pred

array([5.16213441, 3.87890196, 3.63577377, ..., 4.24249914, 4.67259568,
       4.44824353], shape=(2924,))

In [47]:
y_test

5158     4.65
3831     3.70
5117     4.19
6326     3.88
11359    3.68
         ... 
10290    5.17
13995    4.48
9665     3.91
2276     4.00
4174     4.65
Name: Visibility, Length: 2924, dtype: float64

In [48]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mse)

In [49]:
print("MSE Score:", mse)
print("MAE Score:", mae)
print("R2 Score:", r2)
print("RMSE Score:", rmse)

MSE Score: 1.1501444459427568
MAE Score: 0.49556429796817303
R2 Score: 0.4105964988989942
RMSE Score: 1.0724478756297469


In [50]:
regression_params = {
    'model_name': model_name_for_saving,
    'scaler_name': "RobustScaler",
    'features_list': feature_columns_for_saving,
    'target_column': target_name_for_saving,
    'mse': mse,
    'mae': mae,
    'rmse': rmse,
    'r2': r2,
    'parameters': json.dumps(training_model.get_params())
}
save_model_performance_if_better('regression', regression_params)

‚ú® New regression model added:
   MSE: 1.150144, R¬≤: 0.410596
   Features: 27
üíæ Registry updated: Model_Training_History/History_Regression.csv


# All Performance

In [51]:
show_model_history('regression', model_name=model_name_for_saving, target_column=target_name_for_saving)


üìä REGRESSION Model Performance History
        Model         Scaler     Target  Features      MSE      MAE     RMSE       R¬≤
RidgeR Poly 3   MinMaxScaler Visibility        27 1.150144 0.495564 1.072448 0.410596
RidgeR Poly 3   RobustScaler Visibility        27 1.150144 0.495564 1.072448 0.410596
RidgeR Poly 3 StandardScaler Visibility        27 1.159022 0.488060 1.076579 0.406047

üìà Total models shown: 3
üèÜ Best R¬≤ Score: 0.410596 (RidgeR Poly 3 + MinMaxScaler for Visibility)


# <center><font size="50" color="red">Thank You</font></center>