In [1]:
import pandas as pd

In [2]:
cars= pd.read_csv("auto_mpg_preprocessed.csv")

In [3]:
cars.head(2)

Unnamed: 0,symboling,wheel_base,length,width,height,curb_weight,num_of_cylinders,engine_size,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,88.6,168.8,64.1,48.8,2548,4,130,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
1,3,88.6,168.8,64.1,48.8,2548,4,130,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0


In [4]:
X= cars.iloc[:,:-1]
y= cars.iloc[:,-1]

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.25, random_state=42)

# Hyper Param Tuning using Combination of Random and GridSearchCV 

In [91]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.linear_model import ElasticNet
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [58]:
# # define the model with scaling and elsticnet regression
# model= Pipeline([
#     ("scaler", StandardScaler()),
#     ("regression", ElasticNet())
# ])

In [59]:
# initializing the elsticnet model
elastic_net= ElasticNet()

In [60]:
# Define the param grid for random_search defining Standard scaler and elsticnet params.
param_dist_random = {
    "alpha": np.logspace(-2, 2, 20),  # it will take 20 values in range 0.01 to 100(10^-2 to 10^+2).
    "l1_ratio": np.linspace(0, 1, 20), # its will take 20 equal space values between 0 and 1.
    "normalize": [True, False], # i have used here normalization instead of Standardization
    "fit_intercept": [True, False],
}

Note: Since I have not used Pipeline and I have not definied scaler or regression in the pipeline, I shouldnt give 
      regression__ or scaler__ as prefix in the param_dist_random. If I give it will throw an error.

In [61]:
# Perform Randomized Search
random_search = RandomizedSearchCV(
    elastic_net,
    param_distributions=param_dist_random,
    n_iter=50,  # Number of random samples
    cv=5,       # Cross-validation folds
    scoring="r2",
    random_state=42
)

In [62]:
# Fit the random_search model to the train data
random_search.fit(X_train, y_train)

RandomizedSearchCV(cv=5, estimator=ElasticNet(), n_iter=50,
                   param_distributions={'alpha': array([1.00000000e-02, 1.62377674e-02, 2.63665090e-02, 4.28133240e-02,
       6.95192796e-02, 1.12883789e-01, 1.83298071e-01, 2.97635144e-01,
       4.83293024e-01, 7.84759970e-01, 1.27427499e+00, 2.06913808e+00,
       3.35981829e+00, 5.45559478e+00, 8.85866790e+00, 1.43844989e+01,
       2.33572147e+01, 3.79269019e+01, 6.15848211e+01, 1.00000000e+02]),
                                        'fit_intercept': [True, False],
                                        'l1_ratio': array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ]),
                                        'normalize': [True, False]},
                   random_state=42, scoring='r2')

In [76]:
# Get the best hyperparameters from Randomized Search
best_params_random = random_search.best_params_

In [77]:
best_params_random

{'normalize': False,
 'l1_ratio': 0.631578947368421,
 'fit_intercept': False,
 'alpha': 0.01}

In [83]:
# Define hyperparameter search space for Grid Search around the best parameters from Randomized Search
param_grid_grid = {
    "alpha": [best_params_random["alpha"] - 1.0, 
                             best_params_random["alpha"], best_params_random["alpha"] + 1.0],
    "l1_ratio": [best_params_random["l1_ratio"] - 1.0, 
                             best_params_random["l1_ratio"], best_params_random["l1_ratio"] + 1.0],
    "normalize": [best_params_random["normalize"]],
    "fit_intercept": [True, False],  # i have taken all params from Random search but this intercept i wanted to get from grid search.
}


In [84]:
# Perform Grid Search around the best parameters from Randomized Search
random_grid_search = GridSearchCV(
    elastic_net, 
    param_grid=param_grid_grid,
    cv=5,       # Cross-validation folds
    scoring="r2",
    verbose=1
)

In [85]:
# Fit the grid_Search model to the train data
random_grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [-0.99, 0.01, 1.01],
                         'fit_intercept': [True, False],
                         'l1_ratio': [-0.368421052631579, 0.631578947368421,
                                      1.631578947368421],
                         'normalize': [False]},
             scoring='r2', verbose=1)

In [86]:
# Get the best hyperparameters from Grid Search
best_params_grid = random_grid_search.best_params_

In [87]:
best_params_grid 

{'alpha': 0.01,
 'fit_intercept': False,
 'l1_ratio': 0.631578947368421,
 'normalize': False}

In [88]:
# Compare the results
print("Best Hyperparameters from Randomized Search:", best_params_random)
print("Best Hyperparameters from Grid Search:", best_params_grid)
print("Best R-squared Score: ", random_grid_search.best_score_)

Best Hyperparameters from Randomized Search: {'normalize': False, 'l1_ratio': 0.631578947368421, 'fit_intercept': False, 'alpha': 0.01}
Best Hyperparameters from Grid Search: {'alpha': 0.01, 'fit_intercept': False, 'l1_ratio': 0.631578947368421, 'normalize': False}
Best R-squared Score:  0.7345125699399003


In [90]:
# Evaluate on the test set
best_model_grid = random_grid_search.best_estimator_
y_pred= best_model_grid.predict(X_test)

In [92]:
r2_random_grid_cv = r2_score(y_test, y_pred)

In [93]:
r2_random_grid_cv

0.7935976836330596