In [32]:
import time
import ray
from operator import itemgetter

import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
# from sklearn.ensemble import RandomForestRegressor
# from xgboost import XGBRegressor
from sklearn.linear_model import ElasticNet
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

### Obtaining Data

In [26]:
# Fetch the California housing data as a (features, target) pair of pandas objects.
X, y = fetch_california_housing(as_frame=True, return_X_y=True)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=201,
)

# Preview the first five feature rows as the cell output.
X.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [28]:
# NUM_MODELS = 20

### Sequential Execution

In [54]:
def train_and_score_model(train_set, test_set, train_labels, test_labels, alpha_value, l1_ratio_value) -> tuple[tuple[float, float], float]:
    """Fit a scaled ElasticNet for one hyperparameter pair and score it.

    Parameters
    ----------
    train_set, train_labels
        Features and targets the pipeline is fitted on.
    test_set, test_labels
        Held-out features and targets used to compute the error.
    alpha_value
        Forwarded to ``ElasticNet(alpha=...)``.
    l1_ratio_value
        Forwarded to ``ElasticNet(l1_ratio=...)``.

    Returns
    -------
    tuple
        ``((alpha_value, l1_ratio_value), mse)`` where ``mse`` is the mean
        squared error of the predictions made on ``test_set``.

    Side effect: prints one progress line with the hyperparameters, the error
    and the elapsed time of this call.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the wall clock and can jump.
    started = time.perf_counter()

    # Standardise the features before the regression; the seed and iteration
    # cap are fixed so every grid point is trained under the same settings.
    regressor = ElasticNet(
        alpha=alpha_value,
        l1_ratio=l1_ratio_value,
        random_state=201,
        max_iter=5000,
    )
    model = make_pipeline(StandardScaler(), regressor)

    model.fit(train_set, train_labels)
    y_pred = model.predict(test_set)
    mse = mean_squared_error(test_labels, y_pred)
    time_delta = time.perf_counter() - started

    print(f"alpha={alpha_value}, l1_ratio={l1_ratio_value}, MSE={mse:.4f}, took: {time_delta:.2f} seconds")
    return (alpha_value, l1_ratio_value), mse


def run_sequential(alpha_values, l1_values):
    """Train and score one model per (alpha, l1_ratio) pair, one after another.

    Every alpha is combined with every l1 ratio, alphas varying slowest. The
    train/test split is read from the notebook globals ``X_train``, ``X_test``,
    ``y_train`` and ``y_test``.

    Returns the list of ``train_and_score_model`` results in grid order.
    """
    return [
        train_and_score_model(X_train, X_test, y_train, y_test, alpha, ratio)
        for alpha in alpha_values
        for ratio in l1_values
    ]

# Hyperparameter grid for the sequential run: each alpha is paired with each
# l1_ratio, giving 4 x 7 = 28 models.
alpha_values = [
    0.0001,
    0.001,
    0.01,
    0.1,
]
l1_ratio_values = [
    0.1,
    0.2,
    0.3,
    0.4,
    0.5,
    0.75,
    0.9,
]

# Time the whole sweep, not just the individual fits.
start_time = time.time()
results = run_sequential(alpha_values, l1_ratio_values)
end_time = time.time()

print("\nTotal execution time:", round(end_time - start_time, 4), "seconds")

alpha=0.0001, l1_ratio=0.1, MSE=0.5494, took: 0.22 seconds
alpha=0.0001, l1_ratio=0.2, MSE=0.5494, took: 0.29 seconds
alpha=0.0001, l1_ratio=0.3, MSE=0.5494, took: 0.34 seconds
alpha=0.0001, l1_ratio=0.4, MSE=0.5494, took: 0.28 seconds
alpha=0.0001, l1_ratio=0.5, MSE=0.5494, took: 0.35 seconds
alpha=0.0001, l1_ratio=0.75, MSE=0.5494, took: 0.32 seconds
alpha=0.0001, l1_ratio=0.9, MSE=0.5494, took: 0.27 seconds
alpha=0.001, l1_ratio=0.1, MSE=0.5494, took: 0.29 seconds
alpha=0.001, l1_ratio=0.2, MSE=0.5494, took: 0.29 seconds
alpha=0.001, l1_ratio=0.3, MSE=0.5495, took: 0.30 seconds
alpha=0.001, l1_ratio=0.4, MSE=0.5495, took: 0.26 seconds
alpha=0.001, l1_ratio=0.5, MSE=0.5495, took: 0.29 seconds
alpha=0.001, l1_ratio=0.75, MSE=0.5496, took: 0.31 seconds
alpha=0.001, l1_ratio=0.9, MSE=0.5497, took: 0.30 seconds
alpha=0.01, l1_ratio=0.1, MSE=0.5514, took: 0.30 seconds
alpha=0.01, l1_ratio=0.2, MSE=0.5518, took: 0.27 seconds
alpha=0.01, l1_ratio=0.3, MSE=0.5523, took: 0.28 seconds
alpha=0.

### Parallel Execution

In [52]:
# Always start from a fresh Ray runtime so re-running this cell does not reuse
# an instance left over from an earlier execution.
if ray.is_initialized():
    ray.shutdown()
ray.init()

# Store each part of the split in Ray's object store once (same order as
# before: X_train, X_test, y_train, y_test) and keep the returned references.
X_train_ref, X_test_ref, y_train_ref, y_test_ref = (
    ray.put(part) for part in (X_train, X_test, y_train, y_test)
)


# NOTE(review): this definition reuses the name of the sequential helper defined
# earlier in the notebook, so once this cell has run, `run_sequential` resolves
# `train_and_score_model` to this Ray remote function instead. Re-run the
# sequential cell's definitions before calling `run_sequential` again.
@ray.remote
def train_and_score_model(X_train_ref, X_test_ref, y_train_ref, y_test_ref, alpha: float, l1_ratio: float) -> tuple[float, float, float]:
    """Train a scaled ElasticNet for one (alpha, l1_ratio) pair as a Ray task.

    Parameters
    ----------
    X_train_ref, X_test_ref, y_train_ref, y_test_ref
        Despite the ``_ref`` suffix, the body treats these as pandas objects
        (it reads their array data directly): Ray resolves object references
        passed as top-level task arguments to their stored values before the
        task body runs.
    alpha
        Forwarded to ``ElasticNet(alpha=...)``.
    l1_ratio
        Forwarded to ``ElasticNet(l1_ratio=...)``.

    Returns
    -------
    tuple
        ``(alpha, l1_ratio, mse)`` where ``mse`` is the mean squared error on
        the test data.

    Side effect: prints one progress line with the error and elapsed time.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the wall clock and can jump.
    started = time.perf_counter()

    # Work on private copies of the data. NOTE(review): objects fetched from
    # Ray's object store may be backed by read-only shared memory, which is
    # presumably why explicit copies are taken here. Confirm before removing.
    X_train_local = X_train_ref.to_numpy(copy=True)
    y_train_local = y_train_ref.to_numpy(copy=True).ravel()
    X_test_local = X_test_ref.to_numpy(copy=True)
    y_test_local = y_test_ref.to_numpy(copy=True).ravel()

    # Standardise the features before the regression; the seed and iteration
    # cap are fixed so every grid point is trained under the same settings.
    regressor = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=201, max_iter=5000)
    model = make_pipeline(StandardScaler(), regressor)

    model.fit(X_train_local, y_train_local)
    y_pred = model.predict(X_test_local)
    mse = mean_squared_error(y_test_local, y_pred)

    time_delta = time.perf_counter() - started
    print(f"alpha={alpha}, l1_ratio={l1_ratio}, MSE={mse:.4f}, took {time_delta:.2f}s")
    return alpha, l1_ratio, mse


def run_parallel(alpha_list, l1_list):
    """Submit one Ray task per (alpha, l1_ratio) pair and wait for all results.

    The full grid is built first (alphas varying slowest), then every pair is
    submitted with the shared object-store references ``X_train_ref``,
    ``X_test_ref``, ``y_train_ref`` and ``y_test_ref`` read from the notebook
    globals. Returns the task results in grid order.
    """
    grid = []
    for a in alpha_list:
        grid.extend((a, l1) for l1 in l1_list)

    pending = []
    for a, l1 in grid:
        task = train_and_score_model.remote(
            X_train_ref,
            X_test_ref,
            y_train_ref,
            y_test_ref,
            alpha=a,
            l1_ratio=l1,
        )
        pending.append(task)

    # Block until every submitted task has finished.
    return ray.get(pending)

# Hyperparameter grid for the parallel run: each alpha is paired with each
# l1_ratio, giving 4 x 7 = 28 tasks.
alpha_values = [
    0.0001,
    0.001,
    0.01,
    0.1,
]
l1_ratio_values = [
    0.1,
    0.2,
    0.3,
    0.4,
    0.5,
    0.75,
    0.9,
]

# The timer covers task submission and result collection only.
start_total = time.time()
results = run_parallel(alpha_values, l1_ratio_values)
end_total = time.time()
print(f"\nTotal parallel execution time: {end_total - start_total:.2f}s")

# Release the local Ray instance now that all results are collected.
ray.shutdown()

2025-11-12 23:59:17,185	INFO worker.py:2012 -- Started a local Ray instance.


[36m(train_and_score_model pid=23936)[0m alpha=0.0001, l1_ratio=0.1, MSE=0.5494, took 0.04s
[36m(train_and_score_model pid=23936)[0m alpha=0.01, l1_ratio=0.2, MSE=0.5518, took 0.02s
[36m(train_and_score_model pid=23936)[0m alpha=0.01, l1_ratio=0.3, MSE=0.5523, took 0.01s

Total parallel execution time: 1.46s
[36m(train_and_score_model pid=23938)[0m alpha=0.01, l1_ratio=0.1, MSE=0.5514, took 0.02s[32m [repeated 16x across cluster][0m


Time taken by sequential execution: 8.4826 seconds

Time taken by parallel execution (using Ray): 1.46 seconds

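Parallel execution with Ray is about 6 times faster than sequential execution (8.48 s vs. 1.46 s, a speedup of roughly 5.8×). Note that the individual fits were also much faster inside the Ray tasks (about 0.01–0.04 s each, versus about 0.2–0.35 s each in the sequential run), so the two runs are not a like-for-like comparison and the gain cannot be attributed to parallelism alone.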