In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from itertools import product

##### Generate the synthetic data

In [2]:
# Generate data
np.random.seed(0)
X = np.random.randn(1000, 10)
y = X @ np.random.randn(10) + np.random.randn(1000)

# Introduce noise in the data
noise_rate = 0.2
y_noisy = y.copy()
n_noisy = int(noise_rate * y.shape[0])
noise_indices = np.random.choice(y.shape[0], n_noisy, replace=False)
y_noisy[noise_indices] += np.random.randn(n_noisy)  # Add noise

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_noisy, test_size=0.2, random_state=42)

##### Implement Canal Adaptive Elastic Net

In [3]:
class CanalAdaptiveElasticNet:
    def __init__(self, lambda1=0.1, lambda2=0.1, gamma=0.1, zeta=0.1, learning_rate=0.01):
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.gamma = gamma
        self.zeta = zeta
        self.learning_rate = learning_rate
        self.beta = None
        self.scaler = StandardScaler()

    def canal_loss(self, z, epsilon, delta):
        return np.minimum(delta, np.maximum(0, np.abs(z) - epsilon))

    def compute_gradient(self, x, z, beta, epsilon, delta):
        if -epsilon - delta < z < -epsilon:
            grad = -x + 2 * self.lambda2 * beta + self.lambda1 * (-self.gamma * np.sign(beta) * np.abs(beta) ** (-self.gamma - 1) * np.abs(beta) + np.sign(beta) * np.abs(beta) ** (-self.gamma))
        elif epsilon < z < epsilon + delta:
            grad = x + 2 * self.lambda2 * beta + self.lambda1 * (-self.gamma * np.sign(beta) * np.abs(beta) ** (-self.gamma - 1) * np.abs(beta) + np.sign(beta) * np.abs(beta) ** (-self.gamma))
        else:
            grad = 2 * self.lambda2 * beta + self.lambda1 * (-self.gamma * np.sign(beta) * np.abs(beta) ** (-self.gamma - 1) * np.abs(beta) + np.sign(beta) * np.abs(beta) ** (-self.gamma))
        return grad

    def fit(self, X, y):
        X = self.scaler.fit_transform(X)
        n_samples, n_features = X.shape
        self.beta = np.random.randn(n_features) * 0.01  # Small random initialization
        
        for t in range(n_samples):
            x_t = X[t]
            y_t = y[t]
            y_pred_t = np.dot(x_t, self.beta)
            z_t = y_pred_t - y_t
            epsilon = self.zeta * np.mean(np.abs(y_pred_t) + np.abs(y_t))
            delta = self.gamma * np.mean(np.abs(y_pred_t) + np.abs(y_t))
            grad = self.compute_gradient(x_t, z_t, self.beta, epsilon, delta)
            self.beta -= self.learning_rate * grad

    def predict(self, X):
        X = self.scaler.transform(X)
        return np.dot(X, self.beta)

#### Perform Hyperparameter Tuning

In [4]:
# Define the grid of hyperparameters
param_grid = {
    'lambda1': [0.01, 0.1, 1],
    'lambda2': [0.01, 0.1, 1],
    'gamma': [0.01, 0.1, 1],
    'zeta': [0.01, 0.1, 1],
    'learning_rate': [0.001, 0.01, 0.1]
}

# Create a list of all combinations of hyperparameters
param_combinations = list(product(param_grid['lambda1'], param_grid['lambda2'], param_grid['gamma'], param_grid['zeta'], param_grid['learning_rate']))

# Initialize variables to store the best hyperparameters and lowest MSE
best_params = None
lowest_mse = float('inf')

# Grid search over all combinations of hyperparameters
for params in param_combinations:
    lambda1, lambda2, gamma, zeta, learning_rate = params
    model = CanalAdaptiveElasticNet(lambda1=lambda1, lambda2=lambda2, gamma=gamma, zeta=zeta, learning_rate=learning_rate)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    
    if mse < lowest_mse:
        lowest_mse = mse
        best_params = params

# Print the best hyperparameters and the corresponding MSE
print("Best Hyperparameters:", best_params)
print("Lowest MSE:", lowest_mse)

Best Hyperparameters: (0.1, 0.01, 1, 0.01, 0.01)
Lowest MSE: 1.051282785970255


#### Compare Canal-Adaptive Elastic Net to other classical models

In [6]:
from sklearn.linear_model import ElasticNet, Lasso, Ridge

# Train Canal-Adaptive Elastic Net model using the best hyperparameters
model_canal = CanalAdaptiveElasticNet(lambda1=0.1, lambda2=0.01, gamma=1, zeta=0.01, learning_rate=0.01)
model_canal.fit(X_train, y_train)

# Predict and evaluate the Canal-Adaptive Elastic Net model
y_pred_canal = model_canal.predict(X_test)
mse_canal = mean_squared_error(y_test, y_pred_canal)
print("MSE of Canal-Adaptive Elastic Net:", mse_canal)

# Fit ElasticNet
model_elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)  # alpha is the penalty term, l1_ratio controls the mix of L1 vs L2
model_elastic.fit(X_train, y_train)

predicted_elastic = model_elastic.predict(X_test)
mse_elastic = mean_squared_error(y_test, predicted_elastic)
print("MSE of Elastic Net:", mse_elastic)

# Fit Lasso
model_lasso = Lasso(alpha=0.1)
model_lasso.fit(X_train, y_train)

predicted_lasso = model_lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test, predicted_lasso)
print("MSE of Lasso:", mse_lasso)

# Fit Ridge
model_ridge = Ridge(alpha=0.1)
model_ridge.fit(X_train, y_train)

predicted_ridge = model_ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test, predicted_ridge)
print("MSE of Ridge:", mse_ridge)

MSE of Canal-Adaptive Elastic Net: 1.0523101460343023
MSE of Elastic Net: 1.15560906622092
MSE of Lasso: 1.1782830187579945
MSE of Ridge: 1.0232747967279607
