50 Iteration Experiment NON LINEAR

L1 Lasso

In [None]:
import warnings
warnings.simplefilter("ignore", RuntimeWarning)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import statistics
import math

misclassification_rate_l1_lasso = []

# Define the file path
file_path = "C:/Users/91959/Desktop/CODE/Robust-Logistic-Regression-with-Shift-Parameter-Estimation/Robust Logistic Regression [DATA DIRECTORY]/Non-Linear Case/haberman/haberman.data"
column_names = ["Age", "Year", "Axillary_Nodes", "Survival_Status"]
df = pd.read_csv(file_path, header=None, names=column_names)

# Convert Survival_Status to binary labels (1 -> 0, 2 -> 1)
df["Survival_Status"] = df["Survival_Status"].map({1: 0, 2: 1})

# Splitting features and target variable
X = df.drop(columns=["Survival_Status"])
y = df["Survival_Status"]

# Standardize the feature set (zero mean, unit variance)
# scaler = StandardScaler()
# X = scaler.fit_transform(X)
X = X.to_numpy()

def introduce_label_noise(y, noise_percentage=0.1):
    """
    Introduces label noise by flipping a percentage of majority class labels to the minority class.

    Args:
        y (pd.Series): The target variable.
        noise_percentage (float): The percentage of majority class labels to flip (e.g., 0.1 for 10%).

    Returns:
        pd.Series: The target variable with label noise.
    """

    value_counts = y.value_counts()
    majority_class = value_counts.idxmax()
    minority_class = value_counts.idxmin()

    # print("Original class distribution:")
    # print(value_counts)

    majority_indices = y[y == majority_class].index
    num_noise = int(len(majority_indices) * noise_percentage)

    noise_indices = np.random.choice(majority_indices, num_noise, replace=False)

    y_noisy = y.copy()
    y_noisy.loc[noise_indices] = minority_class

    # print("\nClass distribution after introducing noise:")
    # print(y_noisy.value_counts())

    return y_noisy

# ===========================
# SIGMOID FUNCTION
# ===========================
def sigmoid(z):
    """Compute the sigmoid function."""
    return 1 / (1 + np.exp(-z))

# ===========================
# KERNEL FUNCTION
# ===========================
def rbf_kernel(X1, X2, gamma=0.1):
    """
    Compute the RBF kernel matrix between X1 and X2.
    K(x,y) = exp(-gamma * ||x-y||^2)
    """
    # Compute pairwise squared Euclidean distances
    X1_norm = np.sum(X1**2, axis=1).reshape(-1, 1)
    X2_norm = np.sum(X2**2, axis=1).reshape(1, -1)

    # Use broadcasting to compute the squared distances
    distances = X1_norm + X2_norm - 2 * np.dot(X1, X2.T)

    # Apply RBF kernel formula
    return np.exp(-gamma * distances)

# ===========================
# THRESHOLDING FUNCTIONS FOR SHIFT PARAMETERS
# ===========================
def soft_threshold(u, lambda_, a):
    """Soft thresholding function for shift parameter estimation."""
    return a * np.minimum(u + lambda_, 0)

def hard_threshold(u, lambda_, a):
    """Hard thresholding function for shift parameter estimation."""
    return a * u * (u <= -lambda_)

# ===========================
# ROBUST KERNELIZED LOGISTIC REGRESSION WITH SHIFT PARAMETER ESTIMATION
# ===========================
def train_robust_kernel_logistic_regression(X, y, kernel_func=rbf_kernel, kernel_param=0.1,
                                            lr=0.01, epochs=1000, tol=1e-6, lambda_=1.0,
                                            a=2.0, threshold_type='soft'):
    """
    Train robust kernelized logistic regression using gradient descent with shift parameter estimation.

    Following the screenshot:
    - f(x) = β₀ + h(x), where h(x) is in the RKHS induced by kernel K
    - By the representer theorem, f(x) = β₀ + Σᵢ αᵢK(x, xᵢ)
    - We optimize for α and β₀ instead of θ
    """
    m = X.shape[0]

    # Initialize parameters
    beta0 = 0.0
    alpha = np.zeros(m)
    gamma = np.zeros(m)

    # Compute kernel matrix
    K = kernel_func(X, X, gamma=kernel_param)

    prev_loss = float('inf')

    for epoch in range(epochs):
        # Step 1: Update weights (beta0 and alpha) using kernelized logistic regression with offset
        z = beta0 + np.dot(K, alpha)
        u = y * (z - gamma)
        h = sigmoid(z - gamma)

        # Update beta0
        gradient_beta0 = np.mean(h - (y + 1) / 2)
        beta0 -= lr * gradient_beta0

        # Update alpha
        gradient_alpha = np.dot(K, (h - (y + 1) / 2)) / m
        alpha -= lr * gradient_alpha

        # Step 2: Update shift parameters gamma using thresholding
        z = beta0 + np.dot(K, alpha)
        u = y * z
        if threshold_type == 'soft':
            gamma = soft_threshold(u, lambda_, a)
        elif threshold_type == 'hard':
            gamma = hard_threshold(u, lambda_, a)
        else:
            raise ValueError("threshold_type must be 'soft' or 'hard'")

        # Step 3: Compute current loss with L1 penalty
        loss = np.mean(np.log(1 + np.exp(-u + gamma))) + lambda_ * np.sum(np.abs(gamma))

        # Early stopping condition
        if abs(prev_loss - loss) < tol:
            break
        prev_loss = loss

    return beta0, alpha, gamma

# ===========================
# PREDICTION FUNCTION
# ===========================
def predict_robust_kernel_logistic_regression(X_train, X_test, beta0, alpha, kernel_func=rbf_kernel, kernel_param=0.1):
    """
    Make predictions using trained robust kernelized logistic regression model.

    Args:
        X_train: Training data used to compute the kernel function
        X_test: Test data to predict on
        beta0: Bias term
        alpha: Kernel weights
        kernel_func: Kernel function to use
        kernel_param: Parameter for the kernel function
    """
    # Compute kernel matrix between test and training data
    K_test = kernel_func(X_test, X_train, gamma=kernel_param)

    # Compute predictions
    z = beta0 + np.dot(K_test, alpha)
    probabilities = sigmoid(z)

    return np.where(probabilities >= 0.5, 1, -1)

def print_average_with_se(data_list):
    """
    Calculates and prints the average of a list with an error bar of ±SE.

    Args:
        data_list (list): The list of numerical data.
    """

    if not data_list:
        print("Error: Input list is empty.")
        return

    try:
        mean = statistics.mean(data_list)
        stdev = statistics.stdev(data_list)
        se = stdev / math.sqrt(len(data_list))

        print(f"Average: {mean:.4f} ± {se:.4f} SE")

    except statistics.StatisticsError:
        print("Error: Cannot calculate standard deviation. List must contain at least two elements.")
    except TypeError:
        print("Error: List elements must be numerical.")

def cross_validate_parameters(X, y, a_candidates, lambda_candidates, gamma_candidates, threshold_type='soft', n_splits=5):
    """
    Perform cross-validation to select the best parameters a, lambda_, and gamma (kernel parameter).
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    best_a = None
    best_lambda_ = None
    best_gamma = None
    best_error = float('inf')

    for a in a_candidates:
        for lambda_ in lambda_candidates:
            for gamma in gamma_candidates:
                print(f"a: {a}, lambda: {lambda_}, gamma: {gamma}")
                print('---------------------------------------')
                cv_errors = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]

                    # Convert labels to -1 and 1
                    y_train_np = 2 * y_train - 1
                    y_val_np = 2 * y_val - 1

                    # Train robust kernel logistic regression
                    beta0, alpha, gamma_shifts = train_robust_kernel_logistic_regression(
                        X_train, y_train_np, kernel_param=gamma, lr=0.1, epochs=2000,
                        tol=1e-6, lambda_=lambda_, a=a, threshold_type=threshold_type
                    )

                    # Predict on validation set
                    y_pred = predict_robust_kernel_logistic_regression(X_train, X_val, beta0, alpha, kernel_param=gamma)

                    # Compute misclassification rate
                    misclassification_rate = 1 - accuracy_score(y_val_np, y_pred)
                    cv_errors.append(misclassification_rate)

                # Average misclassification rate over the folds
                avg_error = np.mean(cv_errors)

                # Update best parameters if current combination is better
                if avg_error < best_error:
                    best_error = avg_error
                    best_a = a
                    best_lambda_ = lambda_
                    best_gamma = gamma

    return best_a, best_lambda_, best_gamma, best_error

# # Define candidate values for a, lambda_, and gamma (kernel parameter)
# a_candidates = [1, 2, 3, 4, 5, float('inf')]
# lambda_candidates = [0.01, 0.1, 1.0, 10.0]
# gamma_candidates = [0.01, 0.1, 1.0, 10.0]  # RBF kernel parameter

# # Perform cross-validation to select the best a, lambda_, and gamma
# best_a, best_lambda_, best_gamma, best_error = cross_validate_parameters(
#     X, y, a_candidates, lambda_candidates, gamma_candidates, threshold_type='hard'
# )

# print(f"Best a: {best_a}, Best lambda_: {best_lambda_}, Best gamma: {best_gamma}, Best CV error: {best_error}")

for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=i)

    # Introduce label noise only to the training set
    y_train_noisy = introduce_label_noise(y_train, noise_percentage=0.1)  # 10% noise

    # Convert labels to -1 and 1 for training and testing sets
    y_train_np = 2 * y_train_noisy.values - 1  # Convert 0 → -1 and 1 → 1
    y_test_np = 2 * y_test.values - 1          # Convert 0 → -1 and 1 → 1

    # Train robust kernel logistic regression on your dataset
    # Best a: 1, Best lambda_: 1.0, Best gamma: 0.1, Best CV error: 0.2548387096774193
    beta0, alpha, gamma = train_robust_kernel_logistic_regression(
        X_train, y_train_np, kernel_param=0.1, lr=0.1, epochs=2000,
        tol=1e-6, lambda_=1.0, a=1, threshold_type="hard"
    )

    # Predict on test set
    y_pred = predict_robust_kernel_logistic_regression(X_train, X_test, beta0, alpha, kernel_param=0.1)

    # Compute misclassification rate
    misclassification_rate = 1 - accuracy_score(y_test_np, y_pred)
    misclassification_rate_l1_lasso.append(misclassification_rate)

print(misclassification_rate_l1_lasso)
print(f"Average misclassification rate over 50 runs L1 Lasso:")
print_average_with_se(misclassification_rate_l1_lasso)

[0.28260869565217395, 0.3152173913043478, 0.30434782608695654, 0.2934782608695652, 0.3369565217391305, 0.26086956521739135, 0.3586956521739131, 0.30434782608695654, 0.26086956521739135, 0.32608695652173914, 0.23913043478260865, 0.28260869565217395, 0.30434782608695654, 0.2717391304347826, 0.34782608695652173, 0.25, 0.34782608695652173, 0.34782608695652173, 0.28260869565217395, 0.3913043478260869, 0.2717391304347826, 0.3695652173913043, 0.2934782608695652, 0.32608695652173914, 0.2717391304347826, 0.2934782608695652, 0.2717391304347826, 0.28260869565217395, 0.30434782608695654, 0.30434782608695654, 0.23913043478260865, 0.21739130434782605, 0.2934782608695652, 0.3152173913043478, 0.2934782608695652, 0.28260869565217395, 0.30434782608695654, 0.2717391304347826, 0.23913043478260865, 0.23913043478260865, 0.2717391304347826, 0.3369565217391305, 0.3369565217391305, 0.25, 0.32608695652173914, 0.32608695652173914, 0.26086956521739135, 0.3586956521739131, 0.32608695652173914, 0.2934782608695652]


---
Elastic Net

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import statistics
import math

misclassification_rate_elastic_net = []

# Define the file path
file_path = "C:/Users/91959/Desktop/CODE/Robust-Logistic-Regression-with-Shift-Parameter-Estimation/Robust Logistic Regression [DATA DIRECTORY]/Non-Linear Case/haberman/haberman.data"
column_names = ["Age", "Year", "Axillary_Nodes", "Survival_Status"]
df = pd.read_csv(file_path, header=None, names=column_names)

# Convert Survival_Status to binary labels (1 -> 0, 2 -> 1)
df["Survival_Status"] = df["Survival_Status"].map({1: 0, 2: 1})

# Splitting features and target variable
X = df.drop(columns=["Survival_Status"])
y = df["Survival_Status"]

# # Standardize the feature set (zero mean, unit variance)
# scaler = StandardScaler()
# X = scaler.fit_transform(X)
X = X.to_numpy()

def introduce_label_noise(y, noise_percentage=0.1):
    """
    Introduces label noise by flipping a percentage of majority class labels to the minority class.
    """
    value_counts = y.value_counts()
    majority_class = value_counts.idxmax()
    minority_class = value_counts.idxmin()

    majority_indices = y[y == majority_class].index
    num_noise = int(len(majority_indices) * noise_percentage)

    noise_indices = np.random.choice(majority_indices, num_noise, replace=False)

    y_noisy = y.copy()
    y_noisy.loc[noise_indices] = minority_class

    return y_noisy

# Methods for LogReg
# ===========================
# SIGMOID FUNCTION
# ===========================
def sigmoid(z):
    """Compute the sigmoid function."""
    return 1 / (1 + np.exp(-z))

# ===========================
# KERNEL FUNCTION
# ===========================
def rbf_kernel(X1, X2, gamma=0.1):
    """
    Compute the RBF kernel matrix between X1 and X2.
    K(x,y) = exp(-gamma * ||x-y||^2)
    """
    # Compute pairwise squared Euclidean distances
    X1_norm = np.sum(X1**2, axis=1).reshape(-1, 1)
    X2_norm = np.sum(X2**2, axis=1).reshape(1, -1)

    # Use broadcasting to compute the squared distances
    distances = X1_norm + X2_norm - 2 * np.dot(X1, X2.T)

    # Apply RBF kernel formula
    return np.exp(-gamma * distances)

# ===========================
# THRESHOLDING FUNCTIONS FOR SHIFT PARAMETERS
# ===========================
def soft_threshold(u, lambda_, a):
    """Soft thresholding function for shift parameter estimation."""
    return a * np.minimum(u + lambda_, 0)

def hard_threshold(u, lambda_, a):
    """Hard thresholding function for shift parameter estimation."""
    return a * u * (u <= -lambda_)

# ===========================
# ROBUST KERNELIZED LOGISTIC REGRESSION WITH ELASTIC NET
# ===========================
def train_robust_kernel_logistic_regression_elastic(X, y, kernel_func=rbf_kernel, kernel_param=0.1,
                                                    lr=0.01, epochs=1000, tol=1e-6, lambda_=1.0,
                                                    alpha=0.5, a=2.0, threshold_type='soft'):
    """
    Train robust kernelized logistic regression using gradient descent with shift parameter estimation and Elastic Net regularization.

    Following the screenshot:
    - f(x) = β₀ + h(x), where h(x) is in the RKHS induced by kernel K
    - By the representer theorem, f(x) = β₀ + Σᵢ αᵢK(x, xᵢ)
    - We optimize for α and β₀ instead of θ with Elastic Net regularization
    """
    m = X.shape[0]

    # Initialize parameters
    beta0 = 0.0
    alpha_coef = np.zeros(m)  # Renamed from alpha to alpha_coef to avoid confusion with Elastic Net alpha parameter
    gamma = np.zeros(m)

    # Compute kernel matrix
    K = kernel_func(X, X, gamma=kernel_param)

    prev_loss = float('inf')

    for epoch in range(epochs):
        # Step 1: Update weights (beta0 and alpha_coef) using kernelized logistic regression with offset
        z = beta0 + np.dot(K, alpha_coef)
        u = y * (z - gamma)
        h = sigmoid(z - gamma)

        # Update beta0
        gradient_beta0 = np.mean(h - (y + 1) / 2)
        beta0 -= lr * gradient_beta0

        # Update alpha_coef with Elastic Net regularization
        gradient_alpha_coef = np.dot(K, (h - (y + 1) / 2)) / m

        # Elastic Net penalty gradient
        l1_grad = alpha * np.sign(alpha_coef)  # L1 (Lasso)
        l2_grad = (1 - alpha) * alpha_coef     # L2 (Ridge)

        # Apply Elastic Net regularization
        gradient_alpha_coef += lambda_ * (l1_grad + l2_grad)

        # Update alpha coefficients
        alpha_coef -= lr * gradient_alpha_coef

        # Step 2: Update shift parameters gamma using thresholding
        z = beta0 + np.dot(K, alpha_coef)
        u = y * z
        if threshold_type == 'soft':
            gamma = soft_threshold(u, lambda_, a)
        elif threshold_type == 'hard':
            gamma = hard_threshold(u, lambda_, a)
        else:
            raise ValueError("threshold_type must be 'soft' or 'hard'")

        # Step 3: Compute current loss with Elastic Net penalty
        l1_term = alpha * np.sum(np.abs(alpha_coef))
        l2_term = (1 - alpha) * np.sum(alpha_coef**2)
        loss = np.mean(np.log(1 + np.exp(-u + gamma))) + lambda_ * (l1_term + l2_term)

        # Early stopping condition
        if abs(prev_loss - loss) < tol:
            break
        prev_loss = loss

    return beta0, alpha_coef, gamma

# ===========================
# PREDICTION FUNCTION
# ===========================
def predict_robust_kernel_logistic_regression(X_train, X_test, beta0, alpha_coef, kernel_func=rbf_kernel, kernel_param=0.1):
    """
    Make predictions using trained robust kernelized logistic regression model.

    Args:
        X_train: Training data used to compute the kernel function
        X_test: Test data to predict on
        beta0: Bias term
        alpha_coef: Kernel weights
        kernel_func: Kernel function to use
        kernel_param: Parameter for the kernel function
    """
    # Compute kernel matrix between test and training data
    K_test = kernel_func(X_test, X_train, gamma=kernel_param)

    # Compute predictions
    z = beta0 + np.dot(K_test, alpha_coef)
    probabilities = sigmoid(z)

    return np.where(probabilities >= 0.5, 1, -1)

def print_average_with_se(data_list):
    """
    Calculates and prints the average of a list with an error bar of ±SE.
    """
    if not data_list:
        print("Error: Input list is empty.")
        return

    try:
        mean = statistics.mean(data_list)
        stdev = statistics.stdev(data_list)
        se = stdev / math.sqrt(len(data_list))

        print(f"Average: {mean:.4f} ± {se:.4f} SE")

    except statistics.StatisticsError:
        print("Error: Cannot calculate standard deviation. List must contain at least two elements.")
    except TypeError:
        print("Error: List elements must be numerical.")

def cross_validate_parameters(X, y, a_candidates, lambda_candidates, alpha_candidates, gamma_candidates, threshold_type='soft', n_splits=5):
    """
    Perform cross-validation to select the best parameters a, lambda_, alpha, and gamma (kernel parameter).
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    best_a = None
    best_lambda_ = None
    best_alpha = None
    best_gamma = None
    best_error = float('inf')

    for a in a_candidates:
        for lambda_ in lambda_candidates:
            for alpha in alpha_candidates:
                for gamma in gamma_candidates:
                    print(f"a: {a}, lambda: {lambda_}, alpha: {alpha}, gamma: {gamma}")
                    print('--------------------------')
                    cv_errors = []
                    for train_index, val_index in kf.split(X):
                        X_train, X_val = X[train_index], X[val_index]
                        y_train, y_val = y[train_index], y[val_index]

                        # Convert labels to -1 and 1
                        y_train_np = 2 * y_train - 1
                        y_val_np = 2 * y_val - 1

                        # Train robust kernel logistic regression with Elastic Net
                        beta0, alpha_coef, gamma_shifts = train_robust_kernel_logistic_regression_elastic(
                            X_train, y_train_np, kernel_param=gamma, lr=0.1, epochs=2000,
                            tol=1e-6, lambda_=lambda_, alpha=alpha, a=a, threshold_type=threshold_type
                        )

                        # Predict on validation set
                        y_pred = predict_robust_kernel_logistic_regression(X_train, X_val, beta0, alpha_coef, kernel_param=gamma)

                        # Compute misclassification rate
                        misclassification_rate = 1 - accuracy_score(y_val_np, y_pred)
                        cv_errors.append(misclassification_rate)

                    # Average misclassification rate over the folds
                    avg_error = np.mean(cv_errors)

                    # Update best parameters if current combination is better
                    if avg_error < best_error:
                        best_error = avg_error
                        best_a = a
                        best_lambda_ = lambda_
                        best_alpha = alpha
                        best_gamma = gamma

    return best_a, best_lambda_, best_alpha, best_gamma, best_error

# # Define candidate values for a, lambda_, alpha, and gamma
# a_candidates = [1, 2, 3, 4, 5, float('inf')]
# lambda_candidates = [0.01, 0.1, 1.0, 10.0]
# alpha_candidates = [0.1, 0.5, 0.9]  # Values for alpha (balance between L1 and L2)
# gamma_candidates = [0.01, 0.1, 1.0, 10.0]  # RBF kernel parameter

# # Perform cross-validation to select the best a, lambda_, alpha, and gamma
# best_a, best_lambda_, best_alpha, best_gamma, best_error = cross_validate_parameters(
#     X, y, a_candidates, lambda_candidates, alpha_candidates, gamma_candidates, threshold_type='hard'
# )

# print(f"Best a: {best_a}, Best lambda_: {best_lambda_}, Best alpha: {best_alpha}, Best gamma: {best_gamma}, Best CV error: {best_error}")

# Now run the 50 iteration experiment with the best parameters
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=i)

    # Introduce label noise only to the training set
    y_train_noisy = introduce_label_noise(y_train, noise_percentage=0.1)  # 10% noise

    # Convert labels to -1 and 1 for training and testing sets
    y_train_np = 2 * y_train_noisy.values - 1  # Convert 0 → -1 and 1 → 1
    y_test_np = 2 * y_test.values - 1          # Convert 0 → -1 and 1 → 1

    # Train robust kernel logistic regression on your dataset with the best parameters
    beta0, alpha_coef, gamma = train_robust_kernel_logistic_regression_elastic(
        X_train, y_train_np, kernel_param=0.01, lr=0.1, epochs=2000,
        tol=1e-6, lambda_=0.01, alpha=0.1, a=1, threshold_type="hard"
    )

    # Predict on test set
    # Best a: 1, Best lambda_: 0.01, Best alpha: 0.1, Best gamma: 0.01, Best CV error: 0.26462189317821255
    y_pred = predict_robust_kernel_logistic_regression(X_train, X_test, beta0, alpha_coef, kernel_param=0.01)

    # Compute misclassification rate
    misclassification_rate = 1 - accuracy_score(y_test_np, y_pred)
    misclassification_rate_elastic_net.append(misclassification_rate)

print(misclassification_rate_elastic_net)
print(f"Average misclassification rate over 50 runs Elastic Net:")
print_average_with_se(misclassification_rate_elastic_net)

[0.18478260869565222, 0.30434782608695654, 0.23913043478260865, 0.23913043478260865, 0.23913043478260865, 0.2282608695652174, 0.28260869565217395, 0.25, 0.21739130434782605, 0.25, 0.2282608695652174, 0.19565217391304346, 0.19565217391304346, 0.19565217391304346, 0.2717391304347826, 0.26086956521739135, 0.21739130434782605, 0.21739130434782605, 0.23913043478260865, 0.28260869565217395, 0.2282608695652174, 0.2934782608695652, 0.2717391304347826, 0.2282608695652174, 0.2065217391304348, 0.2282608695652174, 0.2282608695652174, 0.25, 0.21739130434782605, 0.21739130434782605, 0.23913043478260865, 0.21739130434782605, 0.26086956521739135, 0.23913043478260865, 0.2065217391304348, 0.21739130434782605, 0.21739130434782605, 0.25, 0.1630434782608695, 0.2282608695652174, 0.2282608695652174, 0.18478260869565222, 0.3152173913043478, 0.2065217391304348, 0.25, 0.21739130434782605, 0.2717391304347826, 0.32608695652173914, 0.23913043478260865, 0.28260869565217395]
Average misclassification rate over 50 ru

---

In [3]:
data = {
    'Noise Level': ['5% Noise', '10% Noise'],
    'L1 Lasso': [
        '0.2798 ± 0.0040 SE',
        '0.2983 ± 0.0055 SE'
    ],
    'Elastic Net': [
        '0.2387 ± 0.0043 SE',
        '0.2424 ± 0.0044 SE'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Set the 'Noise Level' column as the index
df.set_index('Noise Level', inplace=True)

# Write to Excel file
df.to_csv('Non-Linear Haberman_Results.csv')

print("Excel file has been created successfully.")

Excel file has been created successfully.
