### Robust Logistic Regression with Shift Parameter Estimation
Python implementation of *Robust Logistic Regression with Shift Parameter Estimation*

#### Experiments On Synthetic Data

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm

In [None]:
def generate_two_class_data(n, p, alpha):
    """
    Draw a balanced two-class Gaussian dataset whose Bayes error rate is `alpha`.

    Both classes share an identity covariance. Their means are +/-(c_alpha / 2) in
    every coordinate with c_alpha = 2 * z_alpha / sqrt(p), which places the two
    means 2 * z_alpha apart.

    Parameters:
    - n: Total number of observations; n // 2 rows are drawn per class, so an odd
      n yields n - 1 rows.
    - p: Number of features.
    - alpha: Desired Bayes error rate.

    Returns:
    - df: A pandas DataFrame with feature columns 'x1', ..., 'xp' and a label column
      'y' holding +1.0 for the first n // 2 rows and -1.0 for the remaining rows.
    """
    # The upper alpha-quantile of the standard normal fixes the class separation
    c_alpha = (2 * norm.ppf(1 - alpha)) / np.sqrt(p)

    # Class means are mirror images of each other around the origin
    mu_plus = np.full(p, c_alpha / 2)
    mu_minus = -mu_plus
    identity_cov = np.eye(p)

    # The +1 class is sampled first; keeping that order keeps seeded runs reproducible
    per_class = n // 2
    positives = np.random.multivariate_normal(mu_plus, identity_cov, per_class)
    negatives = np.random.multivariate_normal(mu_minus, identity_cov, per_class)

    # Stack features and build the matching label vector
    features = np.concatenate((positives, negatives), axis=0)
    labels = np.repeat([1.0, -1.0], per_class)

    feature_names = [f'x{j}' for j in range(1, p + 1)]
    df = pd.DataFrame(features, columns=feature_names)
    df['y'] = labels
    return df

In [None]:
# Settings for a small demo dataset: observations, features, desired Bayes error rate
n, p, alpha = 200, 2, 0.1

# Draw one synthetic dataset and preview its first rows
df = generate_two_class_data(n, p, alpha)
print(df.head())

In [None]:
def add_label_noise(df, noise_rate):
    """
    Return a copy of `df` in which a random share of the -1 labels is relabelled +1.

    Noise is one-directional: rows labelled +1 are never touched.

    Parameters:
    - df: The pandas DataFrame containing the dataset; it must have a 'y' column.
      The input frame is not modified.
    - noise_rate: Fraction of the -1 class to flip (e.g., 0.05 for 5%). The number
      of flipped rows is int(noise_rate * number of -1 rows), i.e. truncated.

    Returns:
    - df_noisy: The copied DataFrame with the selected labels set to 1.
    """
    noisy = df.copy()

    # Index labels of every row currently in the -1 class
    negatives = noisy.index[noisy['y'] == -1]

    # Pick the rows to flip uniformly at random, without repeats
    n_flips = int(noise_rate * len(negatives))
    chosen = np.random.choice(negatives, size=n_flips, replace=False)

    noisy.loc[chosen, 'y'] = 1
    return noisy

In [None]:
# Class counts of the clean dataset, before any labels are flipped
df['y'].value_counts()

In [None]:
# add_label_noise works on a copy, so `df` keeps its clean labels for comparison
noise_rate = 0.05  # 5% of the -1 class will be flipped to +1
df_noisy = add_label_noise(df, noise_rate)

In [None]:
# Class counts after label noise; the +1 count grows by the number of flipped -1 rows
df_noisy['y'].value_counts()

### 100-Repetition Experiment with Normal Logistic Regression

In [1]:
import numpy as np
import pandas as pd
import time
from scipy.stats import norm
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ===========================
# 1. GENERATE SYNTHETIC DATA
# ===========================
def generate_two_class_data(n, p, alpha):
    """
    Sample a balanced two-class Gaussian dataset with Bayes error rate `alpha`.

    Each class has identity covariance and mean +/-(c_alpha / 2) in every coordinate,
    where c_alpha = 2 * z_alpha / sqrt(p) and z_alpha is the upper alpha-quantile of
    the standard normal.

    Parameters:
    - n: Number of observations; n // 2 rows are drawn per class (odd n gives n - 1 rows)
    - p: Number of features
    - alpha: Desired Bayes error rate

    Returns:
    - df: A DataFrame with feature columns 'x1'..'xp' and target column 'y'
      (+1.0 for the first n // 2 rows, -1.0 for the rest).
    """
    c_alpha = (2 * norm.ppf(1 - alpha)) / np.sqrt(p)
    mu_plus = np.full(p, c_alpha / 2)
    mu_minus = -mu_plus
    identity_cov = np.eye(p)
    per_class = n // 2

    # +1 class is drawn first so that seeded runs consume the random stream identically
    positives = np.random.multivariate_normal(mu_plus, identity_cov, per_class)
    negatives = np.random.multivariate_normal(mu_minus, identity_cov, per_class)

    features = np.concatenate((positives, negatives), axis=0)
    labels = np.repeat([1.0, -1.0], per_class)

    feature_names = [f'x{j}' for j in range(1, p + 1)]
    df = pd.DataFrame(features, columns=feature_names)
    df['y'] = labels
    return df

# ===========================
# 2. ADD LABEL NOISE FUNCTION
# ===========================
def add_label_noise(df, noise_rate):
    """
    Copy `df` and relabel a random share of its -1 rows as +1.

    Parameters:
    - df: DataFrame containing the dataset with class labels in column 'y'
      (left unmodified)
    - noise_rate: Proportion of negative class labels (-1) to flip; the count is
      truncated via int(noise_rate * number of -1 rows)

    Returns:
    - df_noisy: Copy of the dataset with the selected labels set to 1.
    """
    noisy = df.copy()
    negatives = noisy.index[noisy['y'] == -1]
    n_flips = int(noise_rate * len(negatives))
    # Sample without replacement so exactly n_flips distinct rows change class
    chosen = np.random.choice(negatives, size=n_flips, replace=False)
    noisy.loc[chosen, 'y'] = 1
    return noisy

# ===========================
# 3. SIGMOID FUNCTION
# ===========================
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) without floating-point overflow.

    Parameters:
    - z: Scalar or array of real values

    Returns:
    - Sigmoid of `z`, elementwise; a NumPy scalar for scalar input, otherwise an
      array with the shape of `z`.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0 this is exactly 1 / (1 + exp(-z)); for z < 0 it is the
    # algebraically equal form exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays unchanged
    return out[()]

# ===========================
# 4. IMPLEMENT LOGISTIC REGRESSION TRAINING WITH EARLY STOPPING
# ===========================
def train_logistic_regression(X, y, lr=0.01, epochs=1000, tol=1e-6):
    """
    Train logistic regression using batch gradient descent with early stopping.

    Parameters:
    - X: Feature matrix of shape (samples, features). No intercept is added here;
      include a bias column in X if one is wanted.
    - y: Target values (-1 or +1)
    - lr: Learning rate
    - epochs: Max number of iterations
    - tol: Tolerance for early stopping (if loss change < tol, stop training)

    Returns:
    - theta: Optimized weight parameters
    - n_iter: Number of gradient steps actually performed (0 when epochs is 0)
    - training_time: Wall-clock training time in seconds
    """
    m, n = X.shape  # Samples, features
    theta = np.zeros(n)  # Initialize weights
    prev_loss = float('inf')  # Initialize previous loss for tracking
    n_iter = 0  # Tracked explicitly so that epochs=0 returns cleanly

    start_time = time.time()  # Start timer

    for epoch in range(epochs):
        n_iter = epoch + 1

        # Linear scores and margins (u = y * f(x)) at the current weights
        z = np.dot(X, theta)
        u = y * z

        # Gradient of the mean logistic loss; (y + 1) / 2 maps {-1, +1} to {0, 1}
        h = sigmoid(z)
        gradient = np.dot(X.T, (h - (y + 1) / 2)) / m

        # Update weights
        theta -= lr * gradient

        # Loss at the pre-update weights. logaddexp(0, -u) equals log(1 + exp(-u))
        # but does not overflow for strongly negative margins.
        loss = np.mean(np.logaddexp(0, -u))

        # Early stopping condition: If loss improvement is too small, stop training
        if abs(prev_loss - loss) < tol:
            break

        prev_loss = loss  # Update previous loss

    training_time = time.time() - start_time
    return theta, n_iter, training_time

# ===========================
# 5. PREDICTION FUNCTION
# ===========================
def predict_logistic_regression(X, theta):
    """
    Classify rows of X with a trained logistic regression model.

    Parameters:
    - X: Feature matrix
    - theta: Trained model parameters

    Returns:
    - Predicted class labels: 1 where sigmoid(X . theta) >= 0.5, otherwise -1
    """
    positive_prob = sigmoid(np.dot(X, theta))
    below_half = ~(positive_prob >= 0.5)
    return np.where(below_half, -1, 1)

# ===========================
# 6. CALCULATE AVERAGE TEST MISCLASSIFICATION RATE
# ===========================
def calculate_avg_metrics(n, p, alpha, noise_rate, n_repetitions=100):
    """
    Repeat the generate / corrupt / split / train / evaluate cycle and average the results.

    Label noise is applied before the train/test split, so the held-out labels used
    for scoring contain flipped labels as well. The split uses random_state=42 in
    every repetition; only the generated data and the flipped rows vary.

    Parameters:
    - n: Number of observations
    - p: Number of features
    - alpha: Desired Bayes error rate
    - noise_rate: Proportion of -1 class flipped to +1
    - n_repetitions: Number of iterations for averaging results

    Returns:
    - dict with the means over all repetitions under the keys
      'avg_misclassification_rate', 'avg_precision', 'avg_recall', 'avg_f1',
      'avg_auc', 'avg_training_time' and 'avg_iterations'.
    """
    # One list of per-repetition values per reported metric (insertion order is
    # the order of the returned keys)
    history = {
        'misclassification_rate': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'auc': [],
        'training_time': [],
        'iterations': [],
    }

    for _ in range(n_repetitions):
        # Fresh clean data, then one-directional label noise
        noisy = add_label_noise(generate_two_class_data(n, p, alpha), noise_rate)

        features = noisy.drop(columns=['y']).values
        labels = noisy['y'].values
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.3, random_state=42
        )

        theta, num_iters, train_time = train_logistic_regression(
            X_train, y_train, lr=0.1, epochs=5000, tol=1e-6
        )

        y_pred = predict_logistic_regression(X_test, theta)
        y_scores = sigmoid(np.dot(X_test, theta))  # Probabilities for AUC

        run = {
            'misclassification_rate': 1 - accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, pos_label=1),
            'recall': recall_score(y_test, y_pred, pos_label=1),
            'f1': f1_score(y_test, y_pred, pos_label=1),
            # roc_auc_score is given {0, 1} labels, converted from {-1, 1}
            'auc': roc_auc_score((y_test + 1) // 2, y_scores),
            'training_time': train_time,
            'iterations': num_iters,
        }
        for metric, value in run.items():
            history[metric].append(value)

    return {f'avg_{metric}': np.mean(values) for metric, values in history.items()}
# ===========================
# 7. RUN EXPERIMENT
# ===========================
# Experiment configuration
n, p = 200, 20      # observations, features
alpha = 0.1         # desired Bayes error rate
noise_rate = 0.2    # 20% of the -1 class will be flipped to +1

results = calculate_avg_metrics(n, p, alpha, noise_rate, n_repetitions=100)

# Report every averaged metric, one per line
print("\n".join([
    f"Average Test Misclassification Rate: {results['avg_misclassification_rate']:.4f}",
    f"Average Precision: {results['avg_precision']:.4f}",
    f"Average Recall: {results['avg_recall']:.4f}",
    f"Average F1-Score: {results['avg_f1']:.4f}",
    f"Average AUC: {results['avg_auc']:.4f}",
    f"Average Training Time (s): {results['avg_training_time']:.4f}",
    f"Average Iterations Until Convergence: {results['avg_iterations']:.2f}",
]))

Average Test Misclassification Rate: 0.2360
Average Precision: 0.8731
Average Recall: 0.7214
Average F1-Score: 0.7879
Average AUC: 0.8456
Average Training Time (s): 0.0505
Average Iterations Until Convergence: 649.25


### 100-Repetition Experiment with Robust Logistic Regression with Shift Parameter Estimation (L1 / Lasso Penalty)

In [2]:
import numpy as np
import pandas as pd
import time
from scipy.stats import norm
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ===========================
# 1. GENERATE SYNTHETIC DATA
# ===========================
def generate_two_class_data(n, p, alpha):
    """
    Generate a balanced two-class Gaussian dataset with Bayes error rate `alpha`.

    n // 2 rows are drawn per class from identity-covariance normals whose means are
    +/-(c_alpha / 2) in every coordinate, c_alpha = 2 * z_alpha / sqrt(p). Returns a
    DataFrame with columns 'x1'..'xp' and 'y' (+1.0 rows first, then -1.0 rows).
    """
    c_alpha = (2 * norm.ppf(1 - alpha)) / np.sqrt(p)
    mu_plus = np.full(p, c_alpha / 2)
    mu_minus = -mu_plus
    identity_cov = np.eye(p)
    per_class = n // 2

    # +1 class is drawn first so that seeded runs consume the random stream identically
    positives = np.random.multivariate_normal(mu_plus, identity_cov, per_class)
    negatives = np.random.multivariate_normal(mu_minus, identity_cov, per_class)

    features = np.concatenate((positives, negatives), axis=0)
    labels = np.repeat([1.0, -1.0], per_class)

    feature_names = [f'x{j}' for j in range(1, p + 1)]
    df = pd.DataFrame(features, columns=feature_names)
    df['y'] = labels
    return df

# ===========================
# 2. ADD LABEL NOISE FUNCTION
# ===========================
def add_label_noise(df, noise_rate):
    """
    Return a copy of `df` with int(noise_rate * #negatives) randomly chosen
    -1 labels in column 'y' set to +1; the input frame is left untouched.
    """
    noisy = df.copy()
    negatives = noisy.index[noisy['y'] == -1]
    n_flips = int(noise_rate * len(negatives))
    # Sample without replacement so exactly n_flips distinct rows change class
    chosen = np.random.choice(negatives, size=n_flips, replace=False)
    noisy.loc[chosen, 'y'] = 1
    return noisy

# ===========================
# 3. SIGMOID FUNCTION
# ===========================
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) without floating-point overflow.

    Accepts a scalar or an array; returns a NumPy scalar for scalar input and an
    array of the same shape otherwise.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0 this is exactly 1 / (1 + exp(-z)); for z < 0 it is the
    # algebraically equal form exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays unchanged
    return out[()]

# ===========================
# 4. THRESHOLDING FUNCTIONS FOR SHIFT PARAMETERS
# ===========================
def soft_threshold(u, lambda_, a):
    """
    Soft thresholding for shift parameter estimation.

    Returns a * (u + lambda_) where u + lambda_ is negative and 0 elsewhere,
    elementwise.
    """
    shifted = u + lambda_
    clipped = np.minimum(shifted, 0)
    return a * clipped

def hard_threshold(u, lambda_, a):
    """
    Hard thresholding for shift parameter estimation.

    Returns a * u where u <= -lambda_ and 0 elsewhere, elementwise.
    """
    beyond_threshold = u <= -lambda_
    scaled = a * u
    return scaled * beyond_threshold

# ===========================
# 5. ROBUST LOGISTIC REGRESSION WITH SHIFT PARAMETER ESTIMATION
# ===========================
def train_robust_logistic_regression(X, y, lr=0.01, epochs=1000, tol=1e-6, lambda_=1.0, a=2.0, threshold_type='soft'):
    """
    Train robust logistic regression using gradient descent with shift parameter estimation.

    Each epoch takes one gradient step on theta using the shifted scores
    X . theta - gamma, then re-estimates the per-sample shifts gamma by
    thresholding the margins y * (X . theta).

    Parameters:
    - X: Feature matrix of shape (samples, features); no intercept is added
    - y: Target values (-1 or +1)
    - lr: Learning rate for theta
    - epochs: Max number of iterations
    - tol: Early-stopping tolerance on the change of the monitored loss
    - lambda_: Threshold level; also weights the L1 term of the monitored loss
    - a: Multiplicative factor passed to the thresholding function
    - threshold_type: 'soft' or 'hard'

    Returns:
    - theta: Optimized weight parameters
    - n_iter: Number of epochs actually performed (0 when epochs is 0)
    - training_time: Wall-clock training time in seconds
    - gamma: Final per-sample shift parameters

    Raises:
    - ValueError: if threshold_type is neither 'soft' nor 'hard'
    """
    # Validate before any work is done; previously the error only surfaced
    # inside the loop, after theta had already been updated once.
    if threshold_type not in ('soft', 'hard'):
        raise ValueError("threshold_type must be 'soft' or 'hard'")
    use_soft = threshold_type == 'soft'

    m, n = X.shape
    theta = np.zeros(n)
    gamma = np.zeros(m)
    prev_loss = float('inf')
    n_iter = 0  # Tracked explicitly so that epochs=0 returns cleanly
    start_time = time.time()  # Start timer

    for epoch in range(epochs):
        n_iter = epoch + 1

        # Step 1: Update weights theta using logistic regression with offset
        h = sigmoid(np.dot(X, theta) - gamma)
        gradient_theta = np.dot(X.T, (h - (y + 1) / 2)) / m
        theta -= lr * gradient_theta

        # Step 2: Update shift parameters gamma by thresholding the new margins
        u = y * np.dot(X, theta)
        if use_soft:
            gamma = soft_threshold(u, lambda_, a)
        else:
            gamma = hard_threshold(u, lambda_, a)

        # Step 3: Monitored loss with L1 penalty. logaddexp(0, t) equals
        # log(1 + exp(t)) but does not overflow for large t.
        # NOTE(review): the loss shifts the margin as u - gamma, while step 1 shifts
        # the score as z - gamma (margin u - y * gamma); the two differ for y = -1.
        # Confirm which convention is intended.
        loss = np.mean(np.logaddexp(0, -u + gamma)) + lambda_ * np.sum(np.abs(gamma))

        # Early stopping condition
        if abs(prev_loss - loss) < tol:
            break
        prev_loss = loss

    training_time = time.time() - start_time
    return theta, n_iter, training_time, gamma

# ===========================
# 6. PREDICTION FUNCTION
# ===========================
def predict_robust_logistic_regression(X, theta):
    """
    Classify rows of X with a trained robust logistic regression model.

    Only theta is used; returns 1 where sigmoid(X . theta) >= 0.5 and -1 otherwise.
    """
    positive_prob = sigmoid(np.dot(X, theta))
    below_half = ~(positive_prob >= 0.5)
    return np.where(below_half, -1, 1)

# ===========================
# 7. CALCULATE AVERAGE TEST MISCLASSIFICATION RATE
# ===========================
def calculate_avg_metrics_l1(n, p, alpha, noise_rate, lambda_, a, threshold_type="soft", n_repetitions=100):
    """
    Repeat the generate / corrupt / split / train / evaluate cycle for the
    thresholding-based robust model and average the results.

    Label noise is applied before the train/test split, so the held-out labels used
    for scoring contain flipped labels as well. The split uses random_state=42 in
    every repetition. Returns a dict with the keys 'avg_misclassification_rate',
    'avg_precision', 'avg_recall', 'avg_f1', 'avg_auc', 'avg_training_time' and
    'avg_iterations'.
    """
    # One list of per-repetition values per reported metric (insertion order is
    # the order of the returned keys)
    history = {
        'misclassification_rate': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'auc': [],
        'training_time': [],
        'iterations': [],
    }

    for _ in range(n_repetitions):
        noisy = add_label_noise(generate_two_class_data(n, p, alpha), noise_rate)

        features = noisy.drop(columns=['y']).values
        labels = noisy['y'].values
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.3, random_state=42
        )

        # The fitted shifts belong to training rows only and are not needed for scoring
        theta, num_iters, train_time, _shifts = train_robust_logistic_regression(
            X_train, y_train, lr=0.1, epochs=5000, tol=1e-6,
            lambda_=lambda_, a=a, threshold_type=threshold_type,
        )

        y_pred = predict_robust_logistic_regression(X_test, theta)
        y_scores = sigmoid(np.dot(X_test, theta))

        run = {
            'misclassification_rate': 1 - accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, pos_label=1),
            'recall': recall_score(y_test, y_pred, pos_label=1),
            'f1': f1_score(y_test, y_pred, pos_label=1),
            # roc_auc_score is given {0, 1} labels, converted from {-1, 1}
            'auc': roc_auc_score((y_test + 1) // 2, y_scores),
            'training_time': train_time,
            'iterations': num_iters,
        }
        for metric, value in run.items():
            history[metric].append(value)

    return {f'avg_{metric}': np.mean(values) for metric, values in history.items()}

# ===========================
# 8. RUN EXPERIMENT
# ===========================
# Experiment configuration
n, p = 200, 20           # observations, features
alpha = 0.1              # desired Bayes error rate
noise_rate = 0.2         # 20% of the -1 class will be flipped to +1
lambda_ = 0.1            # threshold parameter
a = 2.0                  # multiplicative factor for shift parameter estimation
threshold_type = "hard"  # choose "soft" or "hard"

results = calculate_avg_metrics_l1(
    n, p, alpha, noise_rate, lambda_, a, threshold_type, n_repetitions=100
)

# Report every averaged metric, one per line
print("\n".join([
    f"Average Test Misclassification Rate: {results['avg_misclassification_rate']:.4f}",
    f"Average Precision: {results['avg_precision']:.4f}",
    f"Average Recall: {results['avg_recall']:.4f}",
    f"Average F1-Score: {results['avg_f1']:.4f}",
    f"Average AUC: {results['avg_auc']:.4f}",
    f"Average Training Time (s): {results['avg_training_time']:.4f}",
    f"Average Iterations Until Convergence: {results['avg_iterations']:.2f}",
]))

Average Test Misclassification Rate: 0.2283
Average Precision: 0.8844
Average Recall: 0.7267
Average F1-Score: 0.7955
Average AUC: 0.8563
Average Training Time (s): 0.5821
Average Iterations Until Convergence: 4984.08


### 100-Repetition Experiment with Robust Logistic Regression with Shift Parameter Estimation (Elastic Net Penalty)

In [3]:
import numpy as np
import pandas as pd
import time
from scipy.stats import norm
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def update_gamma_elastic_net(y, X, theta, gamma, lambda_, alpha, lr_gamma=0.1):
    """
    Take one proximal gradient step on the shift parameters under an Elastic Net penalty.

    All gamma_i are updated at once: a gradient step on the per-sample logistic
    loss log(1 + exp(-y * (X . theta + gamma))), followed by the Elastic Net
    proximal map (soft-threshold for the L1 part, rescaling for the L2 part).

    Parameters:
    - y: Target values (-1 or +1)
    - X: Feature matrix
    - theta: Current weight parameters
    - gamma: Current shift parameters, one per row of X; not modified
    - lambda_: Overall penalty strength
    - alpha: Mixing weight; alpha * lambda_ goes to the L1 part and
      (1 - alpha) * lambda_ to the L2 part
    - lr_gamma: Step size for the gamma update

    Returns:
    - New array of shift parameters.
    """
    z = np.dot(X, theta)
    margin = y * (z + gamma)
    grad_gamma = -y * sigmoid(-margin)  # dL/dγ

    # Build a new array for the gradient step; the previous in-place `gamma -= ...`
    # silently overwrote the array owned by the caller.
    stepped = gamma - lr_gamma * grad_gamma

    # Apply Elastic Net penalty
    l1_component = alpha * lambda_
    l2_component = (1 - alpha) * lambda_
    shrunk = np.maximum(np.abs(stepped) - l1_component * lr_gamma, 0)
    return np.sign(stepped) * shrunk / (1 + l2_component * lr_gamma)

# ===========================
# 1. GENERATE SYNTHETIC DATA
# ===========================
def generate_two_class_data(n, p, alpha):
    """Generate a balanced two-class Gaussian dataset (columns 'x1'..'xp', 'y') with Bayes error rate `alpha`."""
    c_alpha = (2 * norm.ppf(1 - alpha)) / np.sqrt(p)
    mu_plus = np.full(p, c_alpha / 2)
    mu_minus = -mu_plus
    identity_cov = np.eye(p)
    per_class = n // 2  # an odd n therefore yields n - 1 rows in total

    # +1 class is drawn first so that seeded runs consume the random stream identically
    positives = np.random.multivariate_normal(mu_plus, identity_cov, per_class)
    negatives = np.random.multivariate_normal(mu_minus, identity_cov, per_class)

    features = np.concatenate((positives, negatives), axis=0)
    labels = np.repeat([1.0, -1.0], per_class)

    feature_names = [f'x{j}' for j in range(1, p + 1)]
    df = pd.DataFrame(features, columns=feature_names)
    df['y'] = labels
    return df

# ===========================
# 2. ADD LABEL NOISE FUNCTION
# ===========================
def add_label_noise(df, noise_rate):
    """Return a copy of `df` with int(noise_rate * #negatives) random -1 labels in 'y' flipped to +1."""
    noisy = df.copy()
    negatives = noisy.index[noisy['y'] == -1]
    n_flips = int(noise_rate * len(negatives))
    # Sample without replacement so exactly n_flips distinct rows change class
    chosen = np.random.choice(negatives, size=n_flips, replace=False)
    noisy.loc[chosen, 'y'] = 1
    return noisy

# ===========================
# 3. SIGMOID FUNCTION
# ===========================
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) without floating-point overflow.

    Accepts a scalar or an array; returns a NumPy scalar for scalar input and an
    array of the same shape otherwise.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0 this is exactly 1 / (1 + exp(-z)); for z < 0 it is the
    # algebraically equal form exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays unchanged
    return out[()]

# ===========================
# 4. THRESHOLDING FUNCTIONS FOR SHIFT PARAMETERS
# ===========================
def soft_threshold(u, lambda_, a):
    """Soft thresholding: a * (u + lambda_) where u + lambda_ is negative, 0 elsewhere."""
    shifted = u + lambda_
    clipped = np.minimum(shifted, 0)
    return a * clipped

def hard_threshold(u, lambda_, a):
    """Hard thresholding: a * u where u <= -lambda_, 0 elsewhere."""
    beyond_threshold = u <= -lambda_
    scaled = a * u
    return scaled * beyond_threshold

# ===========================
# 5. ROBUST LOGISTIC REGRESSION WITH ELASTIC NET
# ===========================
def train_robust_logistic_regression(X, y, lr=0.01, epochs=1000, tol=1e-6, lambda_=1.0, alpha=0.5, lr_gamma=0.1):
    """
    Train robust logistic regression with Elastic Net regularization on shift parameters gamma.

    Each epoch takes one gradient step on theta using the shifted scores
    X . theta + gamma, then one proximal step on gamma via
    update_gamma_elastic_net, and finally evaluates the penalized objective for
    early stopping.

    Parameters:
    - X: Feature matrix of shape (samples, features); no intercept is added
    - y: Target values (-1 or +1)
    - lr: Learning rate for theta
    - epochs: Max number of iterations
    - tol: Early-stopping tolerance on the change of the objective
    - lambda_: Overall penalty strength on gamma
    - alpha: Elastic Net mixing weight (share of the L1 part)
    - lr_gamma: Step size for the gamma update

    Returns:
    - theta: Optimized weight parameters
    - n_iter: Number of epochs actually performed (0 when epochs is 0)
    - training_time: Wall-clock training time in seconds
    - gamma: Final per-sample shift parameters
    """
    m, n = X.shape
    theta = np.zeros(n)
    gamma = np.zeros(m)
    prev_loss = float('inf')
    n_iter = 0  # Tracked explicitly so that epochs=0 returns cleanly
    start_time = time.time()

    for epoch in range(epochs):
        n_iter = epoch + 1

        # Step 1: Update theta (standard logistic regression with shift correction)
        h = sigmoid(np.dot(X, theta) + gamma)
        grad_theta = np.dot(X.T, (h - (y + 1) / 2)) / m
        theta -= lr * grad_theta

        # Step 2: Update gamma via Elastic Net proximal update
        gamma = update_gamma_elastic_net(y, X, theta, gamma, lambda_, alpha, lr_gamma=lr_gamma)

        # Step 3: Compute full objective. logaddexp(0, -margin) equals
        # log(1 + exp(-margin)) but does not overflow for strongly negative margins.
        margin = y * (np.dot(X, theta) + gamma)
        logistic_loss = np.mean(np.logaddexp(0, -margin))
        l1_term = alpha * np.sum(np.abs(gamma))
        l2_term = (1 - alpha) * np.sum(gamma**2)
        loss = logistic_loss + lambda_ * (l1_term + 0.5 * l2_term)

        if abs(prev_loss - loss) < tol:
            break
        prev_loss = loss

    training_time = time.time() - start_time
    return theta, n_iter, training_time, gamma

# ===========================
# 6. PREDICTION FUNCTION
# ===========================
def predict_robust_logistic_regression(X, theta):
    """Return 1 where sigmoid(X . theta) >= 0.5 and -1 otherwise; only theta is used."""
    positive_prob = sigmoid(np.dot(X, theta))
    below_half = ~(positive_prob >= 0.5)
    return np.where(below_half, -1, 1)

# ===========================
# 7. CALCULATE AVERAGE TEST MISCLASSIFICATION RATE
# ===========================
def calculate_avg_metrics_elastic(n, p, alpha, noise_rate, lambda_, alpha_elastic, gamma_learning_rate ,n_repetitions=100):
    """
    Repeat the generate / corrupt / split / train / evaluate cycle for the
    Elastic Net robust model and average the results.

    `alpha` is the Bayes error rate of the generated data; `alpha_elastic` is the
    Elastic Net mixing weight and `gamma_learning_rate` the step size of the gamma
    update, both forwarded to the trainer. Label noise is applied before the
    train/test split, so the held-out labels used for scoring contain flipped labels
    as well. Returns a dict with the keys 'avg_misclassification_rate',
    'avg_precision', 'avg_recall', 'avg_f1', 'avg_auc', 'avg_training_time' and
    'avg_iterations'.
    """
    # One list of per-repetition values per reported metric (insertion order is
    # the order of the returned keys)
    history = {
        'misclassification_rate': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'auc': [],
        'training_time': [],
        'iterations': [],
    }

    for _ in range(n_repetitions):
        noisy = add_label_noise(generate_two_class_data(n, p, alpha), noise_rate)

        features = noisy.drop(columns=['y']).values
        labels = noisy['y'].values
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.3, random_state=42
        )

        # The fitted shifts belong to training rows only and are not needed for scoring
        theta, num_iters, train_time, _shifts = train_robust_logistic_regression(
            X_train, y_train, lr=0.1, epochs=5000, tol=1e-6,
            lambda_=lambda_, alpha=alpha_elastic, lr_gamma=gamma_learning_rate,
        )

        y_pred = predict_robust_logistic_regression(X_test, theta)
        y_scores = sigmoid(np.dot(X_test, theta))

        run = {
            'misclassification_rate': 1 - accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, pos_label=1),
            'recall': recall_score(y_test, y_pred, pos_label=1),
            'f1': f1_score(y_test, y_pred, pos_label=1),
            # roc_auc_score is given {0, 1} labels, converted from {-1, 1}
            'auc': roc_auc_score((y_test + 1) // 2, y_scores),
            'training_time': train_time,
            'iterations': num_iters,
        }
        for metric, value in run.items():
            history[metric].append(value)

    return {f'avg_{metric}': np.mean(values) for metric, values in history.items()}

# ===========================
# 8. RUN EXPERIMENT
# ===========================
# Experiment configuration
n, p = 200, 20        # observations, features
alpha = 0.1           # desired Bayes error rate
noise_rate = 0.2      # 20% of the -1 class will be flipped to +1
lambda_ = 0.1         # regularization parameter
alpha_elastic = 0.1   # mixing parameter (0 = Ridge, 1 = Lasso)
gamma_lr = 0.1        # step size for the gamma update

results = calculate_avg_metrics_elastic(
    n, p, alpha, noise_rate, lambda_, alpha_elastic,
    gamma_learning_rate=gamma_lr, n_repetitions=100,
)

# Report every averaged metric, one per line
print("\n".join([
    f"Average Test Misclassification Rate: {results['avg_misclassification_rate']:.4f}",
    f"Average Precision: {results['avg_precision']:.4f}",
    f"Average Recall: {results['avg_recall']:.4f}",
    f"Average F1-Score: {results['avg_f1']:.4f}",
    f"Average AUC: {results['avg_auc']:.4f}",
    f"Average Training Time (s): {results['avg_training_time']:.4f}",
    f"Average Iterations Until Convergence: {results['avg_iterations']:.2f}",
]))

Average Test Misclassification Rate: 0.2237
Average Precision: 0.8790
Average Recall: 0.7393
Average F1-Score: 0.8011
Average AUC: 0.8544
Average Training Time (s): 0.0663
Average Iterations Until Convergence: 867.99


# Results Section

In [None]:
import pandas as pd

# Misclassification rates per noise level, entered by hand as literals
# (nothing in this cell recomputes them from the experiments).

# Normal Logistic Regression
normal_data = {
    "Noise Level": ["5% Noise", "10% Noise", "20% Noise"],
    "Misclassification Rate": [0.1562, 0.1803, 0.2285],
}

# Robust Logistic Regression with L1 Lasso
robust_data = {
    "Noise Level": ["5% Noise", "10% Noise", "20% Noise"],
    "Misclassification Rate": [0.1512, 0.1740, 0.2177],
}

# Robust Logistic Regression with Elastic Net Regularizer
elastic_data = {
    "Noise Level": ["5% Noise", "10% Noise", "20% Noise"],
    "Misclassification Rate": [0.1362, 0.1573, 0.1937],
}

# One frame per model, each tagged with the model name
normal_df = pd.DataFrame(normal_data).assign(Model="Normal Logistic Regression")
robust_df = pd.DataFrame(robust_data).assign(Model="Robust Logistic Regression (L1 Lasso)")
elastic_df = pd.DataFrame(elastic_data).assign(Model="Robust Logistic Regression (Elastic Net)")

# Stack the three frames and put the model name first for readability
results_df = pd.concat(
    [normal_df, robust_df, elastic_df], ignore_index=True
)[["Model", "Noise Level", "Misclassification Rate"]]

# Display the combined table as the cell output
results_df

---