# One-vs-One Kernel Perceptron with Polynomial Kernel

In [17]:
import numpy as np
from tqdm import tqdm
from tabulate import tabulate

In [18]:
class MultiKernelPerceptronOVO(object):
    def __init__(self, train_data, train_labels, test_data, test_labels, kernel_param, epochs):
        """
        Initialize the One-vs-One Kernel Perceptron.

        Inputs:
        - train_data (ndarray): Training data.
        - train_labels (ndarray): Training labels.
        - test_data (ndarray): Test data.
        - test_labels (ndarray): Test labels.
        - kernel_param (float): Parameter for the kernel (degree for polynomial, sigma for Gaussian).
        - epochs (int): Number of training epochs.
        """
        
        self.train_data = train_data
        self.train_labels = train_labels
        self.test_data = test_data
        self.test_labels = test_labels
        self.epochs = epochs
        self.classes = np.unique(np.append(self.train_labels, self.test_labels))
        self.num_classes = len(self.classes)
        
        self.N = int(self.num_classes * (self.num_classes - 1) / 2) 
        self.classifiers = sorted(self.permutation(self.classes))
        self.kernel_param = kernel_param
        self.K = self.polynomial(self.train_data, self.train_data, self.kernel_param)
        self.K_test = self.polynomial(self.test_data, self.train_data, self.kernel_param)
        self.alpha = np.zeros((self.N, len(self.train_labels))) 
    
    def permutation(self, classes):
        """
        Generate all possible permutations of classes.

        Inputs:
        - classes (ndarray): Array of unique class labels.

        Output:
        - list: List of all possible permutations of classes.
        """
        return [(classes[i], classes[j]) for i in range(len(classes)) for j in range(i+1, len(classes))]

    def polynomial(self, X, Y, degree):
        """Compute the polynomial kernel."""
        return (np.dot(X, Y.T) + 1) ** degree

    def predict(self, i, data):
        """
        Predict the label for a given data sample using the ith classifier.

        Inputs:
        - i (int): Index of the classifier.
        - data (ndarray): Data sample to predict the label for.

        Output:
        - int: Predicted label.
        """
        K = self.K if data == 'train' else self.K_test
        votes = np.sign(np.dot(self.alpha, K[i]))
        individual_votes = [0] * self.num_classes
        for idx, combo in enumerate(self.classifiers):
            vote = combo[0] if votes[idx] > 0 else combo[1]
            individual_votes[int(vote)] += 1
        return np.argmax(individual_votes)
    
    def train(self):
        """
        Train the OvO Kernel Perceptron.
        Updates the alpha matrix for each classifier independently.
        
        Output:
        - float: Training error rate after the final epoch.
        """
        for _ in range(self.epochs):                    
            errors = 0                                  
            for i in range(len(self.train_labels)):       
                y_pred = self.predict(i, 'train')       
                label = int(self.train_labels[i])       
                if int(y_pred) != label:                
                    errors += 1
                    for j in range(self.N):             
                        combo = self.classifiers[j]    
                        if int(label) in combo:         
                            self.alpha[j, i] += 1 if combo[0]==label else -1
            error_rate = (errors/len(self.train_labels)) * 100           
        return error_rate     
    
    def test(self):
        """
        Evaluate the OvO Kernel Perceptron on the test set.
        
        Output:
        - float: Test error rate.
        """
        errors = 0
        for i in range(len(self.test_labels)):
            y_pred = self.predict(i, 'test')
            if y_pred != int(self.test_labels[i]):
                errors += 1
        return (errors / len(self.test_labels)) * 100

In [19]:
# Helper function to split data
def train_test_split(X, y, train_ratio=0.8, shuffle=True, random_seed=None):
    """
    Split dataset into training and testing sets.

    Inputs:
    - X (ndarray): Feature data.
    - y (ndarray): Corresponding labels.
    - train_ratio (float): Proportion of data for training (default is 0.8).
    - shuffle (bool): Whether to shuffle data before splitting (default is True).
    - random_seed (int or None): Random seed for reproducibility (default is None).

    Outputs:
    - Tuple[ndarray, ndarray, ndarray, ndarray]: 
      Training features, test features, training labels, test labels.
    """
    if shuffle:
        np.random.seed(random_seed)
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        X, y = X[indices], y[indices]
    n_train = int(train_ratio * len(X))
    return X[:n_train], X[n_train:], y[:n_train], y[n_train:]

In [20]:
# Load dataset
X, y = np.loadtxt("zipcombo.dat")[:, 1:], np.loadtxt("zipcombo.dat")[:, 0].astype(int)

In [40]:
# Initialize variables
np.random.seed(42) 
train_ratio = 0.8  # Proportion of data used for training
degree_range = np.arange(1, 8)  # Range of polynomial degrees to evaluate
training_errors, training_stddevs, testing_errors, testing_stddevs = [], [], [], []  # Store metrics

# Iterate through polynomial degrees
for degree in degree_range:
    train_errs, test_errs = [], []  # Store errors for each run
    print(f"Running for polynomial degree {degree}...")

    # Perform 20 runs for each degree
    for run_id in tqdm(range(20)):  
        # Split the data into training and testing sets
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, train_ratio=train_ratio, shuffle=True
        )
        # Train the multi-kernel perceptron with the current degree
        clf = MultiKernelPerceptronOVO(
            train_X, train_y, test_X, test_y, kernel_param=degree, epochs=5
        )
        train_e = clf.train()  # Compute training error
        train_errs.append(train_e)
        test_e = clf.test()  # Compute testing error
        test_errs.append(test_e)

    # Store mean and standard deviation of errors
    training_errors.append(np.mean(train_errs))
    training_stddevs.append(np.std(train_errs))
    testing_errors.append(np.mean(test_errs))
    testing_stddevs.append(np.std(test_errs))

    # Print results for the current degree
    print(
        'Polynomial order: ', degree,
        ', mean train error: ', training_errors[-1],
        ', mean test error: ', testing_errors[-1]
    )


Running for polynomial degree 1...


  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [02:09<00:00,  6.45s/it]


Polynomial order:  1 , mean train error:  9.335842968539929 , mean test error:  10.174731182795698
Running for polynomial degree 2...


100%|██████████| 20/20 [02:11<00:00,  6.59s/it]


Polynomial order:  2 , mean train error:  4.747243882764184 , mean test error:  6.819892473118278
Running for polynomial degree 3...


100%|██████████| 20/20 [03:07<00:00,  9.37s/it]


Polynomial order:  3 , mean train error:  2.1759881688625975 , mean test error:  5.513440860215053
Running for polynomial degree 4...


100%|██████████| 20/20 [03:09<00:00,  9.48s/it]


Polynomial order:  4 , mean train error:  1.1427803172895943 , mean test error:  5.39247311827957
Running for polynomial degree 5...


100%|██████████| 20/20 [03:10<00:00,  9.53s/it]


Polynomial order:  5 , mean train error:  0.7945684323742942 , mean test error:  5.14247311827957
Running for polynomial degree 6...


100%|██████████| 20/20 [03:06<00:00,  9.31s/it]


Polynomial order:  6 , mean train error:  0.596262436138747 , mean test error:  4.908602150537634
Running for polynomial degree 7...


100%|██████████| 20/20 [03:10<00:00,  9.54s/it]

Polynomial order:  7 , mean train error:  0.4382898628663619 , mean test error:  4.92741935483871





In [41]:
# Tabulate results
table_data = []
for idx, degree in enumerate(degree_range):
    train_entry = f"{training_errors[idx]:.4f}±{training_stddevs[idx]:.4f}"
    test_entry = f"{testing_errors[idx]:.4f}±{testing_stddevs[idx]:.4f}"
    table_data.append([degree, train_entry, test_entry])

print(tabulate(
    table_data,
    headers=["Degree", "Mean Train Error Rates (%)", "Mean Test Error Rates (%)"],
    tablefmt="simple_outline",
    stralign="center"
))

┌──────────┬──────────────────────────────┬─────────────────────────────┐
│   Degree │  Mean Train Error Rates (%)  │  Mean Test Error Rates (%)  │
├──────────┼──────────────────────────────┼─────────────────────────────┤
│        1 │        9.3358±0.2502         │       10.1747±0.9133        │
│        2 │        4.7472±0.1793         │        6.8199±0.6107        │
│        3 │        2.1760±0.2140         │        5.5134±0.4882        │
│        4 │        1.1428±0.2258         │        5.3925±0.6006        │
│        5 │        0.7946±0.1787         │        5.1425±0.4725        │
│        6 │        0.5963±0.1273         │        4.9086±0.5193        │
│        7 │        0.4383±0.1179         │        4.9274±0.5219        │
└──────────┴──────────────────────────────┴─────────────────────────────┘


In [42]:
# Cross-validation function
def cross_validation_split(features, labels, folds=5):
    """
    Generate k-fold cross-validation splits.

    Inputs:
    - features (ndarray): Feature data.
    - labels (ndarray): Corresponding labels.
    - folds (int): Number of cross-validation folds (default is 5).

    Output:
    - List[Tuple[ndarray, ndarray, ndarray, ndarray]]:
      A list of tuples containing (train_features, valid_features, train_labels, valid_labels).
    """
    split_size = len(features) // folds  # Calculate the size of each fold
    results = []

    # Create cross-validation splits
    for fold in range(folds):
        start, end = fold * split_size, (fold + 1) * split_size
        valid_features = features[start:end]  # Validation features for the current fold
        valid_labels = labels[start:end]  # Validation labels for the current fold
        # Combine remaining data into the training set
        train_features = np.concatenate([features[:start], features[end:]]) if start > 0 else features[end:]
        train_labels = np.concatenate([labels[:start], labels[end:]]) if start > 0 else labels[end:]
        results.append((train_features, valid_features, train_labels, valid_labels))

    return results  # Return the training-validation splits


In [24]:
# Initialization
np.random.seed(42)  
train_ratio = 0.8  
best_degrees, test_errs, train_errs = [], [], []  
degree_candidates = range(1, 8)  # Polynomial degrees to evaluate

# Perform 20 cross-validation iterations
for iteration in tqdm(range(20), desc="Cross-Validation Iterations"):
    # Randomly split the dataset into training and testing sets
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, train_ratio=train_ratio, shuffle=True, seed=iteration
    )

    # Initialize variables to track the best degree
    best_degree, lowest_error = 0, float('inf')

    # Evaluate each polynomial degree
    for degree in degree_candidates:
        print(f"Iteration {iteration + 1}: Evaluating degree {degree}...")
        validation_errors = []  # Store validation errors for each fold

        # Perform 5-fold cross-validation
        splits = cross_validation_split(train_X, train_y, folds=5)
        for fold_idx, (X_train_fold, X_valid_fold, y_train_fold, y_valid_fold) in enumerate(splits):
            # Train the model on the training fold
            model = MultiKernelPerceptronOVO(
                X_train_fold, y_train_fold, X_valid_fold, y_valid_fold,
                kernel_param=degree, epochs=5
            )
            model.train()
            validation_errors.append(model.test())  # Test on the validation fold

        # Compute the mean validation error for the current degree
        mean_validation_error = np.mean(validation_errors)
        print(f"Iteration {iteration + 1}, Degree {degree}: Mean Validation Error = {mean_validation_error:.4f}")

        # Update the best degree if the current degree gives a lower validation error
        if mean_validation_error < lowest_error:
            lowest_error = mean_validation_error
            best_degree = degree

    print(f"Iteration {iteration + 1}: Best Degree = {best_degree}, Validation Error = {lowest_error:.4f}")

    # Retrain the classifier on the entire training set with the best degree
    model = MultiKernelPerceptronOVO(
        train_X, train_y, test_X, test_y,
        kernel_param=best_degree, epochs=5
    )
    train_error = model.train()
    test_error = model.test()
    train_errs.append(train_error)
    test_errs.append(test_error)
    best_degrees.append(best_degree)

    # Print results for the current iteration
    print(f"Iteration {iteration + 1}: Best Degree = {best_degree}, Train Error = {train_error:.4f}, Test Error = {test_error:.4f}")

# Print final results across all iterations
print("\nFinal Results:")
print(f"Mean Best Degree: {np.mean(best_degrees):.2f} ± {np.std(best_degrees):.2f}")
print(f"Mean Train Error: {np.mean(train_errs):.4f} ± {np.std(train_errs):.4f}")
print(f"Mean Test Error: {np.mean(test_errs):.4f} ± {np.std(test_errs):.4f}")


Cross-Validation Runs:   0%|          | 0/20 [00:00<?, ?it/s]

Run 1: Evaluating degree 1...
Run 1, Degree 1: Mean Validation Error = 9.8991
Run 1: Evaluating degree 2...
Run 1, Degree 2: Mean Validation Error = 6.8594
Run 1: Evaluating degree 3...
Run 1, Degree 3: Mean Validation Error = 5.7835
Run 1: Evaluating degree 4...
Run 1, Degree 4: Mean Validation Error = 5.7566
Run 1: Evaluating degree 5...
Run 1, Degree 5: Mean Validation Error = 5.5817
Run 1: Evaluating degree 6...
Run 1, Degree 6: Mean Validation Error = 5.5145
Run 1: Evaluating degree 7...
Run 1, Degree 7: Mean Validation Error = 5.3800
Run 1: Best degree (d*) = 7, Validation Error = 5.3800


Cross-Validation Runs:   5%|▌         | 1/20 [02:56<55:59, 176.79s/it]

Run 1: d* = 7,Train Error Rate = 0.4571, Test Error Rate = 4.1935
Run 2: Evaluating degree 1...
Run 2, Degree 1: Mean Validation Error = 10.8944
Run 2: Evaluating degree 2...
Run 2, Degree 2: Mean Validation Error = 7.7606
Run 2: Evaluating degree 3...
Run 2, Degree 3: Mean Validation Error = 6.2946
Run 2: Evaluating degree 4...
Run 2, Degree 4: Mean Validation Error = 6.3215
Run 2: Evaluating degree 5...
Run 2, Degree 5: Mean Validation Error = 5.8238
Run 2: Evaluating degree 6...
Run 2, Degree 6: Mean Validation Error = 5.6893
Run 2: Evaluating degree 7...
Run 2, Degree 7: Mean Validation Error = 5.5414
Run 2: Best degree (d*) = 7, Validation Error = 5.5414


Cross-Validation Runs:  10%|█         | 2/20 [05:59<54:06, 180.37s/it]

Run 2: d* = 7,Train Error Rate = 0.3496, Test Error Rate = 4.5699
Run 3: Evaluating degree 1...
Run 3, Degree 1: Mean Validation Error = 9.6167
Run 3: Evaluating degree 2...
Run 3, Degree 2: Mean Validation Error = 7.4647
Run 3: Evaluating degree 3...
Run 3, Degree 3: Mean Validation Error = 6.2004
Run 3: Evaluating degree 4...
Run 3, Degree 4: Mean Validation Error = 5.6759
Run 3: Evaluating degree 5...
Run 3, Degree 5: Mean Validation Error = 5.5817
Run 3: Evaluating degree 6...
Run 3, Degree 6: Mean Validation Error = 5.3665
Run 3: Evaluating degree 7...
Run 3, Degree 7: Mean Validation Error = 5.0706
Run 3: Best degree (d*) = 7, Validation Error = 5.0706


Cross-Validation Runs:  15%|█▌        | 3/20 [08:59<51:05, 180.31s/it]

Run 3: d* = 7,Train Error Rate = 0.3764, Test Error Rate = 4.1935
Run 4: Evaluating degree 1...
Run 4, Degree 1: Mean Validation Error = 9.6974
Run 4: Evaluating degree 2...
Run 4, Degree 2: Mean Validation Error = 6.7922
Run 4: Evaluating degree 3...
Run 4, Degree 3: Mean Validation Error = 5.8642
Run 4: Evaluating degree 4...
Run 4, Degree 4: Mean Validation Error = 5.8104
Run 4: Evaluating degree 5...
Run 4, Degree 5: Mean Validation Error = 5.3127
Run 4: Evaluating degree 6...
Run 4, Degree 6: Mean Validation Error = 5.2589
Run 4: Evaluating degree 7...
Run 4, Degree 7: Mean Validation Error = 5.0168
Run 4: Best degree (d*) = 7, Validation Error = 5.0168


Cross-Validation Runs:  20%|██        | 4/20 [11:56<47:44, 179.04s/it]

Run 4: d* = 7,Train Error Rate = 0.4974, Test Error Rate = 5.2151
Run 5: Evaluating degree 1...
Run 5, Degree 1: Mean Validation Error = 10.4909
Run 5: Evaluating degree 2...
Run 5, Degree 2: Mean Validation Error = 7.3436
Run 5: Evaluating degree 3...
Run 5, Degree 3: Mean Validation Error = 6.0794
Run 5: Evaluating degree 4...
Run 5, Degree 4: Mean Validation Error = 5.5952
Run 5: Evaluating degree 5...
Run 5, Degree 5: Mean Validation Error = 5.4472
Run 5: Evaluating degree 6...
Run 5, Degree 6: Mean Validation Error = 5.1782
Run 5: Evaluating degree 7...
Run 5, Degree 7: Mean Validation Error = 5.3127
Run 5: Best degree (d*) = 6, Validation Error = 5.1782


Cross-Validation Runs:  25%|██▌       | 5/20 [14:52<44:28, 177.93s/it]

Run 5: d* = 6,Train Error Rate = 0.5109, Test Error Rate = 4.5161
Run 6: Evaluating degree 1...
Run 6, Degree 1: Mean Validation Error = 10.8541
Run 6: Evaluating degree 2...
Run 6, Degree 2: Mean Validation Error = 7.3436
Run 6: Evaluating degree 3...
Run 6, Degree 3: Mean Validation Error = 6.1735
Run 6: Evaluating degree 4...
Run 6, Degree 4: Mean Validation Error = 5.6490
Run 6: Evaluating degree 5...
Run 6, Degree 5: Mean Validation Error = 5.4203
Run 6: Evaluating degree 6...
Run 6, Degree 6: Mean Validation Error = 5.1110
Run 6: Evaluating degree 7...
Run 6, Degree 7: Mean Validation Error = 5.0572
Run 6: Best degree (d*) = 7, Validation Error = 5.0572


Cross-Validation Runs:  30%|███       | 6/20 [17:51<41:34, 178.18s/it]

Run 6: d* = 7,Train Error Rate = 0.4840, Test Error Rate = 4.9462
Run 7: Evaluating degree 1...
Run 7, Degree 1: Mean Validation Error = 10.2488
Run 7: Evaluating degree 2...
Run 7, Degree 2: Mean Validation Error = 7.3840
Run 7: Evaluating degree 3...
Run 7, Degree 3: Mean Validation Error = 5.9583
Run 7: Evaluating degree 4...
Run 7, Degree 4: Mean Validation Error = 5.5145
Run 7: Evaluating degree 5...
Run 7, Degree 5: Mean Validation Error = 5.5952
Run 7: Evaluating degree 6...
Run 7, Degree 6: Mean Validation Error = 5.0975
Run 7: Evaluating degree 7...
Run 7, Degree 7: Mean Validation Error = 5.1648
Run 7: Best degree (d*) = 6, Validation Error = 5.0975


Cross-Validation Runs:  35%|███▌      | 7/20 [20:53<38:51, 179.33s/it]

Run 7: d* = 6,Train Error Rate = 0.4302, Test Error Rate = 4.8387
Run 8: Evaluating degree 1...
Run 8, Degree 1: Mean Validation Error = 10.0202
Run 8: Evaluating degree 2...
Run 8, Degree 2: Mean Validation Error = 7.4243
Run 8: Evaluating degree 3...
Run 8, Degree 3: Mean Validation Error = 6.3887
Run 8: Evaluating degree 4...
Run 8, Degree 4: Mean Validation Error = 5.8238
Run 8: Evaluating degree 5...
Run 8, Degree 5: Mean Validation Error = 5.5010
Run 8: Evaluating degree 6...
Run 8, Degree 6: Mean Validation Error = 5.2993
Run 8: Evaluating degree 7...
Run 8, Degree 7: Mean Validation Error = 4.9630
Run 8: Best degree (d*) = 7, Validation Error = 4.9630


Cross-Validation Runs:  40%|████      | 8/20 [23:54<36:00, 180.00s/it]

Run 8: d* = 7,Train Error Rate = 0.6857, Test Error Rate = 4.9462
Run 9: Evaluating degree 1...
Run 9, Degree 1: Mean Validation Error = 10.5178
Run 9: Evaluating degree 2...
Run 9, Degree 2: Mean Validation Error = 7.3302
Run 9: Evaluating degree 3...
Run 9, Degree 3: Mean Validation Error = 6.2273
Run 9: Evaluating degree 4...
Run 9, Degree 4: Mean Validation Error = 5.4472
Run 9: Evaluating degree 5...
Run 9, Degree 5: Mean Validation Error = 5.6221
Run 9: Evaluating degree 6...
Run 9, Degree 6: Mean Validation Error = 5.3127
Run 9: Evaluating degree 7...
Run 9, Degree 7: Mean Validation Error = 5.0706
Run 9: Best degree (d*) = 7, Validation Error = 5.0706


Cross-Validation Runs:  45%|████▌     | 9/20 [33:00<53:57, 294.29s/it]

Run 9: d* = 7,Train Error Rate = 0.3092, Test Error Rate = 4.4086
Run 10: Evaluating degree 1...
Run 10, Degree 1: Mean Validation Error = 10.6792
Run 10: Evaluating degree 2...
Run 10, Degree 2: Mean Validation Error = 7.5992
Run 10: Evaluating degree 3...
Run 10, Degree 3: Mean Validation Error = 6.6039
Run 10: Evaluating degree 4...
Run 10, Degree 4: Mean Validation Error = 5.9045
Run 10: Evaluating degree 5...
Run 10, Degree 5: Mean Validation Error = 5.8776
Run 10: Evaluating degree 6...
Run 10, Degree 6: Mean Validation Error = 5.4069
Run 10: Evaluating degree 7...
Run 10, Degree 7: Mean Validation Error = 5.5279
Run 10: Best degree (d*) = 6, Validation Error = 5.4069


Cross-Validation Runs:  50%|█████     | 10/20 [36:07<43:33, 261.36s/it]

Run 10: d* = 6,Train Error Rate = 0.6857, Test Error Rate = 5.3763
Run 11: Evaluating degree 1...
Run 11, Degree 1: Mean Validation Error = 10.0471
Run 11: Evaluating degree 2...
Run 11, Degree 2: Mean Validation Error = 6.8998
Run 11: Evaluating degree 3...
Run 11, Degree 3: Mean Validation Error = 5.9852
Run 11: Evaluating degree 4...
Run 11, Degree 4: Mean Validation Error = 5.6490
Run 11: Evaluating degree 5...
Run 11, Degree 5: Mean Validation Error = 5.3665
Run 11: Evaluating degree 6...
Run 11, Degree 6: Mean Validation Error = 5.3934
Run 11: Evaluating degree 7...
Run 11, Degree 7: Mean Validation Error = 4.9630
Run 11: Best degree (d*) = 7, Validation Error = 4.9630


Cross-Validation Runs:  55%|█████▌    | 11/20 [40:35<39:29, 263.24s/it]

Run 11: d* = 7,Train Error Rate = 0.3361, Test Error Rate = 4.6774
Run 12: Evaluating degree 1...
Run 12, Degree 1: Mean Validation Error = 9.8184
Run 12: Evaluating degree 2...
Run 12, Degree 2: Mean Validation Error = 7.0074
Run 12: Evaluating degree 3...
Run 12, Degree 3: Mean Validation Error = 5.9045
Run 12: Evaluating degree 4...
Run 12, Degree 4: Mean Validation Error = 5.4741
Run 12: Evaluating degree 5...
Run 12, Degree 5: Mean Validation Error = 5.3127
Run 12: Evaluating degree 6...
Run 12, Degree 6: Mean Validation Error = 5.0975
Run 12: Evaluating degree 7...
Run 12, Degree 7: Mean Validation Error = 4.8420
Run 12: Best degree (d*) = 7, Validation Error = 4.8420


Cross-Validation Runs:  60%|██████    | 12/20 [44:50<34:46, 260.81s/it]

Run 12: d* = 7,Train Error Rate = 0.3764, Test Error Rate = 5.0538
Run 13: Evaluating degree 1...
Run 13, Degree 1: Mean Validation Error = 9.5494
Run 13: Evaluating degree 2...
Run 13, Degree 2: Mean Validation Error = 7.0074
Run 13: Evaluating degree 3...
Run 13, Degree 3: Mean Validation Error = 6.0794
Run 13: Evaluating degree 4...
Run 13, Degree 4: Mean Validation Error = 5.6759
Run 13: Evaluating degree 5...
Run 13, Degree 5: Mean Validation Error = 5.6221
Run 13: Evaluating degree 6...
Run 13, Degree 6: Mean Validation Error = 5.6221
Run 13: Evaluating degree 7...
Run 13, Degree 7: Mean Validation Error = 5.3531
Run 13: Best degree (d*) = 7, Validation Error = 5.3531


Cross-Validation Runs:  65%|██████▌   | 13/20 [48:22<28:42, 246.07s/it]

Run 13: d* = 7,Train Error Rate = 0.4033, Test Error Rate = 4.5161
Run 14: Evaluating degree 1...
Run 14, Degree 1: Mean Validation Error = 9.4687
Run 14: Evaluating degree 2...
Run 14, Degree 2: Mean Validation Error = 6.4291
Run 14: Evaluating degree 3...
Run 14, Degree 3: Mean Validation Error = 5.6624
Run 14: Evaluating degree 4...
Run 14, Degree 4: Mean Validation Error = 5.2858
Run 14: Evaluating degree 5...
Run 14, Degree 5: Mean Validation Error = 5.3531
Run 14: Evaluating degree 6...
Run 14, Degree 6: Mean Validation Error = 5.0303
Run 14: Evaluating degree 7...
Run 14, Degree 7: Mean Validation Error = 4.7478
Run 14: Best degree (d*) = 7, Validation Error = 4.7478


Cross-Validation Runs:  70%|███████   | 14/20 [52:19<24:19, 243.33s/it]

Run 14: d* = 7,Train Error Rate = 0.4840, Test Error Rate = 6.0215
Run 15: Evaluating degree 1...
Run 15, Degree 1: Mean Validation Error = 10.3564
Run 15: Evaluating degree 2...
Run 15, Degree 2: Mean Validation Error = 7.2898
Run 15: Evaluating degree 3...
Run 15, Degree 3: Mean Validation Error = 6.3618
Run 15: Evaluating degree 4...
Run 15, Degree 4: Mean Validation Error = 5.7835
Run 15: Evaluating degree 5...
Run 15, Degree 5: Mean Validation Error = 5.5010
Run 15: Evaluating degree 6...
Run 15, Degree 6: Mean Validation Error = 5.0437
Run 15: Evaluating degree 7...
Run 15, Degree 7: Mean Validation Error = 5.3127
Run 15: Best degree (d*) = 6, Validation Error = 5.0437


Cross-Validation Runs:  75%|███████▌  | 15/20 [55:29<18:55, 227.08s/it]

Run 15: d* = 6,Train Error Rate = 0.5378, Test Error Rate = 5.3226
Run 16: Evaluating degree 1...
Run 16, Degree 1: Mean Validation Error = 9.9529
Run 16: Evaluating degree 2...
Run 16, Degree 2: Mean Validation Error = 7.1688
Run 16: Evaluating degree 3...
Run 16, Degree 3: Mean Validation Error = 6.3618
Run 16: Evaluating degree 4...
Run 16, Degree 4: Mean Validation Error = 5.4876
Run 16: Evaluating degree 5...
Run 16, Degree 5: Mean Validation Error = 5.1244
Run 16: Evaluating degree 6...
Run 16, Degree 6: Mean Validation Error = 4.7747
Run 16: Evaluating degree 7...
Run 16, Degree 7: Mean Validation Error = 5.0841
Run 16: Best degree (d*) = 6, Validation Error = 4.7747


Cross-Validation Runs:  80%|████████  | 16/20 [59:50<15:50, 237.50s/it]

Run 16: d* = 6,Train Error Rate = 0.4571, Test Error Rate = 4.3011
Run 17: Evaluating degree 1...
Run 17, Degree 1: Mean Validation Error = 9.9933
Run 17: Evaluating degree 2...
Run 17, Degree 2: Mean Validation Error = 6.9401
Run 17: Evaluating degree 3...
Run 17, Degree 3: Mean Validation Error = 5.7969
Run 17: Evaluating degree 4...
Run 17, Degree 4: Mean Validation Error = 5.3127
Run 17: Evaluating degree 5...
Run 17, Degree 5: Mean Validation Error = 5.3262
Run 17: Evaluating degree 6...
Run 17, Degree 6: Mean Validation Error = 4.7747
Run 17: Evaluating degree 7...
Run 17, Degree 7: Mean Validation Error = 4.7882
Run 17: Best degree (d*) = 6, Validation Error = 4.7747


Cross-Validation Runs:  85%|████████▌ | 17/20 [1:04:04<12:06, 242.25s/it]

Run 17: d* = 6,Train Error Rate = 0.3630, Test Error Rate = 5.1613
Run 18: Evaluating degree 1...
Run 18, Degree 1: Mean Validation Error = 10.0202
Run 18: Evaluating degree 2...
Run 18, Degree 2: Mean Validation Error = 7.0074
Run 18: Evaluating degree 3...
Run 18, Degree 3: Mean Validation Error = 5.8507
Run 18: Evaluating degree 4...
Run 18, Degree 4: Mean Validation Error = 5.5010
Run 18: Evaluating degree 5...
Run 18, Degree 5: Mean Validation Error = 5.2589
Run 18: Evaluating degree 6...
Run 18, Degree 6: Mean Validation Error = 5.0841
Run 18: Evaluating degree 7...
Run 18, Degree 7: Mean Validation Error = 4.8958
Run 18: Best degree (d*) = 7, Validation Error = 4.8958


Cross-Validation Runs:  90%|█████████ | 18/20 [1:07:47<07:53, 236.68s/it]

Run 18: d* = 7,Train Error Rate = 0.3496, Test Error Rate = 4.7312
Run 19: Evaluating degree 1...
Run 19, Degree 1: Mean Validation Error = 10.3968
Run 19: Evaluating degree 2...
Run 19, Degree 2: Mean Validation Error = 7.2226
Run 19: Evaluating degree 3...
Run 19, Degree 3: Mean Validation Error = 6.0928
Run 19: Evaluating degree 4...
Run 19, Degree 4: Mean Validation Error = 5.6759
Run 19: Evaluating degree 5...
Run 19, Degree 5: Mean Validation Error = 5.8776
Run 19: Evaluating degree 6...
Run 19, Degree 6: Mean Validation Error = 5.2858
Run 19: Evaluating degree 7...
Run 19, Degree 7: Mean Validation Error = 5.3531
Run 19: Best degree (d*) = 6, Validation Error = 5.2858


Cross-Validation Runs:  95%|█████████▌| 19/20 [1:11:33<03:53, 233.43s/it]

Run 19: d* = 6,Train Error Rate = 0.5243, Test Error Rate = 4.3548
Run 20: Evaluating degree 1...
Run 20, Degree 1: Mean Validation Error = 10.4775
Run 20: Evaluating degree 2...
Run 20, Degree 2: Mean Validation Error = 6.7922
Run 20: Evaluating degree 3...
Run 20, Degree 3: Mean Validation Error = 5.8373
Run 20: Evaluating degree 4...
Run 20, Degree 4: Mean Validation Error = 5.6355
Run 20: Evaluating degree 5...
Run 20, Degree 5: Mean Validation Error = 5.2051
Run 20: Evaluating degree 6...
Run 20, Degree 6: Mean Validation Error = 4.9227
Run 20: Evaluating degree 7...
Run 20, Degree 7: Mean Validation Error = 4.7209
Run 20: Best degree (d*) = 7, Validation Error = 4.7209


Cross-Validation Runs: 100%|██████████| 20/20 [1:14:38<00:00, 223.94s/it]

Run 20: d* = 7,Train Error Rate = 0.2286, Test Error Rate = 5.4839

Final Results:
Mean d*: 6.65 ± 0.48
Mean Train Error: 0.4423 ± 0.1128
Mean Test Error: 4.8414 ± 0.4750





In [29]:
# Tabulate results
data_table = []
for iteration_idx in range(20):
    train_result = f"{train_errs[iteration_idx]:.4f}"  # Using 'train_errs'
    test_result = f"{test_errs[iteration_idx]:.4f}"    # Using 'test_errs'
    data_table.append([iteration_idx + 1, best_degrees[iteration_idx], train_result, test_result])  # Using 'best_degrees'

# Print the table
print(tabulate(
    data_table,
    headers=["Iteration", "Best Degree (d*)", "Train Error Rate (%)", "Test Error Rate (%)"],
    tablefmt="simple_outline",
    numalign="center"
))


┌───────┬────────────────────┬────────────────────────┬───────────────────────┐
│  Run  │  Best Degree (d*)  │  Train Error Rate (%)  │  Test Error Rate (%)  │
├───────┼────────────────────┼────────────────────────┼───────────────────────┤
│   1   │         7          │         0.4571         │        4.1935         │
│   2   │         7          │         0.3496         │        4.5699         │
│   3   │         7          │         0.3764         │        4.1935         │
│   4   │         7          │         0.4974         │        5.2151         │
│   5   │         6          │         0.5109         │        4.5161         │
│   6   │         7          │         0.484          │        4.9462         │
│   7   │         6          │         0.4302         │        4.8387         │
│   8   │         7          │         0.6857         │        4.9462         │
│   9   │         7          │         0.3092         │        4.4086         │
│  10   │         6          │         0