In [2]:
import numpy as np
import pandas as pd

# Load the data
train_data = pd.read_csv("dataset/bank-note/train.csv", header=None)
test_data = pd.read_csv("dataset/bank-note/test.csv", header=None)

# Extract features and labels
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

T = 10  # Maximum number of epochs

distinct_weight_vectors = []
weight_vector_counts = []

for epoch in range(1, T + 1):
    # Initialize the weight vector for this epoch
    weight_vector = np.zeros(X_train.shape[1])
    correctly_predicted_count = 0

    # Shuffle the training data
    shuffle_indices = np.random.permutation(len(X_train))
    X_train_shuffled = X_train[shuffle_indices]
    y_train_shuffled = y_train[shuffle_indices]

    for i in range(len(X_train_shuffled)):
        # Perceptron update
        prediction = np.sign(np.dot(weight_vector, X_train_shuffled[i]))
        if prediction * y_train_shuffled[i] <= 0:
            weight_vector += y_train_shuffled[i] * X_train_shuffled[i]
        else:
            correctly_predicted_count += 1

    # Store the distinct weight vectors and their counts
    if not any(np.array_equal(weight_vector, w) for w in distinct_weight_vectors):
        distinct_weight_vectors.append(weight_vector)
        weight_vector_counts.append(correctly_predicted_count)

# Testing
test_errors = 0
for i in range(len(X_test)):
    votes = [np.sign(np.dot(weight, X_test[i])) for weight in distinct_weight_vectors]
    predicted_label = max(set(votes), key=votes.count)
    if predicted_label != y_test[i]:
        test_errors += 1

# Calculate the average test error
average_test_error = test_errors / len(X_test) * 100

print("Distinct Weight Vectors and Their Counts:")
for weight, count in zip(distinct_weight_vectors, weight_vector_counts):
    print(f"Weight Vector: {weight}, Correctly Predicted: {count}")

print(f"Average Test Error: {average_test_error:.2f}%")


Distinct Weight Vectors and Their Counts:
Weight Vector: [-10.8539997 -11.17523    -3.321218   -7.4427952], Correctly Predicted: 317
Weight Vector: [-18.1891625  -8.671183   -5.5532456 -19.3512206], Correctly Predicted: 318
Weight Vector: [ -9.0522607 -10.649155   -7.010688  -10.3062392], Correctly Predicted: 321
Weight Vector: [ -9.2390727  -9.240155   -9.881265  -10.3453052], Correctly Predicted: 319
Weight Vector: [-12.3147914  -5.455255   -4.350441   -9.6941496], Correctly Predicted: 323
Weight Vector: [-14.0842077  -5.453563   -5.97261   -17.8325452], Correctly Predicted: 307
Weight Vector: [-20.7541767  -3.313665   -3.908658  -19.6545466], Correctly Predicted: 316
Weight Vector: [-14.0486044  -7.485185   -5.258825  -10.7023156], Correctly Predicted: 319
Weight Vector: [-13.5785044 -17.999635    8.701365  -15.7682012], Correctly Predicted: 318
Weight Vector: [-11.0231655 -10.777548   -3.203265   -8.0548356], Correctly Predicted: 324
Average Test Error: 60.40%
