### Instructions on how to run the notebook:
#### Perform classification with the best model on the validation and test sets
1. Import required libraries by running the first block
2. Run the second block to prepare the data sets
3. Assuming the files `weights.npy` and `alpha.npy` are in the same directory as this notebook, run the fourth block to perform classification on both the validation and test sets using our chosen best model
#### Perform validation to choose the best model
1. Import required libraries by running the first block
2. Run the second block to prepare the data sets
3. Run the third block to perform validation and choose the best model

# Import needed libraries and prepare the data sets (training, validation, test)

In [2]:
from perceptron import Perceptron
import numpy as np
from sklearn.datasets import load_digits

from sklearn.model_selection import train_test_split

In [3]:
# Seed shared by both splits and by NumPy's global random generator.
SEED = 42

# Load the digits data restricted to two classes.
X, y = load_digits(return_X_y=True, n_class=2)

# Re-encode the labels once: 0 -> -1 and 1 -> +1.
signed_labels = y * 2 - 1

# First split: 60% training, 40% held out (stratified on the label).
Xtrain, Xdata, yTrain, yData = train_test_split(
    X,
    signed_labels,
    test_size=0.4,
    random_state=SEED,
    stratify=signed_labels,
)

# Second split: the held-out 40% is halved into validation and test.
Xval, Xtest, yVal, yTest = train_test_split(
    Xdata,
    yData,
    test_size=0.5,
    random_state=SEED,
    stratify=yData,
)

# Fix the global NumPy seed so the random initial weights drawn later are reproducible.
np.random.seed(SEED)

# Validation of perceptron

In [4]:
# Model selection: try 100 random initial weight vectors, each combined with
# five learning rates, and score every trained model on the validation set.
# Each entry of `accuracies` is the tuple
#   (initial_weights, alpha, validation accuracy, validation margin, train() return value)
accuracies = []
for trial in range(100):
    # One weight per feature plus one extra entry (presumably the bias -- confirm in perceptron.py).
    initial_weights = np.random.randn(Xtrain.shape[1] + 1)
    for alpha in [10**-5, 10**-4, 10**-3, 10**-2, 10**-1]:
        perceptron = Perceptron(alpha)
        # Pass a private copy: if train() updates its argument in place, sharing
        # one array would make every later alpha start from already-trained
        # weights and would corrupt the "initial" weights stored and saved below.
        iterations = perceptron.train(Xtrain, yTrain, initial_weights.copy())
        predictions = perceptron.predict(Xval)
        accuracy = np.mean(predictions == yVal)
        margin = perceptron.compute_min_margin(Xval)
        accuracies.append((initial_weights, alpha, accuracy, margin, iterations))

# Find the maximum validation accuracy
max_accuracy = max(accuracies, key=lambda x: x[2])[2]

# Keep only the configurations that reach the maximum accuracy
best = [el for el in accuracies if el[2] == max_accuracy]

for el in best:
    print(f'Alpha: {el[1]} Accuracy: {el[2]} Margin: {el[3]}')

# Tie-break among the most accurate models by the largest validation margin
best_margin = max(best, key=lambda x: x[3])
print('Best model:')
print(f'Initial_weights: {best_margin[0]} Alpha: {best_margin[1]} Accuracy: {best_margin[2]} Margin: {best_margin[3]} Iterations: {best_margin[4]}')

# Save the winning initial weights and alpha (np.save appends the .npy extension)
np.save('weights', best_margin[0])
np.save('alpha', best_margin[1])

Alpha: 0.1 Accuracy: 1.0 Margin: 0.37420176310379283
Alpha: 0.1 Accuracy: 1.0 Margin: 1.398221509663573
Alpha: 0.001 Accuracy: 1.0 Margin: 0.00527555388721912
Alpha: 0.01 Accuracy: 1.0 Margin: 1.0216146742576193
Alpha: 0.1 Accuracy: 1.0 Margin: 3.445611702216981
Alpha: 0.01 Accuracy: 1.0 Margin: 0.051932901057536895
Alpha: 0.1 Accuracy: 1.0 Margin: 0.21504656439423364
Alpha: 0.1 Accuracy: 1.0 Margin: 2.641298485472315
Alpha: 0.1 Accuracy: 1.0 Margin: 0.500398610068781
Alpha: 0.001 Accuracy: 1.0 Margin: 0.07103766851339621
Alpha: 0.1 Accuracy: 1.0 Margin: 2.1639707900651355
Alpha: 0.1 Accuracy: 1.0 Margin: 2.0189939247833535
Alpha: 0.01 Accuracy: 1.0 Margin: 1.2580012226138648
Alpha: 0.1 Accuracy: 1.0 Margin: 1.3818093785730192
Alpha: 0.01 Accuracy: 1.0 Margin: 0.036995956931241025
Alpha: 0.1 Accuracy: 1.0 Margin: 1.399354063520599
Alpha: 0.1 Accuracy: 1.0 Margin: 1.803462622930939
Alpha: 0.1 Accuracy: 1.0 Margin: 2.297976589418476
Alpha: 0.1 Accuracy: 1.0 Margin: 1.0791247291740906
Alp

# Testing the perceptron on the test set

In [5]:
# Load the initial weights and alpha selected during validation
weights = np.load('weights.npy')
# np.save stored the scalar alpha as a 0-d array; convert it back to a plain
# float so Perceptron receives the same type it was given during validation.
alpha = float(np.load('alpha.npy'))

# Predict on the eval (validation) set with a model trained on the training set only
perceptron = Perceptron(alpha)
# Train on a copy so the loaded initial weights stay untouched for the second
# training run below, even if train() modifies its argument in place.
perceptron.train(Xtrain, yTrain, weights.copy())
predictions = perceptron.predict(Xval)
accuracy = np.mean(predictions == yVal)
print(f'Accuracy on eval set: {accuracy}')
print(f'Margin on eval set: {perceptron.compute_min_margin(Xval)}')

# Predict on the test set
perceptron = Perceptron(alpha)
# Here on test we train both on train and eval
Xtrain2 = np.concatenate((Xtrain, Xval))
yTrain2 = np.concatenate((yTrain, yVal))
# Start again from the same initial weights as the run above
perceptron.train(Xtrain2, yTrain2, weights.copy())
# Save final weights
np.save('final_model_weights.npy', perceptron.weights)
predictions = perceptron.predict(Xtest)
accuracy = np.mean(predictions == yTest)
print(f'Accuracy on test set: {accuracy}')
print(f'Margin on test set: {perceptron.compute_min_margin(Xtest)}')

Accuracy on eval set: 1.0
Margin on eval set: 3.445611702216981
Accuracy on test set: 1.0
Margin on test set: 1.094277924417078


# Comparison with SciKit-Learn perceptron

In [6]:
from sklearn.linear_model import Perceptron as Perceptron_sklearn

# Load the initial weights and alpha selected during validation
weights = np.load('weights.npy')
# np.save stored the scalar alpha as a 0-d array; convert it back to a plain float
alpha = float(np.load('alpha.npy'))

# --- Validation comparison: both models are trained on the training set only ---
perceptron = Perceptron(alpha)
# Train on a copy so the loaded initial weights can be reused for the second
# fit below, even if train() modifies its argument in place.
perceptron.train(Xtrain, yTrain, weights.copy())

sklearn_perceptron = Perceptron_sklearn()
sklearn_perceptron.fit(Xtrain, yTrain)

sklearn_predictions = sklearn_perceptron.predict(Xval)
sklearn_accuracy = np.mean(sklearn_predictions == yVal)
print(f'Scikit-learn Perceptron Accuracy on Validation Set: {sklearn_accuracy}')

my_predictions = perceptron.predict(Xval)
my_accuracy = np.mean(my_predictions == yVal)
print(f'Your Perceptron Algorithm Accuracy on Validation Set: {my_accuracy}')

# --- Test comparison: train on train+validation ---
# New names are used instead of overwriting Xtrain/yTrain, so re-running this
# cell (or an earlier one) does not keep growing the training set.
Xtrain_full = np.concatenate((Xtrain, Xval))
yTrain_full = np.concatenate((yTrain, yVal))

# Refit both models on the combined data; previously the arrays were
# concatenated but the models were never retrained, so the test scores
# still came from the train-only models.
perceptron = Perceptron(alpha)
perceptron.train(Xtrain_full, yTrain_full, weights.copy())

sklearn_perceptron = Perceptron_sklearn()
sklearn_perceptron.fit(Xtrain_full, yTrain_full)

sklearn_predictions = sklearn_perceptron.predict(Xtest)
sklearn_accuracy = np.mean(sklearn_predictions == yTest)
print(f'Scikit-learn Perceptron Accuracy on Test Set: {sklearn_accuracy}')

my_predictions = perceptron.predict(Xtest)
my_accuracy = np.mean(my_predictions == yTest)
print(f'Your Perceptron Algorithm Accuracy on Test Set: {my_accuracy}')


Scikit-learn Perceptron Accuracy on Validation Set: 0.9722222222222222
Your Perceptron Algorithm Accuracy on Validation Set: 1.0
Scikit-learn Perceptron Accuracy on Test Set: 0.9861111111111112
Your Perceptron Algorithm Accuracy on Test Set: 1.0
