# Perceptron

#### Install dependencies

In [None]:
%pip install matplotlib scikit-learn seaborn numpy

#### Prepare imports

In [None]:
from sklearn.datasets import load_digits # This is to load the dataset
from sklearn.decomposition import PCA  # This is to load PCA to be used to transform digits into a 2D points dataset
import matplotlib.pyplot as plt # This is to load plotting functions
import seaborn as sns; sns.set() # This is to make the plots prettier
import numpy as np # This is used to handle arrays of data

#### Data
We will be working with a dataset of handwritten digits.
Let us visualise some examples.

In [None]:
# We're using a subset of two classes for now
digits = load_digits(n_class=2)

In [None]:
# Handy plotting functions
# Axis limits shared by the plotting helpers below; plot_mesh also uses them
# as the extent of its prediction grid.
x_min, x_max = -40, 40
y_min, y_max = -40, 40

def plot_examples(show_num=4):
    """Show the first `show_num` digit images side by side with their labels.

    Images and labels are read from the module-level `digits` dataset.

    Args:
        show_num: Number of examples to display (defaults to 4).
    """
    # squeeze=False always returns a 2D array of axes, so show_num=1 works too.
    _, axes = plt.subplots(1, show_num, squeeze=False)
    # zip stops at the shortest input, so only the first `show_num` examples
    # are consumed and the dataset never has to be copied into a list.
    for ax, image, label in zip(axes[0], digits.images, digits.target):
        ax.set_axis_off()
        ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
        ax.set_title('Label: %i' % label)

def plot_scatter(data, target, n_class=2, alpha=0.5):
    """Scatter-plot 2D points coloured by their target value.

    Args:
        data: Array whose first two columns are used as x and y coordinates.
        target: One value per point, used to choose its colour.
        n_class: Number of discrete colours sampled from the 'rainbow' colormap.
        alpha: Marker opacity.
    """
    # plt.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap('rainbow', n_class)
    scatter = plt.scatter(data[:, 0], data[:, 1], c=target, edgecolor='none', alpha=alpha, cmap=cmap)
    plt.legend(*scatter.legend_elements(), loc="upper left", title="Targets")
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    # Use the shared module-level limits so every figure has the same extent.
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

def plot_decision_boundary(data, weights):
    """Draw the linear decision boundary w0 + w1*x + w2*y = 0 as a dashed line.

    Only the first three weights are used, so this is meaningful for the
    linear representation [1, x, y]; it does not draw the curved boundary of
    a quadratic feature map.

    Args:
        data: Unused; kept so existing calls keep working.
        weights: Array-like holding at least (bias, w_x, w_y); it is flattened
            before use.
    """
    weights = np.asarray(weights).flatten()
    bias, w_x, w_y = weights[0], weights[1], weights[2]
    if w_y != 0:
        xx = np.linspace(x_min, x_max)
        # Solve w0 + w1*x + w2*y = 0 for y. The bias must be divided by w2 as
        # well, otherwise the line is shifted to the wrong place.
        yy = -(w_x * xx + bias) / w_y
        plt.plot(xx, yy, 'k--')
    elif w_x != 0:
        # w2 == 0: the boundary is the vertical line x = -w0 / w1.
        x_cross = -bias / w_x
        plt.plot([x_cross, x_cross], [y_min, y_max], 'k--')
    # If both w1 and w2 are zero there is no boundary to draw.
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

def plot_mesh(X, pred_fn, n_class=2):
    """Shade the plotting area according to the class predicted at each point.

    Args:
        X: Unused; kept so existing calls keep working.
        pred_fn: Callable that takes an (n_points, 2) array of coordinates and
            returns one prediction per point.
        n_class: Number of discrete colours sampled from the 'rainbow' colormap.
    """
    h = 0.1  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Evaluate the classifier on every grid node, then restore the grid shape.
    Z = pred_fn(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # plt.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    plt.contourf(xx, yy, Z, alpha=0.1, cmap=plt.get_cmap('rainbow', n_class))
    plt.axis('tight')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)


In [None]:
plot_examples()

In [None]:
# The dataset contains 2D data in the form of the `images` attribute,
# as well as a 1D-version called `data`, where the images have been flattened.
# Here we check that they are identical
np.array_equal(digits.images[0].flatten(), digits.data[0])

#### We can get a 2D version of the data using PCA

In [None]:
# We can get a 2D version of the data using PCA
pca = PCA(n_components=2)
X = pca.fit_transform(digits.data) # this is the representation, we'll be working with

In [None]:
# Our targets are in the set {0,1}
t_01 = digits.target
t_01

In [None]:
# Let's plot all the data in 2D
plot_scatter(X, t_01)

## Perceptron

A learning_rate of 0.1 should do the job.

In [None]:
learning_rate = 0.1  # This is the rate of change of the parameters on every update

# Use a non-linear representation (quadratic) for the data (slide 16)
def phi(X):
    """Map each row x of X to the quadratic features [1, x, x**2].

    The leading column of ones is the bias component phi_0(x) = 1; the
    squares are taken element-wise.
    """
    bias_column = np.ones((len(X), 1))
    squared = X ** 2
    return np.concatenate([bias_column, X, squared], axis=1)


# Define the sign function
def f(a):
    """Element-wise sign: 1 where a > 0, -1 where a < 0 and 0 elsewhere."""
    unit = np.ones_like(a)
    positive_part = unit * (a > 0)
    negative_part = unit * (a < 0)
    return positive_part - negative_part

# Define the prediction function (following slide 17)
def predict(W_tilde, X_tilde, boundary=0.5):
    """Return an integer 0/1 label for every row of X_tilde.

    The activations W_tilde @ X_tilde.T are passed through the sign function
    `f`; a point is labelled 1 when its sign exceeds `boundary`, else 0.
    """
    activations = W_tilde @ X_tilde.T
    signs = f(activations[0])  # first row only: W_tilde holds one weight vector
    return np.asarray(signs > boundary, dtype=int)


# Our data is already shuffled, so we skip that step
X_train = phi(X)

# Our target vectors should be {-1, 1}
t_11 = 2 * digits.target - 1
t_11

In [None]:
# Initial weight vector (the seed is fixed so the starting point is repeatable)
np.random.seed(13)
W_0 = 2 * np.random.random((1, X_train.shape[1])) -1 # uniform random values in the half-open range [-1, 1)
print("W_0 = {}".format(W_0))

# Show the data together with the regions predicted by the untrained weights
plot_scatter(X, t_11)
plot_mesh(X, lambda x: predict(W_0, phi(x)), n_class=2)

#### Define functions to find misclassified values and to update weights of the model (following slide 17)

<details>
<summary>Solution</summary>
    <code>
    z = W @ X.T * t <= 0
    </code>
</details>

In [None]:
def misclassified(W, X, t):
    """Flag the samples that the weights `W` currently classify wrongly.

    Exercise placeholder: the `...` below has to be replaced with your own
    implementation (see the collapsed "Solution" above this cell).

    weight_update() calls this with a single weight vector (`W[0]`), the
    feature matrix `X` and the targets `t`, and uses the result as a
    per-sample mask: one truthy/falsy entry for each row of `X`.
    """
    z = ...  # placeholder to be filled in as part of the exercise
    return z

def weight_update(W, X, t):
    """Sum X[n] * t[n] over the samples flagged by `misclassified`.

    The result has one entry per feature column of X. It is the direction in
    which the caller moves the weights (scaled by the learning rate), not the
    value of the perceptron criterion itself.
    """
    # Turn the per-sample mask and targets into columns, then stretch them
    # across the feature axis so they line up with X element by element.
    wrong = np.expand_dims(misclassified(W[0], X, t), axis=1)
    wrong = np.broadcast_to(wrong, X.shape)
    targets = np.expand_dims(t, axis=1)
    targets = np.broadcast_to(targets, X.shape)
    # Correctly classified samples contribute nothing to the sum.
    contributions = np.where(wrong, X * targets, np.zeros_like(X))
    return contributions.sum(axis=0)

In [None]:
# Initialize the W list
epoch = 0
W_list = [W_0]

In [None]:
# Do one update manually and check progress

# Apply update: the new weights are appended, so earlier entries of W_list are kept
W_list.append(W_list[epoch] + learning_rate * weight_update(W_list[epoch], X_train, t_11))

# Plot the decision regions given by the updated weights
plot_scatter(X, t_11)
plot_mesh(X, lambda x: predict(W_list[epoch+1], phi(x)), n_class=2)

# Advance the counter so W_list[epoch] is the newest weight vector again
# (assuming the cells are run once, in order)
epoch += 1

In [None]:
# Do rest of the updates (following slide 18)
# Runs a fixed budget of updates (up to 1000 in total, counting those already
# done); there is no early exit once nothing is misclassified.
for e in range(epoch, 1000):
    W_list.append(W_list[epoch] + learning_rate * weight_update(W_list[epoch], X_train, t_11))
    epoch += 1  # keeps W_list[epoch] pointing at the newest weights; `e` itself is not used

# The final weights are the last entry of the history
W_perceptron = W_list[-1]
W_perceptron

### Perform class-predictions
You should be able to classify all data points correctly, i.e. the check below should evaluate to `True`.

In [None]:
# predict() returns labels in {0, 1}; map them to {-1, 1} so they can be compared with t_11
preds = 2 * predict(W_perceptron, X_train) - 1
# True only if every training point is classified correctly
np.array_equal(preds, t_11)

### Plot the decision boundary

In [None]:
plot_scatter(X, preds)
plot_mesh(X, lambda x: predict(W_perceptron, phi(x)), n_class=2)