## Perceptron
- **Goal:** Predict P(y|X) 
- **Libraries:** sklearn, numpy, math
- **Data:** Digit Dataset
- **Metric:** Accuracy

#### Disclaimer: This implementation is not optimized in any way and therefore should not be used in production.

In [25]:
import math
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

In [24]:
class Perceptron:
    """Single-layer network with sigmoid outputs, trained by batch gradient descent.

    Each output unit computes ``sigmoid(X @ W + w0)``. Training minimises the
    squared error ``0.5 * (y - y_pred) ** 2`` using the full data set on
    every epoch.

    Args:
        epochs: Number of full-batch gradient steps performed by ``fit``.
        learning_rate: Step size applied to every gradient update.
    """

    def __init__(self, epochs=5000, learning_rate=0.01):
        self.epochs = epochs
        self.learning_rate = learning_rate

    def fit(self, X, y):
        """Learn the weights ``W`` and bias ``w0`` from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples, n_outputs). A 1-D array of
                length n_samples is treated as a single output column.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D, ``y`` is not 1-D or 2-D, the
                sample counts of ``X`` and ``y`` differ, or ``X`` has no
                features.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            # A flat target vector is one output unit; make it a column so
            # the matrix shapes below line up.
            y = y.reshape(-1, 1)
        if X.ndim != 2 or y.ndim != 2:
            raise ValueError("X must be 2-D and y must be 1-D or 2-D")

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            # Without this check a single-row y would broadcast silently.
            raise ValueError("X and y must contain the same number of samples")
        if n_features == 0:
            raise ValueError("X must have at least one feature")
        n_outputs = y.shape[1]

        # Small symmetric initial weights scaled by the input width.
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, n_outputs))
        self.w0 = np.zeros((1, n_outputs))

        for _ in range(self.epochs):
            # Forward pass.
            y_pred = self.sigmoid(X.dot(self.W) + self.w0)
            # Chain rule: dLoss/dOutput * dSigmoid/dInput. The sigmoid
            # derivative is y_pred * (1 - y_pred), so the activation that
            # was just computed is reused instead of being evaluated again.
            error_gradient = self.loss(y, y_pred, gradient=True) * (y_pred * (1 - y_pred))
            # Gradients of the loss with respect to the weights and bias.
            grad_wrt_w = X.T.dot(error_gradient)
            grad_wrt_w0 = np.sum(error_gradient, axis=0, keepdims=True)
            # Gradient-descent update.
            self.W -= self.learning_rate * grad_wrt_w
            self.w0 -= self.learning_rate * grad_wrt_w0

        return self

    def predict(self, X):
        """Return the sigmoid activations for ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples, n_outputs) with values in [0, 1].
        """
        return self.sigmoid(np.asarray(X).dot(self.W) + self.w0)

    # Helper functions
    def sigmoid(self, x, gradient=False):
        """Evaluate the logistic function, or its derivative, element-wise.

        Args:
            x: Scalar or array-like input.
            gradient: When true, return ``sigmoid(x) * (1 - sigmoid(x))``
                instead of ``sigmoid(x)``.

        Returns:
            NumPy array with the same shape as ``x``.
        """
        x = np.asarray(x)
        # exp(-|x|) always lies in [0, 1], so it cannot overflow no matter
        # how large |x| is; the two branches are algebraically equal to
        # 1 / (1 + exp(-x)).
        decay = np.exp(-np.abs(x))
        value = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        if gradient:
            return value * (1 - value)
        return value

    def loss(self, y, y_pred, gradient=False):
        """Squared-error loss, or its derivative with respect to ``y_pred``.

        Args:
            y: Target values.
            y_pred: Predicted values, broadcastable against ``y``.
            gradient: When true, return ``-(y - y_pred)`` instead of the
                element-wise loss ``0.5 * (y - y_pred) ** 2``.

        Returns:
            Element-wise loss or gradient.
        """
        if gradient:
            return -(y - y_pred)
        return 0.5 * np.power((y - y_pred), 2)
    

In [21]:
def normalise(X, axis=-1, order=2):
    """Scale ``X`` so that every slice along ``axis`` has unit norm.

    Args:
        X: Input array.
        axis: Axis along which the norm is computed.
        order: Order of the norm, passed through to ``np.linalg.norm``.

    Returns:
        ``X`` divided by its norm along ``axis``. Slices whose norm is zero
        are divided by one, i.e. left unchanged.
    """
    norms = np.atleast_1d(np.linalg.norm(X, order, axis))
    # Substitute 1 for zero norms so all-zero slices do not divide by zero.
    divisors = np.where(norms == 0, 1, norms)
    return X / np.expand_dims(divisors, axis)

def onehot_ecoding(x, columns=None):
    """One-hot encode a 1-D sequence of non-negative integer class labels.

    Args:
        x: Array-like of integer labels (a NumPy array or a plain sequence).
        columns: Width of the encoding. When omitted (or any falsy value),
            it is inferred as ``max(x) + 1``, or 0 for empty input.

    Returns:
        Float array of shape ``(len(x), columns)`` with a single 1 per row
        at the index given by the corresponding label.
    """
    labels = np.asarray(x)
    n_rows = labels.shape[0]
    if not columns:
        # An empty input has no maximum; fall back to a zero-width encoding.
        columns = int(labels.max()) + 1 if labels.size else 0
    encoded = np.zeros((n_rows, columns))
    if labels.size:
        # Skipped for empty input: an empty list converts to a float array,
        # which NumPy rejects as an index.
        encoded[np.arange(n_rows), labels] = 1
    return encoded

### Loading Data

In [22]:
def load_data(test_size=0.33, random_state=None):
    """Load the scikit-learn digits data set and split it for training.

    The feature rows are passed through ``normalise`` and the integer
    targets through ``onehot_ecoding`` before splitting.

    Args:
        test_size: Passed to ``train_test_split``; the default keeps the
            original 0.33 test share.
        random_state: Passed to ``train_test_split``. Supply an int to make
            the split reproducible; the default ``None`` keeps the original
            unseeded behaviour.

    Returns:
        The list ``[X_train, X_test, y_train, y_test]`` produced by
        ``train_test_split``.
    """
    digits = datasets.load_digits()
    X = normalise(digits.data)
    y = onehot_ecoding(digits.target)
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = load_data()

### Creating Model

In [23]:
# Train the perceptron on the training split.
pt_clf = Perceptron()
pt_clf.fit(X_train, y_train)

# Turn the per-class activations and the one-hot targets back into class
# indices, then score the share of matching labels.
y_pred = pt_clf.predict(X_test).argmax(axis=1)
y_test = y_test.argmax(axis=1)
accuracy = np.mean(y_pred == y_test)

print("Accuracy:", accuracy)

Accuracy: 0.9511784511784511
