In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, f1_score
from keras.src.utils import to_categorical
from keras.datasets import mnist
import pandas as pd

# Shared random generator used by Perceptron.train for weight initialisation
# and for shuffling the sample order. It is created from a fixed seed so those
# draws are repeatable after a kernel restart instead of changing on every run.
SEED = 42
rng = np.random.default_rng(SEED)

In [3]:
class Perceptron:
    """
    Fully connected feed-forward classifier trained one sample at a time.

    Every layer except the last uses the logistic activation; the last layer
    uses softmax. During training the output error signal is ``a - y`` (the
    gradient of half the squared error between the softmax output and the
    target row), which is pushed back through each activation's derivative.

    Attributes:
        layers_count: number of weight layers (``len(layers) - 1``)
        w, b: per-layer weight matrices of shape (fan_in, fan_out) and bias vectors
        f, cdf: per-layer activation functions and their backward functions
    """

    def __init__(self, *layers):
        """
        :param layers: layer widths in order (input size, hidden sizes..., output size)
        :raises ValueError: if fewer than two sizes are given
        """
        if len(layers) < 2:
            raise ValueError('Perceptron needs at least an input size and an output size')

        self.layers_count = len(layers) - 1

        # Zero-filled rather than uninitialised memory, so predict() called
        # before train() gives a deterministic result. train() overwrites these.
        self.w = [np.zeros((layers[k], layers[k + 1]), dtype=np.float64)
                  for k in range(self.layers_count)]
        self.b = [np.zeros(layers[k + 1], dtype=np.float64)
                  for k in range(self.layers_count)]

        hidden = self.layers_count - 1
        self.f = [self.logistic] * hidden + [self.softmax]
        self.cdf = [self.logistic_cdf] * hidden + [self.softmax_cdf]

    def logistic(self, z):
        """
        Element-wise logistic function 1 / (1 + exp(-z)).

        Only exp(-|z|) is ever evaluated, so large negative inputs cannot
        overflow inside exp.
        """
        z = np.asarray(z, dtype=np.float64)
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def logistic_cdf(self, c, z):
        """
        Push the upstream gradient ``c`` back through the logistic at ``z``.

        :return: c * s * (1 - s) with s = logistic(z)
        """
        s = self.logistic(z)
        return c * s * (1 - s)

    def softmax(self, z):
        """
        Softmax over the last axis.

        The per-row maximum is subtracted before exponentiating; this leaves
        the result mathematically unchanged but keeps exp() from overflowing
        (which would otherwise produce inf / inf = nan for large logits).
        """
        z = np.asarray(z, dtype=np.float64)
        e = np.exp(z - np.max(z, axis=-1, keepdims=True))
        return e / np.sum(e, axis=-1, keepdims=True)

    def softmax_cdf(self, c, z):
        """
        Push the upstream gradient ``c`` back through the softmax at ``z``.

        The softmax Jacobian is J[i, j] = s[i] * (delta_ij - s[j]), so
        (c @ J)[j] = s[j] * (c[j] - sum_i c[i] * s[i]). Using that closed form
        avoids materialising the n x n Jacobian.
        """
        s = self.softmax(z)
        return s * (c - np.sum(c * s, axis=-1, keepdims=True))

    def train(self, x, y, gens, lr):
        """
        Re-initialise all parameters and fit them with per-sample gradient descent.

        Weights are drawn uniformly from [-r, r] with r = sqrt(6 / (fan_in + fan_out))
        and biases are reset to zero, so any previous training is discarded.
        After every generation the accuracy on (x, y) is printed.

        :type x: 2d np.ndarray
        :type y: 2d np.ndarray
        :type gens: positive int
        :type lr: float between 0 and 1
        """
        for w, b in zip(self.w, self.b):
            fan_in, fan_out = w.shape
            r = np.sqrt(6 / (fan_in + fan_out))
            w[:, :] = rng.uniform(-r, r, size=w.shape)
            b[:] = 0

        last = self.layers_count - 1
        z = [None] * self.layers_count      # pre-activations per layer
        a = [None] * self.layers_count      # activations per layer
        delta = [None] * self.layers_count  # gradient w.r.t. each pre-activation

        for g in range(gens):
            it = np.arange(len(x))
            rng.shuffle(it)
            for i in it:
                # Forward pass.
                layer_in = x[i]
                for k in range(self.layers_count):
                    z[k] = layer_in @ self.w[k] + self.b[k]
                    a[k] = self.f[k](z[k])
                    layer_in = a[k]

                # Backward pass: every delta is computed with the weights as
                # they were during the forward pass, before any update below.
                err = a[last] - y[i]
                for k in range(last, -1, -1):
                    delta[k] = self.cdf[k](err, z[k])
                    if k > 0:
                        err = delta[k] @ self.w[k].T

                # Parameter update; step * input-column is the outer product
                # input x delta, matching the (fan_in, fan_out) weight shape.
                layer_in = x[i]
                for k in range(self.layers_count):
                    step = lr * delta[k]
                    self.w[k] -= step * layer_in[:, np.newaxis]
                    self.b[k] -= step
                    layer_in = a[k]
            print('Generation', g, 'Accuracy', accuracy(y, self.predict(x)))

    def predict(self, x):
        """
        Run the forward pass.

        :param x: np.ndarray whose last axis has the input-layer size
        :return: np.ndarray of softmax outputs along the last axis
        """
        a = x
        for k in range(self.layers_count):
            a = self.f[k](a @ self.w[k] + self.b[k])
        return a

def accuracy(y_true, y_pred):
    """
    Accuracy between two score / one-hot arrays.

    Both arguments are reduced to class indices by taking the argmax over the
    last axis, flattened, and then compared with sklearn's ``accuracy_score``.
    """
    true_labels = np.ravel(np.argmax(y_true, axis=-1))
    predicted_labels = np.ravel(np.argmax(y_pred, axis=-1))
    return accuracy_score(true_labels, predicted_labels)

def f1(y_true, y_pred):
    """
    Weighted F1 score between two score / one-hot arrays.

    Both arguments are reduced to class indices by taking the argmax over the
    last axis, flattened, and then passed to sklearn's ``f1_score`` with
    ``average='weighted'``.
    """
    true_labels = np.ravel(np.argmax(y_true, axis=-1))
    predicted_labels = np.ravel(np.argmax(y_pred, axis=-1))
    return f1_score(true_labels, predicted_labels, average='weighted')

In [10]:
def iris_test():
    """
    Train a 4-5-3 Perceptron on a 70% split of the Iris data (25 generations,
    learning rate 0.1) and print accuracy and weighted F1 on the remaining rows.
    """
    dataset = load_iris()
    one_hot_targets = to_categorical(dataset.target)
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, one_hot_targets, train_size=0.7
    )

    model = Perceptron(4, 5, 3)
    model.train(x_train, y_train, 25, 0.1)

    # predict() has no side effects, so one forward pass serves both metrics.
    test_scores = model.predict(x_test)
    print('Accuracy', accuracy(y_test, test_scores))
    print('F1', f1(y_test, test_scores))

iris_test()

Generation 0 Accuracy 0.638095238095238
Generation 1 Accuracy 0.6952380952380952
Generation 2 Accuracy 0.6952380952380952
Generation 3 Accuracy 0.6952380952380952
Generation 4 Accuracy 0.6952380952380952
Generation 5 Accuracy 0.6952380952380952
Generation 6 Accuracy 0.9333333333333333
Generation 7 Accuracy 0.8857142857142857
Generation 8 Accuracy 0.8476190476190476
Generation 9 Accuracy 0.7047619047619048
Generation 10 Accuracy 0.7619047619047619
Generation 11 Accuracy 0.8666666666666667
Generation 12 Accuracy 0.7142857142857143
Generation 13 Accuracy 0.9238095238095239
Generation 14 Accuracy 0.819047619047619
Generation 15 Accuracy 0.9428571428571428
Generation 16 Accuracy 0.8761904761904762
Generation 17 Accuracy 0.9428571428571428
Generation 18 Accuracy 0.9142857142857143
Generation 19 Accuracy 0.8380952380952381
Generation 20 Accuracy 0.9809523809523809
Generation 21 Accuracy 0.8666666666666667
Generation 22 Accuracy 0.8857142857142857
Generation 23 Accuracy 0.9809523809523809
Gene

In [118]:
def mnist_test():
    """
    Train a one-hidden-layer (64 units) Perceptron on the first 10000 MNIST
    training images (20 generations, learning rate 0.2) and print accuracy and
    weighted F1 on the full MNIST test set.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Only a prefix of the training set is used.
    subset_size = 10000
    train_images = train_images[:subset_size] / 255.0
    train_labels = train_labels[:subset_size]
    test_images = test_images / 255.0

    # Flatten each image to one row per sample.
    x_train = train_images.reshape(len(train_images), -1)
    x_test = test_images.reshape(len(test_images), -1)
    y_train = to_categorical(train_labels)
    y_test = to_categorical(test_labels)

    n_inputs = x_train.shape[1]
    n_classes = y_train.shape[1]
    model = Perceptron(n_inputs, 64, n_classes)
    model.train(x_train, y_train, 20, 0.2)

    # predict() has no side effects, so one forward pass serves both metrics.
    test_scores = model.predict(x_test)
    print('Accuracy', accuracy(y_test, test_scores))
    print('F1', f1(y_test, test_scores))

mnist_test()

Generation 0 Accuracy 0.9251
Generation 1 Accuracy 0.9467
Generation 2 Accuracy 0.9605
Generation 3 Accuracy 0.9718
Generation 4 Accuracy 0.9761
Generation 5 Accuracy 0.9799
Generation 6 Accuracy 0.9831
Generation 7 Accuracy 0.9833
Generation 8 Accuracy 0.9881
Generation 9 Accuracy 0.9897
Generation 10 Accuracy 0.9918
Generation 11 Accuracy 0.9927
Generation 12 Accuracy 0.9925
Generation 13 Accuracy 0.9936
Generation 14 Accuracy 0.994
Generation 15 Accuracy 0.9947
Generation 16 Accuracy 0.9947
Generation 17 Accuracy 0.995
Generation 18 Accuracy 0.9951
Generation 19 Accuracy 0.9952
Accuracy 0.9529
F1 0.9528699203319375


In [8]:
# https://www.kaggle.com/datasets/uciml/mushroom-classification

def mushroom_test():
    """
    One-hot encode every column of mushrooms.csv, train a one-hidden-layer
    (128 units) Perceptron on an 80% split (5 generations, learning rate 0.1)
    and print accuracy and weighted F1 on the remaining rows.
    """
    label_columns = ['class_e', 'class_p']

    encoded = pd.get_dummies(pd.read_csv('mushrooms.csv'))
    df_train, df_test = train_test_split(encoded, train_size=0.8)

    def to_arrays(frame):
        # Features are every dummy column except the two class indicators.
        features = frame.drop(columns=label_columns).to_numpy().astype(np.uint8)
        targets = frame[label_columns].to_numpy().astype(np.uint8)
        return features, targets

    x_train, y_train = to_arrays(df_train)
    x_test, y_test = to_arrays(df_test)

    model = Perceptron(x_train.shape[1], 128, y_train.shape[1])
    model.train(x_train, y_train, 5, 0.1)

    # predict() has no side effects, so one forward pass serves both metrics.
    test_scores = model.predict(x_test)
    print('Accuracy', accuracy(y_test, test_scores))
    print('F1', f1(y_test, test_scores))

mushroom_test()

Generation 0 Accuracy 0.9973842129558393
Generation 1 Accuracy 1.0
Generation 2 Accuracy 1.0
Generation 3 Accuracy 1.0
Generation 4 Accuracy 1.0
Accuracy 1.0
F1 1.0


In [120]:
# https://www.kaggle.com/datasets/fedesoriano/stellar-classification-dataset-sdss17

def star_test():
    """
    Classify rows of star_classification.csv as GALAXY / QSO / STAR from the
    u, g, r, i, z and redshift columns.

    Rows with a negative value in any of u, g, r, i, z are dropped, features
    are standardised with statistics fitted on the training split only, and a
    32-32 hidden-layer Perceptron is trained on an 80% split (10 generations,
    learning rate 0.1). Accuracy and weighted F1 on the remaining rows are printed.
    """
    band_columns = ['u', 'g', 'r', 'i', 'z']
    label_columns = ['class_GALAXY', 'class_QSO', 'class_STAR']

    raw = pd.read_csv('star_classification.csv')
    selected = raw[band_columns + ['redshift', 'class']]
    encoded = pd.get_dummies(selected, columns=['class'])
    non_negative = (encoded[band_columns] >= 0).all(axis=1)
    encoded = encoded[non_negative]

    df_train, df_test = train_test_split(encoded, train_size=0.8)

    def to_arrays(frame):
        # Features are whatever remains after removing the class indicators.
        features = frame.drop(columns=label_columns).to_numpy()
        targets = frame[label_columns].to_numpy().astype(np.uint8)
        return features, targets

    x_train, y_train = to_arrays(df_train)
    x_test, y_test = to_arrays(df_test)

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = Perceptron(x_train.shape[1], 32, 32, y_train.shape[1])
    model.train(x_train, y_train, 10, 0.1)

    # predict() has no side effects, so one forward pass serves both metrics.
    test_scores = model.predict(x_test)
    print('Accuracy', accuracy(y_test, test_scores))
    print('F1', f1(y_test, test_scores))

star_test()

Generation 0 Accuracy 0.9368242103026287
Generation 1 Accuracy 0.9109113863923299
Generation 2 Accuracy 0.959449493118664
Generation 3 Accuracy 0.951986899836248
Generation 4 Accuracy 0.9654120676508456
Generation 5 Accuracy 0.9616495206190078
Generation 6 Accuracy 0.9643370542131776
Generation 7 Accuracy 0.9145864323304042
Generation 8 Accuracy 0.9645620570257128
Generation 9 Accuracy 0.967174589682371
Accuracy 0.96805
F1 0.9679298908497814
