In [None]:
import numpy as np

def activate(inputs, activation_function):
    """Apply the named activation function to a batch of pre-activations.

    Args:
        inputs: 2-D array with one row per sample (the softmax branch
            normalises along axis 1).
        activation_function: ``"relu"`` or ``"softmax"``. Any other value
            leaves ``inputs`` unchanged.

    Returns:
        The activated values, or ``inputs`` itself for an unrecognised name.
    """
    if activation_function == "softmax":
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is exp(0); this avoids overflow without changing the result.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        unnormalised = np.exp(shifted)
        row_totals = np.sum(unnormalised, axis=1, keepdims=True)
        return unnormalised / row_totals
    if activation_function == "relu":
        return np.maximum(0, inputs)
    # Unrecognised names behave as the identity (linear) activation.
    return inputs

def d_activate(inputs, activation_function):
    """Return the element-wise derivative of the named activation function.

    Args:
        inputs: Pre-activation values of the layer.
        activation_function: ``"relu"`` selects the ReLU derivative; every
            other name (including ``"softmax"``) yields ones.

    Returns:
        An array with the same shape as ``inputs``.
    """
    if activation_function != "relu":
        # Any non-ReLU activation gets a derivative of one, i.e. the incoming
        # gradient passes through unchanged.
        return np.ones_like(inputs)
    positive_mask = inputs > 0
    return positive_mask.astype(float)

def cross_entropy_loss(y_hat, y):
    """Compute the mean categorical cross-entropy of a batch of predictions.

    Args:
        y_hat: Predicted class probabilities, one row per sample.
        y: Targets, either a 1-D array of integer class indices or a 2-D
            one-hot array with the same shape as ``y_hat``.

    Returns:
        The average negative log-probability assigned to the true classes.
    """
    # Keep probabilities away from exactly 0 and 1 so the log stays finite.
    clipped = np.clip(y_hat, 1e-7, 1 - 1e-7)
    if len(y.shape) != 1:
        # One-hot targets: the mask zeroes out every non-target column.
        picked = np.sum(clipped * y, axis=1)
    else:
        # Integer targets: pick the target column of each row directly.
        row_ids = range(len(y_hat))
        picked = clipped[row_ids, y]
    log_likelihood = np.log(picked)
    return -np.mean(log_likelihood)

def d_cross_entropy_loss(y_hat, y):
    """Return ``(y_hat - y) / batch_size`` with ``y`` in one-hot form.

    Args:
        y_hat: Predicted class probabilities, one row per sample.
        y: Targets, either a 1-D array of integer class indices or a 2-D
            one-hot array with the same shape as ``y_hat``.

    Returns:
        An array with the same shape as ``y_hat``.
    """
    batch = len(y_hat)
    if len(y.shape) != 1:
        targets = y
    else:
        # Expand integer labels into one-hot rows by indexing an identity matrix.
        targets = np.eye(y_hat.shape[1])[y]
    return (y_hat - targets) / batch

def propagate_forward(weights, activations, biases, activation_function):
    """Compute one layer's output from the previous layer's activations.

    Args:
        weights: Weight matrix of the layer.
        activations: Output of the previous layer (or the network input).
        biases: Bias array of the layer; it is transposed before being added
            to the weighted sum.
        activation_function: Name forwarded to ``activate``.

    Returns:
        The layer's activated output.
    """
    pre_activation = np.dot(activations, weights) + biases.T
    return activate(pre_activation, activation_function)

def propagate_backward(weights, activations, dl_dz, biases, activation_function):
    """Back-propagate a gradient through one layer.

    Args:
        weights: Weight matrix of the layer.
        activations: Input that was fed to the layer in the forward pass.
        dl_dz: Gradient of the loss arriving from the following layer (or
            from the loss function). It is not modified.
        biases: Bias array of the layer; it is transposed before being added
            to the weighted sum.
        activation_function: Name forwarded to ``d_activate``.

    Returns:
        A tuple ``(d_weights, d_biases, d_inputs)``: the gradients with
        respect to the weights, the biases (same orientation as ``biases``),
        and the layer's input.
    """
    # Recompute the pre-activation so the activation derivative is evaluated
    # at the same point as in the forward pass.
    pre_activation = np.dot(activations, weights) + biases.T
    d_activation = d_activate(pre_activation, activation_function)
    # Build a new array instead of using ``*=``: the in-place form overwrote
    # the caller's gradient and raised for integer-dtype inputs.
    local_grad = dl_dz * d_activation
    d_weights = np.dot(activations.T, local_grad)
    # Sum over the batch axis, then transpose to match the bias layout.
    d_biases = np.sum(local_grad, axis=0, keepdims=True).T
    d_inputs = np.dot(local_grad, weights.T)
    return d_weights, d_biases, d_inputs

class NeuralNet:
    """Fully connected feed-forward classifier trained by gradient descent.

    Hidden layers use the ``"relu"`` activation and the output layer uses
    ``"softmax"``; training minimises the cross-entropy loss.
    """

    def __init__(self, input_dim, output_dim, neurons=None):
        """Create the layers with small random weights and biases.

        Args:
            input_dim: Number of input features.
            output_dim: Number of output classes.
            neurons: Sizes of the hidden layers, in order. Any iterable of
                ints is accepted; ``None`` (the default) means no hidden
                layers.
        """
        # Copy into a fresh list: this avoids a shared mutable default
        # argument and lets callers pass a tuple or other iterable.
        hidden = [] if neurons is None else list(neurons)
        fan_in = [input_dim] + hidden
        fan_out = hidden + [output_dim]
        # All weights are drawn before any bias so results for a given seed
        # stay reproducible.
        self.weights = [0.01 * np.random.randn(n, m) for n, m in zip(fan_in, fan_out)]
        self.biases = [0.01 * np.random.randn(n, 1) for n in fan_out]
        self.activation_functions = ["relu"] * len(hidden) + ["softmax"]

    def forward(self, x):
        """Run a forward pass and cache every layer's output.

        Args:
            x: Input batch, one row per sample.

        Returns:
            The output of the last layer. ``self.activations`` holds the
            input followed by each layer's output.
        """
        self.activations = [x]
        for layer_weights, layer_biases, activation_function in zip(
            self.weights, self.biases, self.activation_functions
        ):
            x = propagate_forward(layer_weights, x, layer_biases, activation_function)
            self.activations.append(x)
        return x

    def adjust_weights(self, x, y, learning_rate=1e-4):
        """Perform one gradient-descent update on the batch ``(x, y)``.

        Args:
            x: Input batch, one row per sample.
            y: Targets as integer class indices or one-hot rows.
            learning_rate: Step size applied to every gradient.
        """
        y_hat = self.forward(x)
        dl_dz = d_cross_entropy_loss(y_hat, y)
        # Walk the layers from last to first, feeding each layer's input
        # gradient to the layer before it.
        for i in reversed(range(len(self.weights))):
            d_weights, d_biases, dl_dz = propagate_backward(
                self.weights[i],
                self.activations[i],
                dl_dz,
                self.biases[i],
                self.activation_functions[i],
            )
            self.weights[i] -= learning_rate * d_weights
            self.biases[i] -= learning_rate * d_biases

    def train_net(self, x, y, batch_size=32, epochs=100, learning_rate=1e-4):
        """Train on the whole dataset and print progress every 100 epochs.

        Args:
            x: Training inputs, one row per sample.
            y: Targets as integer class indices or one-hot rows.
            batch_size: Currently unused; every update uses all of ``x``.
            epochs: Index of the last epoch. ``epochs + 1`` updates are run
                so that the epoch numbered ``epochs`` is included.
            learning_rate: Step size passed to ``adjust_weights``.
        """
        # Accuracy is measured against class indices, so decode one-hot
        # targets once up front (the loss functions accept both layouts).
        labels = y if len(y.shape) == 1 else np.argmax(y, axis=1)
        for epoch in range(epochs + 1):
            self.adjust_weights(x, y, learning_rate)
            if epoch % 100 == 0:
                y_hat = self.forward(x)
                loss = cross_entropy_loss(y_hat, y)
                predictions = np.argmax(y_hat, axis=1)
                accuracy = np.mean(predictions == labels)
                print(f"Epoch {epoch}, Loss: {loss:.3f}, Accuracy: {accuracy:.3f}")

def spiral_data(samples, classes):
    """Generate a noisy 2-D spiral dataset with one arm per class.

    Args:
        samples: Number of points per class.
        classes: Number of classes (spiral arms).

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(samples * classes, 2)`` and
        ``y`` is a ``uint8`` array holding the class index of each row.
    """
    total = samples * classes
    points = np.zeros((total, 2))
    labels = np.zeros(total, dtype='uint8')
    # The radius runs from 0 to 1 along every arm, so compute it once.
    radius = np.linspace(0.0, 1, samples)
    for label in range(classes):
        start = samples * label
        rows = slice(start, start + samples)
        # Each arm covers its own angular range, jittered by Gaussian noise.
        jitter = np.random.randn(samples) * 0.2
        angle = (np.linspace(label * 4, (label + 1) * 4, samples) + jitter) * 2.5
        points[rows] = np.column_stack((radius * np.sin(angle), radius * np.cos(angle)))
        labels[rows] = label
    return points, labels

# Seed NumPy's global RNG first so that both the generated data and the
# initial weights are reproducible.
np.random.seed(0)
# 100 points for each of 3 classes.
X, y = spiral_data(100, 3)
# Two input features, one hidden layer of 64 units, three output classes.
nn = NeuralNet(input_dim=2, output_dim=3, neurons=[64])
# train_net prints the loss and accuracy every 100 epochs.
nn.train_net(X, y, epochs=30000, learning_rate=0.1)


Epoch 0, Loss: 1.099, Accuracy: 0.333
Epoch 100, Loss: 1.098, Accuracy: 0.370
Epoch 200, Loss: 1.098, Accuracy: 0.437
Epoch 300, Loss: 1.098, Accuracy: 0.423
Epoch 400, Loss: 1.098, Accuracy: 0.420
Epoch 500, Loss: 1.097, Accuracy: 0.420
Epoch 600, Loss: 1.096, Accuracy: 0.407
Epoch 700, Loss: 1.095, Accuracy: 0.410
Epoch 800, Loss: 1.093, Accuracy: 0.400
Epoch 900, Loss: 1.091, Accuracy: 0.403
Epoch 1000, Loss: 1.088, Accuracy: 0.397
Epoch 1100, Loss: 1.086, Accuracy: 0.393
Epoch 1200, Loss: 1.084, Accuracy: 0.400
Epoch 1300, Loss: 1.082, Accuracy: 0.400
Epoch 1400, Loss: 1.081, Accuracy: 0.407
Epoch 1500, Loss: 1.081, Accuracy: 0.413
Epoch 1600, Loss: 1.080, Accuracy: 0.417
Epoch 1700, Loss: 1.079, Accuracy: 0.430
Epoch 1800, Loss: 1.079, Accuracy: 0.433
Epoch 1900, Loss: 1.078, Accuracy: 0.430
Epoch 2000, Loss: 1.078, Accuracy: 0.433
Epoch 2100, Loss: 1.078, Accuracy: 0.440
Epoch 2200, Loss: 1.078, Accuracy: 0.437
Epoch 2300, Loss: 1.078, Accuracy: 0.437
Epoch 2400, Loss: 1.077, Acc