In [17]:
from dataclasses import dataclass
from keras.datasets import mnist
from keras.utils import to_categorical
import time
import numpy as np
from ai_utils import ai_utils
        

class Model(object):
    def __init__(self, m_input_size, hidden_size, n_output_size, x_train, y_train, layers = None):
        if not layers:
            raise NotImplementedError("Nueral network incomplete (missing layers)")
        self.m_input_size = m_input_size
        self.hidden_size = hidden_size
        self.n_output_size = n_output_size
        self.x_train = x_train
        self.y_train = y_train
        self.layers = layers

    def forward_propagate(self, input_data):
        output_layer1 = self.layers[0].forward_propagate(input_data, activation_function=ai_utils.ActivationFunctions.relu)
        output_layer2 = self.layers[1].forward_propagate(output_layer1, activation_function=ai_utils.ActivationFunctions.softmax)
        return output_layer2
        
    """ def backward(self, dvalues):
        # gradients on parameters
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # gradients on values
        self.dinputs = np.dot(dvalues, self.weights.T) """
    
    def backward_propagate(self, y_true, y_pred, learning_rate, activation_function_derivative=ai_utils.ActivationFunctions.softmax, loss_function_derivative=ai_utils.LossFunctions.categorical_cross_entropy_derivative):
        # gradient of categorical cross-entropy loss and softmax
        output_error = y_pred - y_true
        # second layer first
        error_layer2 = self.layers[1].backward_propagate(output_error, learning_rate)
        # first layer
        self.layers[0].backward_propagate(error_layer2, learning_rate)
    
    def fit(self, x_train, y_train, epochs = 50, learning_rate=0.01):
        for epoch in range(epochs):
            total_loss = 0
            correct_predictions = 0

            for x, y in zip(x_train, y_train):
                x = x.reshape(x.shape[0], -1)
                # forward propagation
                y_pred = self.forward_propagate(x)

                # calculate loss (MSE for example)
                loss = ai_utils.LossFunctions.mse_loss(y, y_pred)
                total_loss += loss

                # Backward pass and update weights
                self.backward_propagate(y, y_pred, learning_rate)

                # Calculate accuracy (for classification tasks)
                if np.argmax(output) == np.argmax(y):
                    correct_predictions += 1

            # Calculate average loss and accuracy for the epoch
            average_loss = total_loss / len(x_train)
            accuracy = correct_predictions / len(x_train)

            # Print the metrics
            print(f"Epoch {epoch+1}/{epochs} - Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}")
        """ # TODO: change to enumerate layers
        lowest_loss = 9999999 # some initial value

        for epoch in range(10000):

            layer1_output = self.layers[0].forward_propagate(self.data, activation_function=ai_utils.ActivationFunctions.relu)
            layer2_output = self.layers[1].forward_propagate(layer1_output, activation_function=ai_utils.ActivationFunctions.softmax)

            loss = ai_utils.LossFunctions.categorical_cross_entropy(layer2_output, self.labels)

            predictions = np.argmax(loss, axis=0)
            check_labels = None
            check_labels = self.labels.copy()
            if len(check_labels.shape) == 2:
                check_labels = np.argmax(check_labels, axis=1)
            accuracy = np.mean(predictions == check_labels)
            
            # backward propagate
            loss_dinputs = ai_utils.LossFunctions.categorical_cross_entropy_derivative(loss, self.labels)
            self.layers[1].backward(loss_dinputs)
            loss_dinputs = ai_utils.ActivationFunctions.relu(self.layers[1].) """



class DenseLayer(object):
    def __init__(self, m_input_size, n_output_size):
        self.m_input_size = m_input_size
        self.n_output_size = n_output_size
        self.weights = np.random.randn(m_input_size, n_output_size) * 0.01 # multiply by a small number so generated numbers are smaller, improves optimization when training
        self.biases = np.zeros((1, n_output_size))
        self.input = None
        self.gradient_weights = None
        self.gradient_biases = None

    def forward_propagate(self, input_data, activation_function=ai_utils.ActivationFunctions.relu):
        self.input = input_data
        self.output = activation_function(np.dot(input_data, self.weights) + self.biases)
        if activation_function:
            return activation_function(self.output)
        return self.output
    
    def backward_propagate(self, output_error, learning_rate):
        if self.input.ndim == 1:
            self.input = self.input.reshape(1, -1)
        input_error = np.dot(output_error, self.weights.T)
        self.gradient_weights = np.dot(self.input.T, output_error)
        self.gradient_biases = np.sum(output_error, axis=0, keepdims=True)

        # Update weights and biases
        self.weights -= learning_rate * self.gradient_weights
        self.biases -= learning_rate * self.gradient_biases

        return input_error


In [18]:
# initialize the neural network with the size of the input layer, hidden layer, and output layer.
input_size = 28 * 28  # for MNIST, each image is 28x28 pixels
hidden_size = 64  # arbitrary number
output_size = 10  # MNIST has 10 classes (numbers 0-9)

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# preprocess images
# flatten
X_train_flattened = X_train.reshape((X_test.shape[0], -1))
#X_test_flattened = X_test.reshape(X_test.shape[0], -1)

# normalize the data
X_train_flattened = X_train_flattened.astype('float32') / 255
#X_test_flattened = X_test_flattened.astype('float32') / 255.0

# convert labels to one-hot encoding using the custom function
y_train_encoded = to_categorical(y_train, num_classes=10)
#y_test_encoded = to_categorical(y_test, num_classes=10)

layers = [DenseLayer(input_size, hidden_size), DenseLayer(hidden_size, output_size)]

data = Input(X_train_flattened, y_train_encoded)

model = Model(input_size, hidden_size, output_size, X_train_flattened, y_train, layers)

model.fit(X_train_flattened, y_train_encoded)

ValueError: shapes (4704,1) and (784,64) not aligned: 1 (dim 1) != 784 (dim 0)

In [5]:
from dataclasses import dataclass
import numpy as np


@dataclass
class Input:
    def __init__(self, data, batch_size = 16):
        self.data = data
        self.batch_size = batch_size
        self.index = len(data)

    def __iter__(self):
        return self

    def __next__(self):
        if self.index == 0:
            return None
        self.index -= 1
        return self.data[self.index]


random_array = np.random.rand(3, 3)

data = Input(random_array)

print(str(data.__next__()))
print(str(data.__next__()))
print(str(data.__next__()))
print(str(data.__next__()))

[0.02306606 0.30160894 0.9097127 ]
[0.80744778 0.19457231 0.28212657]
[0.25028443 0.3662832  0.27355988]
None
