In [25]:
import numpy as np 
import matplotlib.pyplot as plt 

In [26]:
import warnings 
warnings.filterwarnings('ignore')

#### 0. Module 

In [2]:
class StandardScaler:
    """Standardise features column by column: ``(X - mean) / std``.

    The statistics are learned in ``fit`` along axis 0 (with ``keepdims=True``)
    and stored in ``mean_x`` / ``std_x`` for later use by ``transform`` and
    ``inverse_transform``.
    """

    def __init__(self):
        # Both statistics stay None until fit() has been called.
        self.mean_x = self.std_x = None

    def fit(self, X):
        """Learn the per-column mean and standard deviation of ``X``."""
        reduce_rows = dict(axis=0, keepdims=True)
        self.mean_x = np.mean(X, **reduce_rows)
        # The small constant keeps the later division away from zero.
        self.std_x = np.std(X, **reduce_rows) + 1e-12

    def transform(self, X):
        """Return ``X`` centred and scaled with the fitted statistics."""
        return (X - self.mean_x) / self.std_x

    def inverse_transform(self, X_norm):
        """Undo ``transform``: map standardised values back to the original scale."""
        return X_norm * self.std_x + self.mean_x

    def fit_transform(self, X):
        """Fit on ``X`` and return its standardised version in one call."""
        self.fit(X)
        return self.transform(X)

class OnehotEncoder():
    """One-hot encoder for a 1-D collection of class labels.

    ``fit`` records the labels and their sorted unique values; ``transform``
    turns labels into a ``(n_samples, n_classes)`` matrix of zeros and ones, and
    ``inverse_transform`` maps such a matrix (or any score matrix) back to labels
    via ``argmax``.
    """

    def __init__(self):
        self.unique_classes = None   # sorted unique labels, set by fit()
        self.labels = None           # labels passed to fit()
        self.encoded_labels = None   # encoding of the fitted labels, set by transform()

    def fit(self, labels):
        """Remember ``labels`` and the sorted set of classes they contain."""
        self.labels = labels  # Save labels

        # np.unique returns the classes sorted, which transform() relies on.
        self.unique_classes = np.unique(self.labels)
        print(f'Unique classes: {self.unique_classes}')

    def transform(self, labels=None):
        """
        One-hot encode labels using the classes learned in ``fit``.

        Parameters:
            - labels (array-like, optional): 1-D labels to encode. When omitted,
              the labels given to ``fit`` are encoded (the original behaviour)
              and the result is also stored in ``self.encoded_labels``.

        Return:
            - np.ndarray of shape (n_samples, n_classes) filled with 0.0 / 1.0.

        Raises:
            - RuntimeError: if called before ``fit``.
            - ValueError: if the labels are not 1-D or contain a class that was
              not present during ``fit``.
        """
        if self.unique_classes is None:
            raise RuntimeError('OnehotEncoder.transform() was called before fit()')

        use_fitted = labels is None
        source = np.asarray(self.labels if use_fitted else labels)
        if source.ndim != 1:
            raise ValueError(f'labels must be 1-D, got shape {source.shape}')

        classes = self.unique_classes
        if classes.size:
            # classes is sorted, so a binary search yields each label's column.
            columns = np.clip(np.searchsorted(classes, source), 0, classes.size - 1)
            known = classes[columns] == source
        else:
            columns = np.zeros(source.shape, dtype=np.intp)
            known = np.zeros(source.shape, dtype=bool)

        if not np.all(known):
            unseen = np.unique(source[~known])
            raise ValueError(f'labels contain classes not seen during fit: {unseen}')

        encoded = np.zeros(shape=(len(source), len(classes)))
        encoded[np.arange(len(source)), columns] = 1

        # Only the encoding of the fitted labels is cached, so the cache always
        # corresponds to self.labels.
        if use_fitted:
            self.encoded_labels = encoded
        return encoded

    def inverse_transform(self, y_onehot):
        """
        Map each row of ``y_onehot`` back to the class at its largest entry.

        Parameters:
            - y_onehot (np.ndarray): 2-D array with one column per fitted class.

        Return:
            - np.ndarray of class labels, one per row.

        Raises:
            - RuntimeError: if called before ``fit``.
        """
        if self.unique_classes is None:
            raise RuntimeError('OnehotEncoder.inverse_transform() was called before fit()')

        winners = np.argmax(y_onehot, axis=1)
        return self.unique_classes[winners]
class data:
    """Namespace for dataset helpers: mini-batching and train/test splitting.

    Both public helpers are static, so they can be called on the class
    (``data.DataLoader(...)``) or on an instance.
    """

    def __init__(self):
        pass

    @staticmethod
    def _as_aligned_arrays(inputs, labels):
        """Convert both arguments to arrays and check they have the same length."""
        inputs = np.asarray(inputs)
        labels = np.asarray(labels)
        if len(inputs) != len(labels):
            raise ValueError(
                f'inputs and labels must have the same length, got {len(inputs)} and {len(labels)}'
            )
        return inputs, labels

    @staticmethod
    def _sample_order(n_samples, shuffle, random_state):
        """Return the sample indices 0..n-1, shuffled when requested.

        An integer ``random_state`` seeds a private ``RandomState`` so the global
        NumPy RNG is left untouched; it yields the same permutation that seeding
        the global RNG with that value would. Without an integer seed the global
        RNG is used.
        """
        order = list(range(n_samples))
        if shuffle:
            if isinstance(random_state, (int, np.integer)):
                np.random.RandomState(random_state).shuffle(order)
            else:
                np.random.shuffle(order)
        return np.asarray(order, dtype=np.intp)

    @staticmethod
    def DataLoader(inputs: np.ndarray, labels: np.ndarray, batchsize: int, random_state: int = None, shuffle=False):
        """
        Split a dataset into a list of mini-batches.

        Parameters:
            - inputs (np.ndarray): samples, indexed along axis 0
            - labels (np.ndarray): one label (or label row) per sample
            - batchsize (int): maximum number of samples per batch (>= 1)
            - random_state (int, optional): seed used when ``shuffle`` is True
            - shuffle (bool): shuffle the samples before batching

        Return:
            - list of ``(inputs_batch, labels_batch)`` tuples of arrays; the last
              batch may be smaller than ``batchsize``.

        Raises:
            - ValueError: if the lengths differ or ``batchsize`` is below 1.
        """
        inputs, labels = data._as_aligned_arrays(inputs, labels)
        if batchsize < 1:
            raise ValueError(f'batchsize must be >= 1, got {batchsize}')

        order = data._sample_order(len(inputs), shuffle, random_state)

        dataloader = []
        for start in range(0, len(order), batchsize):
            chunk = order[start: start + batchsize]
            dataloader.append((inputs[chunk], labels[chunk]))
        return dataloader

    @staticmethod
    def train_test_split(inputs: np.ndarray, labels: np.ndarray, ratio: float, strategy: bool = True, shuffle=True, random_state: int = None):
        """
        Split a dataset into a training part and a test part.

        Parameters:
            - inputs (np.ndarray): samples, indexed along axis 0
            - labels (np.ndarray): one label per sample
            - ratio (float): fraction of samples (per class when stratified)
              assigned to the training part, in [0, 1]
            - strategy (bool): when True, split every class separately so both
              parts keep the class proportions; the result is grouped by class
            - shuffle (bool): shuffle the samples before splitting
            - random_state (int, optional): seed used when ``shuffle`` is True

        Return:
            - X_train, y_train, X_test, y_test as arrays (a part may be empty).

        Raises:
            - ValueError: if the lengths differ, ``ratio`` is outside [0, 1], or
              a stratified split is requested for labels that are not 1-D.
        """
        inputs, labels = data._as_aligned_arrays(inputs, labels)
        if not 0 <= ratio <= 1:
            raise ValueError(f'ratio must be within [0, 1], got {ratio}')

        order = data._sample_order(len(inputs), shuffle, random_state)

        if strategy:
            if labels.ndim != 1:
                raise ValueError('a stratified split requires 1-D labels')

            shuffled_labels = labels[order]
            train_parts, test_parts = [], []
            for cls in np.unique(labels):
                # Indices of this class, in shuffled order.
                members = order[shuffled_labels == cls]
                cut = int(ratio * len(members))
                train_parts.append(members[:cut])
                test_parts.append(members[cut:])

            train_idx = np.concatenate(train_parts) if train_parts else order[:0]
            test_idx = np.concatenate(test_parts) if test_parts else order[:0]
        else:
            cut = int(ratio * len(order))
            train_idx, test_idx = order[:cut], order[cut:]

        return inputs[train_idx], labels[train_idx], inputs[test_idx], labels[test_idx]

In [3]:
class Module:
    """Base class for network components.

    Instances start with empty ``grads`` (gradients) and ``params`` (weights and
    biases) dictionaries; subclasses provide ``forward`` and ``backward``.
    """

    def __init__(self):
        self.grads, self.params = {}, {}

    def forward(self, *inputs):
        """Run the forward pass; must be overridden."""
        message = 'Forward method must be implemmented by subclass'
        raise NotImplementedError(message)

    def backward(self, *dout):
        """Compute gradients for the backward pass; must be overridden."""
        message = 'Backward method must implemented by subclasses'
        raise NotImplementedError(message)

In [4]:
class Sequential:
    """Container that chains layers: forward in order, backward in reverse."""

    def __init__(self, layers):
        self.layers = layers

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        self.x = x  # keep a reference to the network input
        signal = x
        for stage in self.layers:
            signal = stage.forward(signal)
        return signal

    def backward(self, dout):
        """Propagate ``dout`` from the last layer back to the first and return the result."""
        gradient = dout
        for stage in reversed(self.layers):
            gradient = stage.backward(gradient)
        return gradient

    def __iter__(self):
        """Iterate over the contained layers."""
        return iter(self.layers)

#### 1. Linear Layer 

In [5]:
import numpy as np

class Linear:
    """Fully connected layer computing ``input @ W + b``.

    Shapes:
        - input:  (m, n)  batch_size x n_features
        - W:      (n, o)  n_features x n_neurons
        - b:      (o,)    broadcast over the batch
        - output: (m, o)  batch_size x n_neurons
    """

    def __init__(self, n_features, n_neurons):
        """
        Create the layer parameters.

        Parameters:
            - n_features (int): number of input features
            - n_neurons (int): number of neurons (output units)
        """
        # He initialisation: standard-normal draws scaled by sqrt(2 / fan_in).
        draws = np.random.randn(n_features, n_neurons)
        self.params = {
            'W': draws * np.sqrt(2. / n_features),
            'b': np.zeros(n_neurons),
        }
        # Gradient buffers start as zeros shaped like their parameters.
        self.grads = {
            'dW': np.zeros_like(self.params['W']),
            'db': np.zeros_like(self.params['b']),
        }

    def forward(self, input):
        """
        Apply the affine map to a batch.

        Parameters:
            - input (np.array): batch of inputs

        Returns:
            - np.array: ``input @ W + b``
        """
        self.X = input  # cached for the backward pass
        weights, bias = self.params['W'], self.params['b']
        return np.dot(self.X, weights) + bias

    def backward(self, dout):
        """
        Store the parameter gradients and return the gradient for the input.

        Parameters:
            - dout (np.array): gradient arriving from the following layer

        Returns:
            - np.array: gradient with respect to this layer's input
        """
        cached_input = self.X
        self.grads['dW'] = np.dot(cached_input.T, dout)  # weights
        self.grads['db'] = np.sum(dout, axis=0)          # biases, summed over the batch
        return np.dot(dout, self.params['W'].T)


#### 2. Activation layer 

In [6]:
class Activation(Module):
    """Activation layer selected by name.

    ``forward`` caches its input in ``self.x``; ``backward`` evaluates the chosen
    derivative at that cached input. ``Module.__init__`` is not invoked here, so
    instances carry no ``params`` / ``grads`` attributes.
    """

    def __init__(self, activation_name):
        """
        Select the activation function by name (case-insensitive).

        Parameters:
            - activation_name (str): name of the activation function.
              Supported list: ['relu', 'leakyrelu', 'tanh', 'sigmoid', 'softmax']

        Raises:
            - ValueError: if the lower-cased name is not supported.
        """
        # name -> (function, derivative); the key order is shown in the error message.
        self.activation_functions = dict(
            tanh=(self.tanh, self.tanh_prime),
            sigmoid=(self.sigmoid, self.sigmoid_prime),
            relu=(self.relu, self.relu_prime),
            leakyrelu=(self.leakyrelu, self.leakyrelu_prime),
            softmax=(self.softmax, self.softmax_prime),
        )

        self.activation_name = activation_name.lower()
        if self.activation_name in self.activation_functions:
            return
        raise ValueError(f'Activation function {self.activation_name} is not supported. Please choose from {list(self.activation_functions.keys())}')

    def forward(self, x):
        """
        Apply the selected activation.

        Parameters:
            - x (np.array): input array

        Return:
            - np.array: the activation evaluated at ``x``.
        """
        self.x = x
        function = self.activation_functions[self.activation_name][0]
        return function(self.x)

    def backward(self, output_gradient):
        """
        Compute the gradient to hand to the previous layer.

        Parameters:
            - output_gradient (np.array): gradient arriving from the following layer.

        Return:
            - np.array: gradient with respect to this layer's input.
        """
        if self.activation_name != 'softmax':
            derivative = self.activation_functions[self.activation_name][1]
            return output_gradient * derivative(self.x)

        # Softmax is expected to be paired with categorical cross-entropy, whose
        # gradient already accounts for the softmax, so it is passed through as is.
        return output_gradient

    @staticmethod
    def sigmoid(x):
        """Logistic function; the input is clipped to [-500, 500] before ``exp``."""
        clipped = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-clipped))

    @staticmethod
    def sigmoid_prime(x):
        """Derivative of the logistic function at ``x``."""
        activated = Activation.sigmoid(x)
        return activated * (1 - activated)

    @staticmethod
    def tanh(x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    @staticmethod
    def tanh_prime(x):
        """Derivative of tanh at ``x``."""
        squared = np.tanh(x) ** 2
        return 1 - squared

    @staticmethod
    def relu(x):
        """Rectified linear unit."""
        return np.maximum(0, x)

    @staticmethod
    def relu_prime(x):
        """Derivative of ReLU: 1 where ``x > 0``, otherwise 0."""
        is_positive = x > 0
        return np.where(is_positive, 1, 0)

    @staticmethod
    def leakyrelu(x, alpha=0.1):
        """Leaky ReLU: ``x`` where positive, ``alpha * x`` elsewhere."""
        is_positive = x > 0
        return np.where(is_positive, x, alpha * x)

    @staticmethod
    def leakyrelu_prime(x, alpha=0.1):
        """Derivative of leaky ReLU: 1 where ``x > 0``, otherwise ``alpha``."""
        is_positive = x > 0
        return np.where(is_positive, 1, alpha)

    @staticmethod
    def softmax(x):
        """Row-wise softmax; each row's maximum is subtracted first for numerical stability."""
        shifted = x - np.max(x, axis=1, keepdims=True)
        weights = np.exp(shifted)
        return weights / np.sum(weights, axis=1, keepdims=True)

    @staticmethod
    def softmax_prime(softmax_output):
        """
        Placeholder for the softmax derivative; always raises.

        The softmax derivative is a Jacobian matrix and is rarely needed on its
        own: combined with cross-entropy loss, the gradient simplifies to the
        gradient supplied by the loss, which is computationally efficient.

        Raises:
            - NotImplementedError: always.
        """
        raise NotImplementedError(
            "The derivative of softmax is rarely used directly. In practice, "
            "softmax is combined with cross-entropy loss, which simplifies the "
            "gradient computation during backpropagation. If you need the exact "
            "Jacobian matrix, consider implementing a specialized function."
        )

#### 3. Criteria

In [17]:
# backward() scales its gradient according to `reduction`, so forward() and
# backward() always describe the same objective.

class Loss(Module):
    """Loss function selected by name, with a configurable reduction.

    ``forward`` returns the reduced loss value; ``backward`` returns the gradient
    of that reduced value with respect to the predictions (for categorical
    cross-entropy: with respect to the softmax input, since the softmax layer
    passes this gradient through unchanged).
    """

    # Reductions understood by forward() and backward().
    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, loss_name, reduction='mean'):
        """
        Choose loss function from the supported list.
        Parameters:
            - loss_name (str): Name of the loss function. Options: [mse, binary_cross_entropy, categorical_cross_entropy]
            - reduction (str): Reduction type: [mean, sum, none]

        Raises:
            - ValueError: if the loss name or the reduction is not supported.
        """
        # name -> (loss function, gradient function)
        self.losses_func = {
            'mse': (self.mse, self.mse_prime),
            'binary_cross_entropy': (self.binary_cross_entropy, self.binary_cross_entropy_prime),
            'categorical_cross_entropy': (self.categorical_cross_entropy, self.categorical_cross_entropy_prime),
        }

        self.loss_name = loss_name.lower()
        self.reduction = reduction.lower()
        if self.loss_name not in self.losses_func:
            raise ValueError(f'Loss function {self.loss_name} is not supported. Choose from {list(self.losses_func.keys())}')
        # Fail at construction rather than at the first forward() call.
        if self.reduction not in self._REDUCTIONS:
            raise ValueError(f'The reduction method {self.reduction} is not supported')

    def __apply_reduction(self, loss):
        """Collapse the element-wise / per-sample loss according to ``self.reduction``."""
        if self.reduction == 'mean':
            return np.mean(loss)
        elif self.reduction == 'sum':
            return np.sum(loss)
        elif self.reduction == 'none':
            return loss
        else:
            raise ValueError(f'The reduction method {self.reduction} is not supported')

    def forward(self, y_true, y_pred):
        """
        Apply loss function.
        Parameters:
            - y_true (np.array): True values
            - y_pred (np.array): Predicted values

        Return:
            - Loss value after reduction
        """
        loss_function, _ = self.losses_func[self.loss_name]
        loss = loss_function(y_true, y_pred)
        return self.__apply_reduction(loss)

    def backward(self, y_true, y_pred):
        """
        Compute the gradient of the reduced loss.
        Parameters:
            - y_true (np.array): True values
            - y_pred (np.array): Predicted values

        Return:
            - Gradient for the previous layer. With ``reduction='mean'`` the
              gradient is divided by the number of terms that ``forward``
              averages over (all elements for mse / binary cross-entropy, the
              batch size for categorical cross-entropy). With 'sum' or 'none'
              the unscaled gradient is returned; for 'none' this corresponds to
              an upstream gradient of ones.
        """
        if self.loss_name == 'categorical_cross_entropy':
            # forward() yields one loss value per sample, so the mean runs over rows.
            grad = self._softmax_cross_entropy_delta(y_true, y_pred)
            n_terms = y_true.shape[0]
        else:
            _, gradient = self.losses_func[self.loss_name]
            grad = gradient(y_true, y_pred)
            # forward() averages over every element of the element-wise loss.
            n_terms = np.size(grad)

        if self.reduction == 'mean':
            return grad / n_terms
        return grad

    @staticmethod
    def mse(y_true, y_pred):
        """Element-wise squared error."""
        return (y_true - y_pred) ** 2

    @staticmethod
    def mse_prime(y_true, y_pred):
        """Element-wise derivative of the squared error with respect to ``y_pred`` (unreduced)."""
        return 2 * (y_pred - y_true)

    @staticmethod
    def binary_cross_entropy(y_true, y_pred):
        """Element-wise binary cross-entropy; predictions are clipped away from 0 and 1."""
        y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
        return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    @staticmethod
    def binary_cross_entropy_prime(y_true, y_pred):
        """Element-wise derivative of binary cross-entropy with respect to ``y_pred`` (unreduced)."""
        y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
        return (y_pred - y_true) / (y_pred * (1 - y_pred))

    @staticmethod
    def categorical_cross_entropy(y_true, y_pred):
        """
        Categorical Cross-Entropy Loss:
        Calculates the loss between the true labels and predicted probabilities.
        Parameters:
            - y_true (np.array): True one-hot encoded labels
            - y_pred (np.array): Predicted probabilities

        Return:
            - Per-sample categorical cross-entropy (summed over axis 1)
        """
        # Clip y_pred to avoid log(0)
        y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
        loss = -np.sum(y_true * np.log(y_pred), axis=1)
        return loss

    @staticmethod
    def _softmax_cross_entropy_delta(y_true, y_pred):
        """Unscaled combined softmax + cross-entropy gradient: clipped ``y_pred - y_true``."""
        y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
        return y_pred - y_true

    @staticmethod
    def categorical_cross_entropy_prime(y_true, y_pred):
        """
        Gradient of Categorical Cross-Entropy Loss:
        Returns ``(y_pred - y_true) / batch_size``, i.e. the gradient of the
        batch-mean loss with respect to the input of a preceding softmax.
        Parameters:
            - y_true (np.array): True one-hot encoded labels
            - y_pred (np.array): Predicted probabilities

        Return:
            - Gradient of the loss
        """
        grad = Loss._softmax_cross_entropy_delta(y_true, y_pred) / y_true.shape[0]
        return grad


#### 4. Optimizer 

In [18]:
class Optimizer:
    """Base class for optimizers; stores the learning rate and defines ``step``."""

    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def step(self):
        """Perform one parameter update; must be overridden."""
        message = 'The step method must be implemented by the subclass'
        raise NotImplementedError(message)


class SGD(Optimizer):
    """Stochastic gradient descent with optional momentum.

    ``model`` is any iterable of layers; only layers exposing a ``params``
    attribute are updated, using the gradient stored under ``'d' + name`` in the
    layer's ``grads`` dictionary.
    """

    def __init__(self, model, learning_rate=0.1, momentum=0.0):
        super().__init__(learning_rate)
        self.layers = model
        self.lr = learning_rate
        self.momentum = momentum

        # One velocity per parameter, grouped by layer identity and starting at 0.
        self.velocities = {}
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.velocities[id(layer)] = dict.fromkeys(layer.params, 0)

    def step(self):
        """
        Apply one momentum-SGD update to every parameter, in place.
        """
        for layer in self.layers:
            if not hasattr(layer, 'params'):
                continue

            for name in layer.params:
                velocity = self.velocities[id(layer)]
                gradient = layer.grads[f'd{name}']

                # v <- momentum * v - lr * grad
                velocity[name] = self.momentum * velocity[name] - self.lr * gradient

                # param <- param + v
                layer.params[name] += velocity[name]


class Adam(Optimizer):
    """Adam optimizer with bias-corrected first and second moment estimates.

    ``model`` is any iterable of layers; only layers exposing a ``params``
    attribute are updated, using the gradient stored under ``'d' + name`` in the
    layer's ``grads`` dictionary.
    """

    def __init__(self, model, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        super().__init__(learning_rate)
        self.layers = model
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.t = 0  # number of steps taken so far

        # First (m) and second (v) moment estimates, grouped by layer identity
        # and starting at 0 for every parameter.
        self.m = {}
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.m[id(layer)] = dict.fromkeys(layer.params, 0)
        self.v = {}
        for layer in self.layers:
            if hasattr(layer, 'params'):
                self.v[id(layer)] = dict.fromkeys(layer.params, 0)

    def step(self):
        """
        Apply one Adam update to every parameter, in place.
        """
        self.t += 1
        for layer in self.layers:
            if not hasattr(layer, 'params'):
                continue

            for name in layer.params:
                gradient = layer.grads[f'd{name}']
                first = self.m[id(layer)]
                second = self.v[id(layer)]

                # Exponential moving averages of the gradient and its square.
                first[name] = self.beta1 * first[name] + (1 - self.beta1) * gradient
                second[name] = self.beta2 * second[name] + (1 - self.beta2) * (gradient ** 2)

                # Bias correction compensates for the zero initialisation.
                m_hat = first[name] / (1 - self.beta1 ** self.t)
                v_hat = second[name] / (1 - self.beta2 ** self.t)

                layer.params[name] -= self.lr * m_hat / (v_hat ** 0.5 + self.epsilon)


        


#### 5. Train 

In [22]:
import Points as points

# Build the toy dataset from the project-local Points module.
# NOTE(review): the meaning of the Circle(3000, 3, 2) arguments is not visible
# here; the printed training shape (7200, 2) with three classes is consistent
# with 9000 two-dimensional samples in total — confirm against Points.
spiral = points.Circle(3000, 3, 2) 
X = spiral.P
y = spiral.L
# 80/20 split; train_test_split stratifies per class and shuffles by default.
X_train, y_train, X_test, y_test = data.train_test_split(inputs= X, labels=y, ratio=0.8)

# Standard scaler: statistics are fitted on the training split only and then
# applied to both splits.
scaler = StandardScaler() 
scaler.fit(X_train) 
X_train = scaler.transform(X_train) 
X_test = scaler.transform(X_test) 

# Encoder: only y_train is one-hot encoded; y_test keeps its raw labels, which
# the evaluation cell compares against argmax indices.
encoder = OnehotEncoder() 
encoder.fit(y_train) 
y_train = encoder.transform()

# Check out the shape 
print(f'X.shape= {X_train.shape}')
print(f'y.shape= {y_train.shape}')

# Create dataloader: batches of 32, shuffled for training, in order for testing.
trainloader = data.DataLoader(X_train, y_train, 32, shuffle= True) 
testloader = data.DataLoader(X_test, y_test, 32, shuffle= False) 

Unique classes: [0 1 2]
X.shape= (7200, 2)
y.shape= (7200, 3)


In [23]:
# Define network: 2 inputs -> 32 -> 64 -> 3 class probabilities
model = Sequential([
    Linear(2, 32),
    Activation('relu'),
    Linear(32, 64),
    Activation('relu'),
    Linear(64, 3),
    Activation('softmax')
])

# Define loss and optimizer
loss = Loss(loss_name='categorical_cross_entropy')
optimizer = Adam(model, learning_rate=0.001)

# Train loop
verbose = True
NUM_EPOCHS = 1000
for epoch in range(NUM_EPOCHS):
    training_error = 0.0
    correct = 0
    total = 0
    # Batch-local names: the loop must not overwrite the notebook-level
    # X_train / y_train arrays, otherwise earlier cells cannot be re-run safely.
    for x_batch, y_batch in trainloader:
        # Forward
        y_pred = model.forward(x_batch)

        # Loss
        training_error += loss.forward(y_batch, y_pred)

        # Backward
        dout = loss.backward(y_batch, y_pred)
        model.backward(dout)

        # Calculate accuracy (targets are one-hot, so argmax recovers the class index)
        predicted_classes = np.argmax(y_pred, axis=1)
        labels = np.argmax(y_batch, axis=1)

        correct += (predicted_classes == labels).sum()
        total += len(y_batch)

        optimizer.step()

    # Compute + Display: mean of the per-batch losses and accuracy in percent
    training_error /= len(trainloader)
    training_accuracy = 100*(correct / total )
    if verbose and (epoch % 100) == 99:
        print(f'Epoch {epoch + 1}, Training error = {training_error}, Training accuracy: {training_accuracy:.5f}%')



Epoch 100, Training error = 0.09122743618058475, Training accuracy: 96.43056%
Epoch 200, Training error = 0.0887182564842566, Training accuracy: 96.55556%
Epoch 300, Training error = 0.08725881127278104, Training accuracy: 96.59722%
Epoch 400, Training error = 0.08560742965671357, Training accuracy: 96.59722%
Epoch 500, Training error = 0.08416777719554552, Training accuracy: 96.61111%
Epoch 600, Training error = 0.08283073622665714, Training accuracy: 96.66667%
Epoch 700, Training error = 0.08182449318279744, Training accuracy: 96.77778%
Epoch 800, Training error = 0.08033521274892087, Training accuracy: 96.65278%
Epoch 900, Training error = 0.079252967709406, Training accuracy: 96.58333%
Epoch 1000, Training error = 0.07866039249480161, Training accuracy: 96.62500%


In [24]:
# Initialize counters for correct predictions and total samples
correct = 0
total = 0
val_error = 0  # not updated in this cell

# Batch-local names keep the notebook-level X_test / y_test arrays intact.
for x_batch, y_batch in testloader:
    # Predict
    y_pred = model.forward(x_batch)

    # Decode the predicted class through the encoder, so the comparison with
    # the raw test labels is also correct when labels are not exactly 0..K-1.
    predicted_class = encoder.inverse_transform(y_pred)

    # Count correct predictions
    correct += (predicted_class == y_batch).sum()

    # Update the total number of samples
    total += len(y_batch)

# Compute accuracy
acc = correct / total
print(f'Accuracy: {acc:.4f}')


Accuracy: 0.9678


In [13]:
import tensorflow as tf

# Load the MNIST dataset
# Load the MNIST dataset (train and test splits) through Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Reshape and normalize the data: each image becomes one row of 28 * 28 = 784
# values, divided by 255.0.
x_train = x_train.reshape(-1, 28 * 28) / 255.0  # Flatten and normalize
x_test = x_test.reshape(-1, 28 * 28) / 255.0

# Convert the data to float32 for compatibility with most neural networks
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Print shapes to verify; the labels are left exactly as returned by load_data().
print(f'Training data shape: {x_train.shape}')
print(f'Training labels shape: {y_train.shape}')
print(f'Test data shape: {x_test.shape}')
print(f'Test labels shape: {y_test.shape}')


2024-10-03 09:20:53.998872: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-03 09:20:54.044183: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-03 09:20:54.056881: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-03 09:20:54.136524: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Training data shape: (60000, 784)
Training labels shape: (60000,)
Test data shape: (10000, 784)
Test labels shape: (10000,)


In [14]:
# Standard scaler: fit on the training images only, then apply to both splits
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Encoder: one-hot encode the training labels (test labels stay raw)
encoder = OnehotEncoder()
encoder.fit(y_train)
y_train = encoder.transform()

# Dataloader
# `shuffle` must be passed by keyword: the fourth positional parameter of
# DataLoader is `random_state`, so a positional True would leave the training
# batches unshuffled.
trainloader = data.DataLoader(x_train, y_train, 64, shuffle=True)
testloader = data.DataLoader(x_test, y_test, 64, shuffle=False)

# Check the shape of the first training batch
dataiter = iter(trainloader)
features, labels = next(dataiter)
print(f'X.shape = {features.shape}')
print(f'y.shape = {labels.shape}')

Unique classes: [0 1 2 3 4 5 6 7 8 9]
X.shape = (64, 784)
y.shape = (64, 10)


In [15]:
# Define network: 784 pixels -> 128 -> 64 -> 10 class probabilities
model = Sequential([
    Linear(784, 128),
    Activation('relu'),
    Linear(128, 64),
    Activation('relu'),
    Linear(64, 10),
    Activation('softmax')
])

# Define loss and optimizer
loss = Loss(loss_name='categorical_cross_entropy')
optimizer = Adam(model, learning_rate=0.0005)

# Train loop
verbose = True
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    error = 0
    correct_predictions = 0
    total_samples = 0

    # Batch-local names: the loop must not overwrite the notebook-level
    # x_train / y_train arrays, otherwise re-running the preparation cell would
    # fit the scaler and encoder on the last mini-batch.
    for x_batch, y_batch in trainloader:
        # Forward
        y_pred = model.forward(x_batch)

        # Loss
        error += loss.forward(y_batch, y_pred)

        # Calculate accuracy (targets are one-hot, so argmax recovers the class index)
        predicted_labels = np.argmax(y_pred, axis=1)
        true_labels = np.argmax(y_batch, axis=1)
        correct_predictions += np.sum(predicted_labels == true_labels)
        total_samples += y_batch.shape[0]

        # Backward
        dout = loss.backward(y_batch, y_pred)
        model.backward(dout)

        optimizer.step()

    # Calculate average error (mean of the per-batch losses) and accuracy
    error /= len(trainloader)
    training_accuracy = correct_predictions / total_samples * 100  # Percentage

    if verbose:
        print(f'Epoch {epoch + 1}, Error = {error:.4f}, Training Accuracy = {training_accuracy:.2f}%')


Epoch 1, Error = 0.3202, Training Accuracy = 90.69%
Epoch 2, Error = 0.1234, Training Accuracy = 96.36%
Epoch 3, Error = 0.0793, Training Accuracy = 97.67%
Epoch 4, Error = 0.0544, Training Accuracy = 98.46%
Epoch 5, Error = 0.0380, Training Accuracy = 99.01%
Epoch 6, Error = 0.0270, Training Accuracy = 99.37%
Epoch 7, Error = 0.0181, Training Accuracy = 99.62%
Epoch 8, Error = 0.0134, Training Accuracy = 99.74%
Epoch 9, Error = 0.0146, Training Accuracy = 99.63%
Epoch 10, Error = 0.0175, Training Accuracy = 99.53%


In [16]:
# Initialize counters for correct predictions and total samples
correct = 0
total = 0
val_error = 0  # not updated in this cell

# Batch-local names keep the notebook-level x_test / y_test arrays intact.
for x_batch, y_batch in testloader:
    # Predict
    y_pred = model.forward(x_batch)

    # Decode the predicted class through the encoder, so the comparison with
    # the raw test labels is also correct when labels are not exactly 0..K-1.
    predicted_class = encoder.inverse_transform(y_pred)

    # Count correct predictions
    correct += (predicted_class == y_batch).sum()

    # Update the total number of samples
    total += len(y_batch)

# Compute accuracy
acc = ( correct / total ) * 100
print(f'Accuracy: {acc:.2f} %')

Accuracy: 97.04 %
