Problem 1 - Creating a one-dimensional convolutional layer class with the number of channels limited to 1

In [34]:
import numpy as np

class SimpleConv1d:
    def __init__(self, filter_size, input_size, learning_rate=0.01):
        """
        Initialize the SimpleConv1d layer.

        :param filter_size: Size of the convolutional filter (F).
        :param input_size: Size of the input array (used for weight initialization).
        :param learning_rate: Learning rate for weight and bias updates.
        """
        self.filter_size = filter_size
        self.input_size = input_size
        self.learning_rate = learning_rate

        # Xavier initialization for weights and bias
        self.weights = np.random.randn(filter_size) * np.sqrt(2.0 / input_size)
        self.bias = 0.0

    def forward(self, x):
        """
        Perform forward propagation.

        :param x: Input array (1D).
        :return: Output array after convolution.
        """
        self.x = x  # Store input for backpropagation
        self.output_size = len(x) - self.filter_size + 1  # Compute output size
        self.a = np.zeros(self.output_size)  # Initialize output array

        # Convolution operation
        for i in range(self.output_size):
            self.a[i] = np.sum(self.x[i:i + self.filter_size] * self.weights) + self.bias

        return self.a

    def backward(self, da):
        """
        Perform backward propagation.

        :param da: Gradient of the loss with respect to the output (a).
        :return: Gradient of the loss with respect to the input (x).
        """
        # Gradients for weights and bias
        self.dw = np.zeros_like(self.weights)
        for s in range(self.filter_size):
            for i in range(self.output_size):
                self.dw[s] += da[i] * self.x[i + s]

        self.db = np.sum(da)

        # Gradient of the loss with respect to the input
        dx = np.zeros_like(self.x)
        for j in range(len(self.x)):
            for s in range(self.filter_size):
                if 0 <= j - s < self.output_size:
                    dx[j] += da[j - s] * self.weights[s]

        return dx

    def update(self):
        """
        Update weights and bias using gradients.
        """
        self.weights -= self.learning_rate * self.dw
        self.bias -= self.learning_rate * self.db

# Example usage
if __name__ == "__main__":
    np.random.seed(0)  # For reproducibility

    # Initialize SimpleConv1d
    filter_size = 3
    input_size = 10
    learning_rate = 0.01
    conv_layer = SimpleConv1d(filter_size, input_size, learning_rate)

    # Input array
    x = np.random.randn(input_size)

    # Forward propagation
    output = conv_layer.forward(x)
    print("Output:", output)

    # Backward propagation (example gradient of the output)
    da = np.random.randn(len(output))
    dx = conv_layer.backward(da)
    print("Gradient w.r.t input:", dx)

    # Update weights
    conv_layer.update()
    print("Updated weights:", conv_layer.weights)
    print("Updated bias:", conv_layer.bias)


Output: [ 1.67430988  1.7143007  -0.66720854  0.67726689  0.04184244  0.05509734
  0.98624463  0.70699772]
Gradient w.r.t input: [ 0.09599042  0.37194178  0.39592804  1.43268528  0.2515739   0.86423321
 -0.70757665 -2.02988864 -0.83071411 -1.11745624]
Updated weights: [0.77415498 0.21697288 0.46896568]
Updated bias: 0.00905884467480576


Problem 2 - Calculating the output size after 1D convolution

In [35]:
def calculate_output_size(N_in, P, F, S):
    """
    Function to calculate the output size of a convolutional layer.

    Parameters:
    N_in (int): Input size (number of features)
    P (int): Padding size
    F (int): Filter size
    S (int): Stride size

    Returns:
    int: Output size (number of features)
    """
    N_out = (N_in + 2 * P - F) // S + 1
    return N_out

# Example
N_in = 64  # Input size
P = 1      # Padding size
F = 3      # Filter size
S = 1      # Stride size

output_size = calculate_output_size(N_in, P, F, S)
output_size


64

Problem 3 - Experiment with 1D convolutional layers on small arrays

In [36]:
import numpy as np

# Given input, weights, and bias
x = np.array([1, 2, 3, 4])
w = np.array([3, 5, 7])
b = np.array([1])

# Forward propagation
# Create an empty array for storing output
a = np.empty((2, 3))  # 2 outputs, each of length 3 (filter size)

# Indexing to perform the convolution (efficient way)
indexes = np.array([[0, 1, 2], [1, 2, 3]]).astype(int)  # Replace np.int with int
a = (x[indexes] * w).sum(axis=1) + b

print("Output after forward propagation:")
print(a)

# Backpropagation
delta_a = np.array([10, 20])

# Compute the gradients for the bias (delta_b)
delta_b = delta_a.sum()

# Compute the gradients for the weights (delta_w)
delta_w = np.empty_like(w)
for i in range(len(w)):
    delta_w[i] = (x[indexes[:, i]] * delta_a).sum()

# Compute the gradients for the input (delta_x)
# Initialize delta_x to zeros
delta_x = np.zeros_like(x)

# Spread the error back through the input x
for i in range(len(delta_a)):
    for j in range(len(w)):
        delta_x[indexes[i, j]] += delta_a[i] * w[j]

print("\nBackpropagation results:")
print("delta_b:", delta_b)
print("delta_w:", delta_w)
print("delta_x:", delta_x)



Output after forward propagation:
[35 50]

Backpropagation results:
delta_b: 30
delta_w: [ 50  80 110]
delta_x: [ 30 110 170 140]


Problem 4 - Creating a 1D convolutional layer class with no limit on the number of channels

In [37]:
import numpy as np

class Conv1d:
    def __init__(self, in_channels, out_channels, filter_size):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size

        # Initialize weights and biases
        self.weights = np.ones((out_channels, in_channels, filter_size), dtype=np.float64)  # Ensure dtype is float64
        self.biases = np.array([i+1 for i in range(out_channels)], dtype=np.float64)  # Ensure dtype is float64

    def forward(self, x):
        """
        Perform the forward pass through the convolutional layer.

        Args:
        - x: Input array of shape (in_channels, features)

        Returns:
        - a: Output array after applying the convolution
        """
        # Ensure input x is float64
        x = x.astype(np.float64)

        # Output shape
        out_features = x.shape[1] - self.filter_size + 1  # Feature length after convolution

        # Initialize output array
        a = np.zeros((self.out_channels, out_features), dtype=np.float64)

        # Perform convolution for each output channel
        for i in range(self.out_channels):
            for j in range(out_features):
                # Apply filter for this output channel
                conv_sum = 0
                for k in range(self.in_channels):
                    conv_sum += np.sum(x[k, j:j+self.filter_size] * self.weights[i, k])  # Convolution operation
                a[i, j] = conv_sum + self.biases[i]  # Add the bias

        return a

    def backward(self, x, delta_a):
        """
        Perform the backpropagation for the convolutional layer.

        Args:
        - x: Input array of shape (in_channels, features)
        - delta_a: Gradient of the loss with respect to the output

        Returns:
        - delta_w: Gradient of the loss with respect to the weights
        - delta_b: Gradient of the loss with respect to the biases
        - delta_x: Gradient of the loss with respect to the input
        """
        delta_w = np.zeros_like(self.weights, dtype=np.float64)
        delta_b = np.zeros_like(self.biases, dtype=np.float64)
        delta_x = np.zeros_like(x, dtype=np.float64)

        # Ensure delta_a is float64
        delta_a = delta_a.astype(np.float64)

        # Backpropagation for each output channel
        for i in range(self.out_channels):
            for j in range(delta_a.shape[1]):  # For each feature in the output
                # Compute the gradient with respect to the bias
                delta_b[i] += delta_a[i, j]

                # Compute the gradient with respect to the weights
                for k in range(self.in_channels):
                    delta_w[i, k] += x[k, j:j+self.filter_size] * delta_a[i, j]

                # Compute the gradient with respect to the input
                for k in range(self.in_channels):
                    delta_x[k, j:j+self.filter_size] += self.weights[i, k] * delta_a[i, j]

        return delta_w, delta_b, delta_x

# Example Usage
x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]], dtype=np.float64)  # (2, 4), 2 input channels, 4 features
w = np.ones((3, 2, 3), dtype=np.float64)  # (3, 2, 3), 3 output channels, 2 input channels, filter size 3
b = np.array([1, 2, 3], dtype=np.float64)  # Bias for each output channel

# Create Conv1d layer
conv = Conv1d(in_channels=2, out_channels=3, filter_size=3)

# Perform forward pass
output = conv.forward(x)
print("Output after forward pass:")
print(output)

# Define error from next layer (delta_a)
delta_a = np.array([[10, 20], [15, 25], [30, 35]], dtype=np.float64)

# Perform backward pass
delta_w, delta_b, delta_x = conv.backward(x, delta_a)

print("\nGradients after backward pass:")
print("delta_w:", delta_w)
print("delta_b:", delta_b)
print("delta_x:", delta_x)


Output after forward pass:
[[16. 22.]
 [17. 23.]
 [18. 24.]]

Gradients after backward pass:
delta_w: [[[ 50.  80. 110.]
  [ 80. 110. 140.]]

 [[ 65. 105. 145.]
  [105. 145. 185.]]

 [[100. 165. 230.]
  [165. 230. 295.]]]
delta_b: [30. 40. 65.]
delta_x: [[ 55. 135. 135.  80.]
 [ 55. 135. 135.  80.]]


Problem 5 - (Advanced) Implementing padding

In [38]:
import numpy as np

class Conv1d:
    def __init__(self, in_channels, out_channels, filter_size, padding=0, padding_type='constant'):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.padding = padding
        self.padding_type = padding_type

        # Initialize weights and biases
        self.weights = np.ones((out_channels, in_channels, filter_size), dtype=np.float64)  # Ensure dtype is float64
        self.biases = np.array([i+1 for i in range(out_channels)], dtype=np.float64)  # Ensure dtype is float64

    def forward(self, x):
        """
        Perform the forward pass through the convolutional layer.

        Args:
        - x: Input array of shape (in_channels, features)

        Returns:
        - a: Output array after applying the convolution
        """
        # Ensure input x is float64
        x = x.astype(np.float64)

        # Apply padding to the input if necessary
        if self.padding > 0:
            if self.padding_type == 'constant':
                x = np.pad(x, ((0, 0), (self.padding, self.padding)), mode='constant', constant_values=0)
            elif self.padding_type == 'edge':
                x = np.pad(x, ((0, 0), (self.padding, self.padding)), mode='edge')

        # Output shape (adjusted for padding)
        out_features = x.shape[1] - self.filter_size + 1  # Feature length after convolution

        # Initialize output array
        a = np.zeros((self.out_channels, out_features), dtype=np.float64)

        # Perform convolution for each output channel
        for i in range(self.out_channels):
            for j in range(out_features):
                # Apply filter for this output channel
                conv_sum = 0
                for k in range(self.in_channels):
                    conv_sum += np.sum(x[k, j:j+self.filter_size] * self.weights[i, k])  # Convolution operation
                a[i, j] = conv_sum + self.biases[i]  # Add the bias

        return a

    def backward(self, x, delta_a):
        """
        Perform the backpropagation for the convolutional layer.

        Args:
        - x: Input array of shape (in_channels, features)
        - delta_a: Gradient of the loss with respect to the output

        Returns:
        - delta_w: Gradient of the loss with respect to the weights
        - delta_b: Gradient of the loss with respect to the biases
        - delta_x: Gradient of the loss with respect to the input
        """
        delta_w = np.zeros_like(self.weights, dtype=np.float64)
        delta_b = np.zeros_like(self.biases, dtype=np.float64)
        delta_x = np.zeros_like(x, dtype=np.float64)

        # Ensure delta_a is float64
        delta_a = delta_a.astype(np.float64)

        # Backpropagation for each output channel
        for i in range(self.out_channels):
            for j in range(delta_a.shape[1]):  # For each feature in the output
                # Compute the gradient with respect to the bias
                delta_b[i] += delta_a[i, j]

                # Compute the gradient with respect to the weights
                for k in range(self.in_channels):
                    delta_w[i, k] += x[k, j:j+self.filter_size] * delta_a[i, j]

                # Compute the gradient with respect to the input
                for k in range(self.in_channels):
                    delta_x[k, j:j+self.filter_size] += self.weights[i, k] * delta_a[i, j]

        return delta_w, delta_b, delta_x

# Example Usage
x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]], dtype=np.float64)  # (2, 4), 2 input channels, 4 features
w = np.ones((3, 2, 3), dtype=np.float64)  # (3, 2, 3), 3 output channels, 2 input channels, filter size 3
b = np.array([1, 2, 3], dtype=np.float64)  # Bias for each output channel

# Create Conv1d layer with padding
conv = Conv1d(in_channels=2, out_channels=3, filter_size=3, padding=1, padding_type='constant')

# Perform forward pass
output = conv.forward(x)
print("Output after forward pass:")
print(output)

# Define error from next layer (delta_a)
delta_a = np.array([[10, 20], [15, 25], [30, 35]], dtype=np.float64)

# Perform backward pass
delta_w, delta_b, delta_x = conv.backward(x, delta_a)

print("\nGradients after backward pass:")
print("delta_w:", delta_w)
print("delta_b:", delta_b)
print("delta_x:", delta_x)


Output after forward pass:
[[ 9. 16. 22. 17.]
 [10. 17. 23. 18.]
 [11. 18. 24. 19.]]

Gradients after backward pass:
delta_w: [[[ 50.  80. 110.]
  [ 80. 110. 140.]]

 [[ 65. 105. 145.]
  [105. 145. 185.]]

 [[100. 165. 230.]
  [165. 230. 295.]]]
delta_b: [30. 40. 65.]
delta_x: [[ 55. 135. 135.  80.]
 [ 55. 135. 135.  80.]]


Problem 6 - (Advanced problem) Dealing with mini-batches

In [39]:
import numpy as np

class Conv1d:
    def __init__(self, in_channels, out_channels, filter_size, padding=0, padding_type='constant'):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.padding = padding
        self.padding_type = padding_type

        # Initialize weights and biases
        self.weights = np.ones((out_channels, in_channels, filter_size), dtype=np.float64)  # Ensure dtype is float64
        self.biases = np.array([i+1 for i in range(out_channels)], dtype=np.float64)  # Ensure dtype is float64

    def forward(self, x):
        """
        Perform the forward pass through the convolutional layer.

        Args:
        - x: Input array of shape (batch_size, in_channels, features)

        Returns:
        - a: Output array after applying the convolution (batch_size, out_channels, features)
        """
        # Ensure input x is float64
        x = x.astype(np.float64)

        # Apply padding to the input if necessary
        if self.padding > 0:
            if self.padding_type == 'constant':
                x = np.pad(x, ((0, 0), (0, 0), (self.padding, self.padding)), mode='constant', constant_values=0)
            elif self.padding_type == 'edge':
                x = np.pad(x, ((0, 0), (0, 0), (self.padding, self.padding)), mode='edge')

        # Output shape (adjusted for padding)
        batch_size, _, in_features = x.shape
        out_features = in_features - self.filter_size + 1  # Feature length after convolution

        # Initialize output array
        a = np.zeros((batch_size, self.out_channels, out_features), dtype=np.float64)

        # Perform convolution for each sample in the batch
        for b in range(batch_size):
            for i in range(self.out_channels):
                for j in range(out_features):
                    # Apply filter for this output channel and batch sample
                    conv_sum = 0
                    for k in range(self.in_channels):
                        conv_sum += np.sum(x[b, k, j:j+self.filter_size] * self.weights[i, k])  # Convolution operation
                    a[b, i, j] = conv_sum + self.biases[i]  # Add the bias

        return a

    def backward(self, x, delta_a):
        """
        Perform the backpropagation for the convolutional layer.

        Args:
        - x: Input array of shape (batch_size, in_channels, features)
        - delta_a: Gradient of the loss with respect to the output (batch_size, out_channels, features)

        Returns:
        - delta_w: Gradient of the loss with respect to the weights (out_channels, in_channels, filter_size)
        - delta_b: Gradient of the loss with respect to the biases (out_channels,)
        - delta_x: Gradient of the loss with respect to the input (batch_size, in_channels, features)
        """
        delta_w = np.zeros_like(self.weights, dtype=np.float64)
        delta_b = np.zeros_like(self.biases, dtype=np.float64)
        delta_x = np.zeros_like(x, dtype=np.float64)

        # Ensure delta_a is float64
        delta_a = delta_a.astype(np.float64)

        # Backpropagation for each output channel in each sample of the batch
        for b in range(x.shape[0]):  # Loop over the batch size
            for i in range(self.out_channels):
                for j in range(delta_a.shape[2]):  # For each feature in the output
                    # Compute the gradient with respect to the bias
                    delta_b[i] += delta_a[b, i, j]

                    # Compute the gradient with respect to the weights
                    for k in range(self.in_channels):
                        delta_w[i, k] += x[b, k, j:j+self.filter_size] * delta_a[b, i, j]

                    # Compute the gradient with respect to the input
                    for k in range(self.in_channels):
                        delta_x[b, k, j:j+self.filter_size] += self.weights[i, k] * delta_a[b, i, j]

        return delta_w, delta_b, delta_x

# Example Usage
x = np.array([[[1, 2, 3, 4], [2, 3, 4, 5]],  # (batch_size=2, in_channels=2, features=4)
              [[5, 6, 7, 8], [6, 7, 8, 9]]], dtype=np.float64)  # 2 samples in the batch
w = np.ones((3, 2, 3), dtype=np.float64)  # (3, 2, 3), 3 output channels, 2 input channels, filter size 3
b = np.array([1, 2, 3], dtype=np.float64)  # Bias for each output channel

# Create Conv1d layer with padding
conv = Conv1d(in_channels=2, out_channels=3, filter_size=3, padding=1, padding_type='constant')

# Perform forward pass
output = conv.forward(x)
print("Output after forward pass:")
print(output)

# Define error from next layer (delta_a)
delta_a = np.array([[[10, 20], [15, 25], [30, 35]],  # Gradient for first sample
                    [[5, 10], [15, 20], [25, 30]]], dtype=np.float64)  # Gradient for second sample

# Perform backward pass
delta_w, delta_b, delta_x = conv.backward(x, delta_a)

print("\nGradients after backward pass:")
print("delta_w:", delta_w)
print("delta_b:", delta_b)
print("delta_x:", delta_x)


Output after forward pass:
[[[ 9. 16. 22. 17.]
  [10. 17. 23. 18.]
  [11. 18. 24. 19.]]

 [[25. 40. 46. 33.]
  [26. 41. 47. 34.]
  [27. 42. 48. 35.]]]

Gradients after backward pass:
delta_w: [[[135. 180. 225.]
  [180. 225. 270.]]

 [[260. 335. 410.]
  [335. 410. 485.]]

 [[405. 525. 645.]
  [525. 645. 765.]]]
delta_b: [ 45.  75. 120.]
delta_x: [[[ 55. 135. 135.  80.]
  [ 55. 135. 135.  80.]]

 [[ 45. 105. 105.  60.]
  [ 45. 105. 105.  60.]]]


Problem 7 - (Advanced Task) Any number of strides

In [40]:
import numpy as np

# Simple Initializer for weights and biases
class SimpleInitializer:
    def __init__(self, scale=0.01):
        self.scale = scale

    def W(self, n_out_channels, n_in_channels, filter_size):
        return np.random.randn(n_out_channels, n_in_channels, filter_size) * self.scale

    def B(self, n_out_channels):
        return np.zeros(n_out_channels)

# Function to calculate the output size after convolution
def output_size_calculation(input_size, filter_size, padding, stride):
    return (input_size - filter_size + 2 * padding) // stride + 1

class Conv1d:
    def __init__(self, b_size, initializer, optimizer, batch_size, n_in_channels=1, n_out_channels=1, padding=0, stride=1):
        self.optimizer = optimizer
        self.padding = padding
        self.stride = stride
        self.batch_size = batch_size
        self.b_size = b_size  # filter size is now stored in b_size
        self.W = initializer.W(n_out_channels, n_in_channels, b_size)  # (n_out_channels, n_in_channels, filter_size)
        self.B = initializer.B(n_out_channels)  # (n_out_channels)
        self.n_in_channels = n_in_channels
        self.n_out_channels = n_out_channels

    def forward(self, X):
        # Debug output to check the shape of X
        print(f"Input X shape: {X.shape}")

        self.n_samples = X.shape[0]
        self.n_in = X.shape[2]
        self.n_out = output_size_calculation(self.n_in, self.b_size, self.padding, self.stride)

        # Data after padding
        X = self._padding(X)
        self.X = X
        print(f"Padded X shape: {self.X.shape}")  # Shape after padding

        # Calculating A (output of the convolution operation)
        A = np.zeros((self.n_samples, self.n_out_channels, self.n_out))
        for i in range(self.n_samples):
            for j in range(self.n_out_channels):
                for k in range(self.n_out):
                    start_index = k * self.stride
                    A[i, j, k] = np.sum(X[i, :, start_index:start_index + self.b_size] * self.W[j]) + self.B[j]

        # Debug output for the final output of the Conv1d layer
        print(f"Conv1d output (A) shape: {A.shape}")
        return A

    def backward(self, dA):
        self.dW = np.zeros_like(self.W)
        self.dB = np.zeros_like(self.B)
        dX = np.zeros_like(self.X)
        for i in range(self.n_samples):
            for j in range(self.n_out_channels):
                for k in range(self.n_out):
                    start_index = k * self.stride
                    self.dW[j] += dA[i, j, k] * self.X[i, :, start_index:start_index + self.b_size]
                    self.dB[j] += dA[i, j, k]
                    dX[i, :, start_index:start_index + self.b_size] += dA[i, j, k] * self.W[j]
        self.optimizer.update(self)
        dX = dX[:, :, self.padding:self.n_in + self.padding]
        return dX

    def _padding(self, X):
        if self.padding == 0:
            return X
        return np.pad(X, ((0, 0), (0, 0), (self.padding, self.padding)), 'constant')


class DummyOptimizer:
    def update(self, conv_layer):
        # Dummy optimizer for the sake of example (does not update weights or biases)
        print("Optimizer update called.")

# Example Usage
x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]])  # (in_channels=2, input_size=4)
x = x.reshape(1, 2, 4)  # Reshaping to (batch_size=1, in_channels=2, input_size=4)

conv1d = Conv1d(b_size=3, initializer=SimpleInitializer(0.01), optimizer=DummyOptimizer(), batch_size=1, n_in_channels=2, n_out_channels=3, padding=0, stride=1)

output = conv1d.forward(x)

print("Output after forward pass:")
print(output)


Input X shape: (1, 2, 4)
Padded X shape: (1, 2, 4)
Conv1d output (A) shape: (1, 3, 2)
Output after forward pass:
[[[0.00515442 0.02152479]
  [0.0517969  0.07639969]
  [0.0231084  0.02183824]]]


Problem 8 - Learning and estimation

In [42]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Preprocess the data
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

# Convert labels to one-hot encoding
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

# Reshape data to (batch_size, channels, input_size) for Conv1d (we flatten the 28x28 image)
train_images = train_images.reshape(train_images.shape[0], 28, 28)  # (batch_size, height, width)
test_images = test_images.reshape(test_images.shape[0], 28, 28)  # (batch_size, height, width)

# Model definition with Conv1d layers
model = models.Sequential()

# Use Input layer to define the input shape
model.add(layers.Input(shape=(28, 28)))  # Specify the input shape here, instead of in the Conv1D layer

# Add Conv1d layers to extract features
model.add(layers.Conv1D(32, 3, activation='relu'))  # Conv1d layer with 32 filters
model.add(layers.MaxPooling1D(2))  # Max pooling to reduce dimension
model.add(layers.Conv1D(64, 3, activation='relu'))  # Another Conv1d layer
model.add(layers.MaxPooling1D(2))  # Max pooling to reduce dimension
model.add(layers.Conv1D(128, 3, activation='relu'))  # Another Conv1d layer
model.add(layers.MaxPooling1D(2))  # Max pooling to reduce dimension

# Flatten the output to feed into the fully connected layer
model.add(layers.Flatten())

# Output layer (fully connected)
model.add(layers.Dense(10, activation='softmax'))  # 10 output units for 10 classes (MNIST)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=5, batch_size=64, validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print(f"Test accuracy: {test_acc}")


Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.8173 - loss: 0.6109 - val_accuracy: 0.9649 - val_loss: 0.1131
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9675 - loss: 0.1102 - val_accuracy: 0.9756 - val_loss: 0.0775
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9749 - loss: 0.0802 - val_accuracy: 0.9786 - val_loss: 0.0715
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9802 - loss: 0.0626 - val_accuracy: 0.9801 - val_loss: 0.0635
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9851 - loss: 0.0484 - val_accuracy: 0.9815 - val_loss: 0.0606
313/313 - 1s - 3ms/step - accuracy: 0.9815 - loss: 0.0606
Test accuracy: 0.9815000295639038
