In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.datasets import mnist

# Set random seeds for reproducibility
tf.random.set_seed(42)

# Load MNIST, append a trailing channel axis (N, 28, 28, 1) and scale by 1/255
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# Define the CNN architecture: two conv/pool stages followed by a softmax classifier
model = models.Sequential([
    # 'same' padding with stride 1 keeps the 28x28 spatial size; pooling halves it
    layers.Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=2),
    layers.Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=2),
    # 7 * 7 * 16 = 784 features go into the 10-way classifier
    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])

# Print the model summary
model.summary()

# Compile the model.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is not accepted by newer Keras optimizers.
# The sparse loss takes integer class labels, so y_train / y_test stay un-encoded.
model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 28, 28, 16)        160       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 14, 14, 16)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 14, 14, 16)        2320      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 7, 7, 16)         0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 dense_2 (Dense)             (None, 10)               

In [6]:
# Training loop: fit one epoch at a time so accuracy can be checked between epochs
with tf.device('GPU:0'):
    epochs = 20
    for epoch in range(epochs):
        model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=1)

        # Score on (x_test, y_test). NOTE: this is the MNIST test split, so the
        # "validation" figure printed below, and the early stop that depends on
        # it, are both driven by test data.
        # evaluate() returns [loss, accuracy] for the metrics compiled above; the
        # loss is bound to a real name because assigning to `_` would shadow
        # IPython's last-output variable for the rest of the session.
        test_loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
        print(f'Epoch {epoch + 1}/{epochs}, Validation Accuracy: {accuracy * 100:.2f}%')

        # Stop training if the desired accuracy (99.4%) is reached
        if accuracy >= 0.994:
            print('Desired accuracy reached. Training stopped.')
            break


Epoch 1/20, Validation Accuracy: 98.39%
Epoch 2/20, Validation Accuracy: 98.39%
Epoch 3/20, Validation Accuracy: 98.47%
Epoch 4/20, Validation Accuracy: 98.51%
Epoch 5/20, Validation Accuracy: 98.58%
Epoch 6/20, Validation Accuracy: 98.65%
Epoch 7/20, Validation Accuracy: 98.70%
Epoch 8/20, Validation Accuracy: 98.75%
Epoch 9/20, Validation Accuracy: 98.77%
Epoch 10/20, Validation Accuracy: 98.66%
Epoch 11/20, Validation Accuracy: 98.68%
Epoch 12/20, Validation Accuracy: 98.76%
Epoch 13/20, Validation Accuracy: 98.70%
Epoch 14/20, Validation Accuracy: 98.66%
Epoch 15/20, Validation Accuracy: 98.66%
Epoch 16/20, Validation Accuracy: 98.41%
Epoch 17/20, Validation Accuracy: 98.60%
Epoch 18/20, Validation Accuracy: 98.60%
Epoch 19/20, Validation Accuracy: 98.45%
Epoch 20/20, Validation Accuracy: 98.48%


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Fix the seed of PyTorch's global random number generator for reproducibility
torch.manual_seed(seed=42)

# Define the CNN architecture
class CNN(nn.Module):
    """Two-stage convolutional classifier that outputs 10 raw class scores.

    Each stage is a 3x3 convolution (stride 1, padding 1) followed by ReLU and
    2x2 max pooling. The pooled maps are flattened and passed to one linear
    layer sized for 7 * 7 * 32 features, which corresponds to 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        # The two 2x2 poolings in forward() reduce 28x28 inputs to 7x7 maps,
        # and conv2 emits 32 channels.
        self.fc = nn.Linear(in_features=7 * 7 * 32, out_features=10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10) for input `x`."""
        relu = nn.functional.relu
        pool = nn.functional.max_pool2d
        # Same conv -> ReLU -> pool recipe for both stages
        for conv in (self.conv1, self.conv2):
            x = pool(relu(conv(x)), 2)
        # Collapse everything except the batch axis before the linear layer
        flat = x.view(x.size(0), -1)
        return self.fc(flat)

# Preprocessing applied to every image: convert it to a tensor, then
# normalise its single channel with the given mean and std.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)



In [8]:
# MNIST train/test splits stored under ./data, both using the shared `transform`.
# Only the training split requests a download.
train_dataset = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='data', train=False, transform=transform)

# Mini-batches of 64 for both splits; only the training order is shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)

# Build the PyTorch network.
# NOTE: this rebinds `model`, which the earlier Keras cells used for the Keras network.
model = CNN()

# Cross-entropy on the raw scores returned by CNN.forward, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)



Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data\MNIST\raw\train-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 9912422/9912422 [00:03<00:00, 3018113.91it/s]


Extracting data\MNIST\raw\train-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|███████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 2220554.59it/s]


Extracting data\MNIST\raw\train-labels-idx1-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 1648877/1648877 [00:01<00:00, 1537788.11it/s]


Extracting data\MNIST\raw\t10k-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 1509670.24it/s]

Extracting data\MNIST\raw\t10k-labels-idx1-ubyte.gz to data\MNIST\raw






In [9]:
# Training loop: one pass over train_loader per epoch, then an accuracy check
epochs = 10
for epoch in range(epochs):
    # Optimisation pass (the batch index is not needed, so no enumerate)
    model.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    # Accuracy over test_loader. NOTE: this is the MNIST test split, so the
    # "validation" figure printed below, and the early stop that depends on it,
    # are both driven by test data.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            # Index of the largest score per row is the predicted class.
            # Gradients are already disabled here, so the legacy `.data` access
            # is unnecessary, and using argmax avoids rebinding IPython's `_`.
            predicted = output.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100.0 * correct / total  # percentage, unlike the 0-1 fraction Keras reports
    print(f'Epoch {epoch + 1}/{epochs}, Validation Accuracy: {accuracy:.2f}%')

    # Stop training if the desired accuracy is reached
    if accuracy >= 99.40:
        print('Desired accuracy reached. Training stopped.')
        break


Epoch 1/10, Validation Accuracy: 98.18%
Epoch 2/10, Validation Accuracy: 98.44%
Epoch 3/10, Validation Accuracy: 98.47%
Epoch 4/10, Validation Accuracy: 98.79%
Epoch 5/10, Validation Accuracy: 98.88%
Epoch 6/10, Validation Accuracy: 98.98%
Epoch 7/10, Validation Accuracy: 98.69%
Epoch 8/10, Validation Accuracy: 98.96%
Epoch 9/10, Validation Accuracy: 98.88%
Epoch 10/10, Validation Accuracy: 98.76%
