In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential
from tensorflow.keras import datasets, layers, models, losses, Model
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization

In [2]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Reshaping the images
train_images = train_images.reshape((60000,28,28,1))
train_images = train_images.astype('float32')/255

test_images = test_images.reshape((10000,28,28,1))
test_images = test_images.astype('float32')/255

In [4]:
from keras.utils import to_categorical

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

In [5]:
model = models.Sequential()
model.add(layers.Conv2D(32,(3,3), activation='relu', input_shape = (28,28,1)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64,(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64,(3,3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64,activation = 'relu'))
model.add(layers.Dense(10, activation= 'softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 64)          36928     
                                                                 
 flatten (Flatten)           (None, 576)               0

In [7]:
model.compile(optimizer = 'rmsprop',
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])
model.fit(train_images, train_labels, epochs=10, batch_size = 64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4bc177f100>

In [8]:
model.evaluate(test_images, test_labels)[1]



0.9915000200271606

In [None]:
## USING PyTorch

In [9]:
import torch
import torch.nn as nn

In [18]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [80]:
# Load MNIST dataset
from torchvision import datasets, transforms
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST('mnist_data', train=True, download=True, transform=transform)
val_dataset = datasets.MNIST('mnist_data', train=False, download=True, transform=transform)

In [16]:
# Create data loaders
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [81]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.relu1(self.conv1(x))
        x = self.pool1(x)
        x = self.relu2(self.conv2(x))
        x = self.pool2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [82]:
# Training loop
def train(model, train_loader, optimizer, criterion, device):
  model.train()
  train_loss = 0
  correct = 0
  for data, target in train_loader:
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()
      output = model(data)
      loss = criterion(output, target)
      train_loss += loss.item() * data.size(0)
      pred = output.argmax(dim=1, keepdim=True)
      correct += pred.eq(target.view_as(pred)).sum().item()
      loss.backward()
      optimizer.step()

  train_loss /= len(train_loader.dataset)
  accuracy = correct / len(train_loader.dataset)
  return train_loss, accuracy        

def validate(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item() * data.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(val_loader.dataset)
    accuracy = correct / len(val_loader.dataset)
    return val_loss, accuracy


In [77]:
# Initialization
cnn = SimpleCNN().to(device)
print(cnn)
params = list(cnn.parameters())
print('Len Params')
print(len(params))
print(params[0].size())
EPOCH = 3
BATCH_SIZE = 50
LR = 0.001 
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()

SimpleCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU(inplace=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)
Len Params
6
torch.Size([16, 1, 3, 3])


In [83]:
# Create the model
model = SimpleCNN().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [85]:
epochs = 10
for epoch in range(epochs):
  train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
  val_loss, val_acc = validate(model, val_loader, criterion, device)

  if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pt')

  print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")


# Load the best model
model.load_state_dict(torch.load('best_model.pt'))

# Test the model
test_loss, test_acc = validate(model, val_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}")

Epoch 1/10 | Train Loss: 0.0096 | Train Acc: 0.9968 | Val Loss: 0.0437 | Val Acc: 5.9857
Epoch 2/10 | Train Loss: 0.0091 | Train Acc: 0.9971 | Val Loss: 0.0497 | Val Acc: 5.9831
Epoch 3/10 | Train Loss: 0.0078 | Train Acc: 0.9973 | Val Loss: 0.0299 | Val Acc: 5.9902
Epoch 4/10 | Train Loss: 0.0072 | Train Acc: 0.9974 | Val Loss: 0.0481 | Val Acc: 5.9812
Epoch 5/10 | Train Loss: 0.0066 | Train Acc: 0.9978 | Val Loss: 0.0236 | Val Acc: 5.9917
Epoch 6/10 | Train Loss: 0.0069 | Train Acc: 0.9975 | Val Loss: 0.0269 | Val Acc: 5.9909
Epoch 7/10 | Train Loss: 0.0061 | Train Acc: 0.9977 | Val Loss: 0.0654 | Val Acc: 5.9759
Epoch 8/10 | Train Loss: 0.0043 | Train Acc: 0.9985 | Val Loss: 0.0207 | Val Acc: 5.9925
Epoch 9/10 | Train Loss: 0.0053 | Train Acc: 0.9981 | Val Loss: 0.0340 | Val Acc: 5.9878
Epoch 10/10 | Train Loss: 0.0046 | Train Acc: 0.9984 | Val Loss: 0.0150 | Val Acc: 5.9953
Test Loss: 0.0150 | Test Acc: 5.9953
