In [6]:
# Libraries

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, Input
from tensorflow.keras.models import Sequential, Model
from skimage.transform import resize
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
# Load the MNIST dataset
(train_X, train_y), (test_X, test_y) = mnist.load_data()

# Data preprocessing

train_X = train_X.astype('float32') / 255.0
train_y = to_categorical(train_y, num_classes=10)

# Evaluate the model on the test set

test_X = test_X.astype('float32') / 255.0
test_y = to_categorical(test_y, num_classes=10)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [7]:
def mod_checkpoint_callback() -> None:

    trained_model = ModelCheckpoint(filepath = '/model.h5',  # result file name
                             save_weights_only = False,     # Save all training results/params.
                             monitor = 'val_accuracy',      # check our model accuracy if it's step forward.
                             mode = 'auto',                 # enable auto save.
                             save_best_only = True,         # if ac_new > ac_old
                             verbose = 1)
    return trained_model
model_checkpoint_callback = mod_checkpoint_callback()

In [4]:
# Create the model
def create_model():
  input_shape = (28, 28,1)
  
  model_seq = Sequential()

  # CNN 
  model_seq.add(Conv2D(8, kernel_size=(3, 3), activation='relu',padding='same', input_shape=input_shape))
  model_seq.add(MaxPooling2D(pool_size=(2, 2)))
  model_seq.add(Conv2D(8, kernel_size=(3, 3), activation='relu',padding='same'))
  model_seq.add(MaxPooling2D(pool_size=(2, 2)))
  model_seq.add(Conv2D(16, kernel_size=(3, 3), activation='relu',padding='same'))
  model_seq.add(MaxPooling2D(pool_size=(2, 2)))
  model_seq.add(Conv2D(32, kernel_size=(3, 3), activation='relu',padding='same'))
  model_seq.add(MaxPooling2D(pool_size=(2, 2)))


  # flatten layer
  model_seq.add(Flatten())

  # fully connected layers
  model_seq.add(Dense(72, activation='relu'))
  model_seq.add(Dropout(0.3))
  model_seq.add(Dense(10, activation='softmax', name='digit_out'))

  # model
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
  model_seq.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
  
  return model_seq

model = create_model()
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 8)         80        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 8)        0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 8)         584       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 8)          0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 16)          1168      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 3, 3, 16)         0

In [8]:
# Train the model
model.fit(train_X, train_y, epochs=50, batch_size=32, callbacks = [model_checkpoint_callback],validation_split=0.3)

Epoch 1/50
Epoch 1: val_accuracy improved from -inf to 0.95333, saving model to /model.h5
Epoch 2/50
Epoch 2: val_accuracy improved from 0.95333 to 0.96850, saving model to /model.h5
Epoch 3/50
Epoch 3: val_accuracy improved from 0.96850 to 0.97433, saving model to /model.h5
Epoch 4/50
Epoch 4: val_accuracy improved from 0.97433 to 0.97750, saving model to /model.h5
Epoch 5/50
Epoch 5: val_accuracy did not improve from 0.97750
Epoch 6/50
Epoch 6: val_accuracy improved from 0.97750 to 0.97972, saving model to /model.h5
Epoch 7/50
Epoch 7: val_accuracy improved from 0.97972 to 0.98161, saving model to /model.h5
Epoch 8/50
Epoch 8: val_accuracy did not improve from 0.98161
Epoch 9/50
Epoch 9: val_accuracy improved from 0.98161 to 0.98172, saving model to /model.h5
Epoch 10/50
Epoch 10: val_accuracy improved from 0.98172 to 0.98339, saving model to /model.h5
Epoch 11/50
Epoch 11: val_accuracy improved from 0.98339 to 0.98350, saving model to /model.h5
Epoch 12/50
Epoch 12: val_accuracy did

<keras.callbacks.History at 0x7f6e43d397b0>

In [9]:
model.evaluate(test_X, test_y)



[0.07078444212675095, 0.9873999953269958]

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.utils import make_grid
import matplotlib.pyplot as plt

# Load the MNIST dataset
train_dataset = MNIST(root='data/', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='data/', train=False, transform=ToTensor())

# Data preprocessing
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        self.conv4 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=2)
        
        self.flatten = nn.Flatten()
        
        self.fc1 = nn.Linear(in_features=32*1*1, out_features=72)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(in_features=72, out_features=10)
    
    def forward(self, x):
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.pool3(torch.relu(self.conv3(x)))
        x = self.pool4(torch.relu(self.conv4(x)))
        
        x = self.flatten(x)
        
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.softmax(self.fc2(x), dim=1)
        
        return x

# Create the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
print(model)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        _, predicted = torch.max(outputs, dim=1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)


CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=32, out_features=72, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=72, out_features=10, bias=True)
)
Epoch [1/50], Loss: 1.6899
Epoch [2/50], Loss: 1.5365
Epoch [3/50], Loss: 1.5151
Epoch [4/50], Loss: 1.5064
Epoch [5/50], Loss: 1.5005
Ep

In [1]:

model.eval()

NameError: ignored