In [1]:
# Imports
import torch
import torch.nn as nn # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss Function
import torch.optim as optim # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # All functions that don't have any parameters
from torch.utils.data import DataLoader # Gives easier dataset management and creates mini batches
import torchvision.datasets as datasets # Has standard datasets we can import in a nice and easy way
import torchvision.transforms as transforms # Transformations we can perform on our dataset

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Show which device string was selected (the bare expression is the cell output).
device

'cpu'

In [4]:
# CNN
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Pipeline: conv 3x3 (8 filters) -> ReLU -> 2x2 max-pool ->
    conv 3x3 (16 filters) -> ReLU -> 2x2 max-pool -> flatten -> linear.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of scores produced per sample.
        input_size: Spatial size of the input images, either ``(height, width)``
            or a single int for square images. The default ``(28, 28)`` gives
            the original ``16 * 7 * 7`` input features for the linear layer.

    Raises:
        ValueError: If ``input_size`` is so small that nothing is left after
            the two pooling stages.
    """

    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        # 1st conv layer (kernel 3, stride 1, padding 1 keeps height and width)
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        # Maxpool, shared by both stages; each application halves height and
        # width, rounding down
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # 2nd conv layer (also size-preserving)
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )

        # Only the two pooling stages change the spatial size, so the flattened
        # feature count can be derived instead of being hard-coded for 28x28.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for two 2x2 pooling stages"
            )

        # Output Layer
        self.fc1 = nn.Linear(
            self.conv2.out_channels * pooled_height * pooled_width, num_classes
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        ``x`` is a batch of images shaped ``(batch, in_channels, height, width)``
        whose spatial size matches the ``input_size`` given at construction.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = x.flatten(start_dim=1)
        return self.fc1(x)
	

In [5]:
# Sanity check: push a random batch of 64 single-channel 28x28 images through
# an untrained model and print the input and output shapes.
model = CNN()
x = torch.randn(64, 1, 28, 28)
print(x.shape, model(x).shape, sep="\n")

torch.Size([64, 1, 28, 28])
torch.Size([64, 10])


In [6]:
# Hyperparameters

# Model shape
in_channels = 1        # channels per input image
num_classes = 10       # scores produced per sample

# Optimisation
batch_size = 64        # samples per mini-batch
learning_rate = 1e-3   # step size passed to the optimizer
num_epochs = 15        # full passes over the training set

In [7]:
# Load Data
# MNIST is fetched into dataset/ if it is not already there; ToTensor()
# converts every image into a tensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
# Shuffle the training data and serve it in mini-batches of `batch_size`.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Accuracy does not depend on sample order, so the evaluation loader only
# batches the test set and does not shuffle it.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Initialize the network
# Pass the hyperparameters explicitly so that editing them in the
# hyperparameter cell actually changes the model that gets trained.
model = CNN(in_channels=in_channels, num_classes=num_classes).to(device)


In [9]:
 = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [11]:
# Train Network
from tqdm import tqdm

for epoch in range(num_epochs):
    # One progress bar per epoch, sized by the number of mini-batches.
    progress = tqdm(enumerate(train_loader), total=len(train_loader), leave=False)
    for batch_idx, (data, targets) in progress:
        # Put the batch on the same device as the model.
        data, targets = data.to(device), targets.to(device)

        # Drop the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # forward
        logits = model(data)
        loss = criterion(logits, targets)

        # backward, then gradient descent or adam step
        loss.backward()
        optimizer.step()

        # Update tqdm loop with the epoch counter and the latest batch loss
        progress.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
        progress.set_postfix(loss=loss.item())


                                                                               

In [13]:
# Check accuracy on the training and test sets to see how good our model is

def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over all batches in ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. When it has a
            ``dataset`` attribute carrying a ``train`` flag, the header line
            names the split being evaluated; otherwise a generic header is
            printed.
        model: ``nn.Module`` whose output has one row per sample and one
            column per class.

    The model is put into evaluation mode for the pass and afterwards restored
    to whichever mode it was in before the call, even if evaluation raises.
    An empty loader is reported as ``0/0`` with accuracy ``0.00``.

    Returns:
        None. The result is only printed.
    """
    # Not every loader wraps a dataset with a `train` flag, so look it up
    # defensively instead of assuming it exists.
    split_flag = getattr(getattr(loader, "dataset", None), "train", None)
    if split_flag is None:
        print("Checking accuracy")
    elif split_flag:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Send batches to wherever the model's parameters live; a model without
    # parameters falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # Running totals of correct predictions and of samples seen
    num_correct = 0
    num_samples = 0

    # Remember the current mode so it can be restored afterwards
    was_training = model.training
    model.eval()
    try:
        # We don't need to calculate gradients when checking accuracy
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)
                scores = model(x)

                # Index of the highest score in each row is the predicted class
                _, predictions = scores.max(1)

                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    # Convert the accumulated tensor to a plain int once, after the loop
    num_correct = int(num_correct)
    # Guard the division so an empty loader does not raise ZeroDivisionError
    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}')

# Report accuracy on the training split first, then on the held-out test split.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking accuracy on training data
Got 59645/60000 with accuracy 99.41
Checking accuracy on test data
Got 9866/10000 with accuracy 98.66
