In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets as datasets
from torchvision import transforms as transforms
import numpy as np
from tqdm import tqdm

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda'

In [4]:
#hyperparameters

learning_rate = 0.001
batch_size = 64
num_epochs = 2
num_classes = 10
input_size = 784
sequence_length=28

In [5]:
train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=False
)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=False
)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

In [21]:
class ANN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ANN, self).__init__()
        self.linear1 = nn.Linear(input_size, 1024)
        self.linear2 = nn.Linear(1024, 512)
        self.linear3 = nn.Linear(512, num_classes)
    
    def forward(self,x):
        x = F.relu(self.linear1(x))
        #print(x.shape)
        x = F.relu(self.linear2(x))
        #print(x.shape)
        x = self.linear3(x)
        #print(x.shape)
        return x
    
    
    
class RNN(nn.Module):
    def __init__(self,input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size = input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size*sequence_length, num_classes)
    
    def forward(self,x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        out, _ = self.rnn(x,h0)
        out = out.reshape(out.shape[0], -1)
        out = self.linear(out)
        return out
    
class CNN(nn.Module):
    def __init__(self, channels, num_classes):
        super(CNN, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=channels, out_channels=8, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        self.cnn2 = nn.Conv2d(in_channels=8, out_channels=16,kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.pool2 = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        self.fc1 = nn.Linear(16*7*7, num_classes)
    
    def forward(self, x):
        x = F.relu(self.cnn1(x))
        x = self.pool(x)
        x = F.relu(self.cnn2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0],-1)
        x = self.fc1(x)
        return x

In [22]:
#model = ANN(input_size, num_classes).to(device)
#model = RNN(input_size = 28, hidden_size=256, num_layers = 2, num_classes=10).to(device)
model = CNN(channels=1,num_classes=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [23]:

def train(network):
    
    training_dict = {'train':train_loader, 'valid':test_loader}
    for epoch in range(num_epochs):

        num_correct = 0
        num_samples = 0

        for phase in ['train','valid']:

            if phase == "train":
                model.train()
            else:
                model.eval()

            loop = tqdm(enumerate(training_dict[phase]),total=len(training_dict[phase]))

            for batch_idx, (data, targets) in loop:
                # Get data to cuda if possible
                if network ==  "ANN" or network == "CNN":
                    data = data.to(device=device)
                elif network  == "RNN":
                    data = data.to(device=device).squeeze(1)

                targets = targets.to(device=device)

            # Get to correct shape
                if network == "ANN":
                    data = data.reshape(data.shape[0], -1)

            # forward
                scores = model(data)
                loss = criterion(scores, targets)
                optimizer.zero_grad()

                # backward
                if phase =="train":
                    loss.backward()
                    # gradient descent or adam step
                    optimizer.step()

                _, predictions = scores.max(1)
                num_correct += (predictions == targets).sum()
                num_samples += predictions.size(0)

            print(f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}")



#model = ANN(input_size, num_classes).to(device)
#model = RNN(input_size = 28, hidden_size=256, num_layers = 2, num_classes=10).to(device)
model = CNN(channels=1,num_classes=10).to(device)

train("CNN")

100%|██████████| 938/938 [00:05<00:00, 185.27it/s]
 15%|█▍        | 23/157 [00:00<00:00, 227.10it/s]

Got 54363 / 60000 with accuracy 90.61


100%|██████████| 157/157 [00:00<00:00, 237.43it/s]
  2%|▏         | 19/938 [00:00<00:05, 181.53it/s]

Got 64042 / 70000 with accuracy 91.49


100%|██████████| 938/938 [00:04<00:00, 199.97it/s]
 15%|█▌        | 24/157 [00:00<00:00, 235.31it/s]

Got 58240 / 60000 with accuracy 97.07


100%|██████████| 157/157 [00:00<00:00, 241.26it/s]

Got 68000 / 70000 with accuracy 97.14



