In [2]:
import torch #main torch module
import torch.nn as nn #neural net module
import torch.optim as optim #optimizers
import torch.nn.functional as F #functions like ReLu Sig Tanh etc
from torch.utils.data import DataLoader #help us with datasets

import torchvision.datasets as datasets #using to access std data
import torchvision.transforms as transforms #transformations

Lets start by creating our network

In [3]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes): #(28x28)
        #inheritance from nn.Module
        super(NN,self).__init__()
        self.fc1 = nn.Linear(input_size,50)
        self.fc2 = nn.Linear(50,num_classes)
        
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x        

In [4]:
model = NN(784,10)
x = torch.randn(64,784)
#print(x)
model(x).shape

torch.Size([64, 10])

In [5]:
# Set Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Hyperparameters
input_size = 784
num_classes = 10
learning_rate = 0.0001
batch_size = 64
num_epochs = 1

In [7]:
# Loading Data
train_dataset = datasets.MNIST(root='dataset/',
                               train=True, 
                               transform = transforms.ToTensor(),
                               download = True)

train_loader = DataLoader(dataset = train_dataset,
                         batch_size = batch_size,
                         shuffle = True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [8]:
# Loading Test Data
test_dataset = datasets.MNIST(root='dataset/',
                               train=False, 
                               transform = transforms.ToTensor(),
                               download = True)

test_loader = DataLoader(dataset = test_dataset,
                         batch_size = batch_size,
                         shuffle = True)

In [9]:
#init the network
model = NN(input_size,
          num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),
                      lr = learning_rate)

In [12]:
# training loop
for epoch in range(num_epochs):
    for batch_idx,(data, targets) in enumerate(train_loader):
        # data to devices
        print(data.shape,targets.shape)
        data = data.to(device)
        targets = targets.to(device)
        data = data.reshape(data.shape[0],-1)#flattens
        print(data.shape)
        #fwd
        scores = model(data)
        loss = criterion(scores, targets)
        
        #back
        optimizer.zero_grad() #so that it does not store prev backprop calc
        loss.backward()
        
        #gradient desc
        optimizer.step()

torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 784])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 7

In [10]:
def check_accuracy(loader,model):
    if loader.dataset.train:
        print("Checking Training Data Accuracy")
    else:
        print("Checking Test Data Accuract")
    
    num_correct = 0
    num_samples = 0
    model.eval() #set to evaluation mode
    
    with torch.no_grad():
        #only have to check accuracy, dont compute grads
        for x,y in loader:
            x = x.to(device)
            y = y.to(device)
            x = x.reshape(x.shape[0],-1)
            
            scores = model(x)
            _, predictions = scores.max(1)
            
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
            
        accuracy = float(num_correct)/float(num_samples)*100
        print(f"Got {num_correct} / {num_samples} with accuracy {accuracy: .2f}")

    model.train()
    return accuracy

In [11]:
check_accuracy(test_loader,model)
check_accuracy(train_loader,model)

Checking Test Data Accuract
Got 9278 / 10000 with accuracy  92.78
Checking Training Data Accuracy
Got 55802 / 60000 with accuracy  93.00


93.00333333333334