# A Simple Pytorch Neural Network 

In [9]:
# Imports
import numpy as np
import tqdm
import torch 
import torch.nn as nn # all neural network modules nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim # for optimization algo. Adam, SGD
import torch.nn.functional as F # all the acitvation function that don't have the parameters like tanh, relu etc.
from torch.utils.data import DataLoader # Easier dataset management & creates mini batches
import torchvision.datasets as datasets # inbuilt datasets
import torchvision.transforms as transforms # transformations we can perform in our dataset

# Setting device to CUDA (optional)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create a fully connected network
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.fc2 = nn.Linear(50, num_classes)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

# just for the testing purpose
# model = NN(784, 10) # we are putting 10 classes and 784 input_size ... ie. after flatterning the data the size in 784
# x = torch.randn(500, 784) # this is the input data... 500 examples and 784 long array for each example
# print(model(x).shape)

# Hyperparameters setting
input_size = 784
num_classes = 10
learning_rate = 0.0001
batch_size = 64
num_epochs = 10


# Load the Data Train & Test
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


# Initialize the Network
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the Network
for epoch in tqdm.tqdm(range(num_epochs)):
    for batch_idx, (data, targets) in enumerate(train_loader):
        
        # Convert the data to the CUDA for faster training
        data = data.to(device=device)
        targets = targets.to(device=device)
        
        # Flattening the data
        data = data.reshape(data.shape[0], -1)
        
        # Forward
        scores = model(data)
        loss = criterion(scores, targets)
        
        # Backward
        optimizer.zero_grad() # so that they don't store the previous gradrients
        loss.backward() # gradients computed here
        
        # gradient steps 
        optimizer.step() # here we update the weights based on the gradients computed on top loss.backward()
        
        
# Check Accuracy on training & test set 

def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on Train Data!!... ")
    else:
        print(" Checking accuracy on Test Data!!  ")
    
    num_correct = 0
    num_samples = 0
    model.eval() # let the model know that it is in evaluation mode
    
    # to let the model know that you dont have to compute the gradients while doing the tesing/accuracy checking
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device)
            y = y.to(device)
            
            x = x.reshape(x.shape[0], -1) # Flattening
            
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
            
        print('{}/{} with accuracy {}'.format(num_correct, num_samples, np.round(num_correct/num_samples, 2)))
        
    model.train()    
    
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)
            


  0%|                                                                                           | 0/10 [00:00<?, ?it/s]
0it [00:00, ?it/s][A
14it [00:00, 137.30it/s][A
26it [00:00, 129.90it/s][A
38it [00:00, 124.40it/s][A
52it [00:00, 128.35it/s][A
66it [00:00, 130.17it/s][A
77it [00:00, 117.08it/s][A
88it [00:00, 113.09it/s][A
102it [00:00, 119.40it/s][A
114it [00:00, 115.10it/s][A
126it [00:01, 109.51it/s][A
140it [00:01, 115.43it/s][A
153it [00:01, 118.46it/s][A
165it [00:01, 116.84it/s][A
177it [00:01, 113.11it/s][A
189it [00:01, 112.82it/s][A
204it [00:01, 121.88it/s][A
217it [00:01, 122.80it/s][A
230it [00:01, 119.04it/s][A
243it [00:02, 113.79it/s][A
255it [00:02, 108.68it/s][A
270it [00:02, 116.26it/s][A
282it [00:02, 111.47it/s][A
296it [00:02, 117.54it/s][A
311it [00:02, 124.76it/s][A
326it [00:02, 131.04it/s][A
340it [00:02, 118.96it/s][A
353it [00:02, 111.98it/s][A
366it [00:03, 116.52it/s][A
381it [00:03, 123.51it/s][A
394it [00:03, 120.84it/s

233it [00:02, 120.96it/s][A
246it [00:02, 120.78it/s][A
259it [00:02, 115.21it/s][A
272it [00:02, 118.63it/s][A
284it [00:02, 118.33it/s][A
298it [00:02, 122.79it/s][A
311it [00:02, 122.05it/s][A
324it [00:02, 118.55it/s][A
338it [00:02, 122.31it/s][A
351it [00:02, 121.04it/s][A
365it [00:03, 124.15it/s][A
378it [00:03, 125.12it/s][A
392it [00:03, 127.13it/s][A
406it [00:03, 128.58it/s][A
419it [00:03, 119.73it/s][A
432it [00:03, 109.61it/s][A
444it [00:03, 110.36it/s][A
456it [00:03, 109.08it/s][A
470it [00:04, 115.09it/s][A
483it [00:04, 116.31it/s][A
497it [00:04, 122.21it/s][A
510it [00:04, 121.31it/s][A
523it [00:04, 113.42it/s][A
535it [00:04, 108.44it/s][A
547it [00:04, 111.05it/s][A
559it [00:04, 111.07it/s][A
571it [00:04, 103.33it/s][A
582it [00:05, 103.47it/s][A
593it [00:05, 104.44it/s][A
604it [00:05, 104.84it/s][A
618it [00:05, 112.02it/s][A
632it [00:05, 118.55it/s][A
645it [00:05, 120.76it/s][A
658it [00:05, 115.50it/s][A
670it [00:05, 

Checking accuracy on Train Data!!... 
45434/60000 with accuracy 0.7599999904632568
 Checking accuracy on Test Data!!  
7581/10000 with accuracy 0.7599999904632568
