### Importing all the libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### Creating a Neural Network

In [2]:
class NN(nn.Module):
    """Fully connected classifier: input_size -> 300 -> 200 -> num_classes.

    A ReLU follows each of the two hidden layers. The output of the last
    linear layer is returned as-is; no softmax is applied.
    """

    def __init__(self, input_size, num_classes):
        """Create the three linear layers.

        Args:
            input_size: Number of features in each input row.
            num_classes: Number of scores produced for each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 300)
        self.fc2 = nn.Linear(300, 200)
        self.fc3 = nn.Linear(200, num_classes)

    def forward(self, x):
        """Return one raw score per class for every row of ``x``."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


### Checking the model on random data

In [3]:
# Smoke test: push a random batch of 64 rows with 784 features through the
# network and confirm that it yields 10 scores per row.
model = NN(input_size=784, num_classes=10)
x = torch.randn(64, 784)
print(model(x).shape)

torch.Size([64, 10])


In [4]:
# Keep the model's output for the random batch so the following cells can inspect it.
arr = model(x)

In [5]:
# Display the raw output scores (NN.forward applies no softmax).
arr

tensor([[ 0.1844,  0.1732, -0.0874, -0.0231,  0.0335, -0.2187,  0.1254, -0.0349,
          0.1977, -0.1596],
        [ 0.1523,  0.1441, -0.0775,  0.0701,  0.0566, -0.1772, -0.0663, -0.1093,
          0.2487, -0.1130],
        [ 0.2166,  0.1678, -0.2731, -0.0151, -0.1103, -0.1758, -0.0751, -0.1203,
          0.1286, -0.1955],
        [ 0.0391,  0.2051, -0.0776,  0.0757, -0.0467, -0.1010,  0.0735,  0.0225,
          0.1982, -0.1029],
        [ 0.0672,  0.2038, -0.0117,  0.0520, -0.1068, -0.1884, -0.0485, -0.1894,
          0.0660, -0.1963],
        [ 0.1234,  0.1905, -0.1016, -0.0234,  0.0165, -0.1088,  0.0331, -0.2198,
          0.2032, -0.2575],
        [ 0.1370,  0.2612, -0.1359,  0.0233, -0.0253, -0.1444,  0.1262, -0.1641,
          0.2239, -0.1516],
        [ 0.1197,  0.3092, -0.0347, -0.0604,  0.0486, -0.2086,  0.0153,  0.0493,
          0.2113, -0.0669],
        [ 0.0649,  0.2301, -0.0894, -0.0439, -0.0888, -0.1665, -0.0845,  0.0237,
          0.1161, -0.0932],
        [ 0.1088,  

In [6]:
# "ij->i" sums over the second index, giving one total per row of `arr`.
# NOTE(review): presumably a check that the rows are not normalized (they would
# each sum to 1 after a softmax) - confirm intent.
torch.einsum("ij->i",arr)

tensor([ 0.1906,  0.1285, -0.4522,  0.2857, -0.3522, -0.1443,  0.1503,  0.3827,
        -0.1315, -0.0056,  0.2014, -0.2000,  0.0575, -0.0333, -0.1182,  0.1823,
        -0.0705, -0.3379, -0.3170, -0.0137,  0.0977, -0.1250,  0.0224, -0.3377,
         0.0273, -0.1314,  0.1083,  0.2057,  0.1808,  0.3910, -0.2406, -0.0168,
         0.0529, -0.6016, -0.0909, -0.3917, -0.0129, -0.1036,  0.1044, -0.3564,
        -0.5027, -0.0454, -0.1157, -0.2110,  0.2176, -0.3004, -0.3398,  0.3573,
        -0.1950,  0.0366,  0.3247, -0.2938,  0.1195,  0.1705, -0.2144,  0.3268,
        -0.1229,  0.1556, -0.2560, -0.0204, -0.3021, -0.1118, -0.5401,  0.2873],
       grad_fn=<SumBackward1>)

### Using CUDA if it is available

In [7]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Hyperparameters

In [8]:
# Settings for the run, gathered in one place so they are easy to tune.
input_size = 28 * 28    # 784 features per flattened input sample
num_classes = 10        # number of output scores per sample
batch_size = 64         # samples per mini-batch in the data loaders
num_epochs = 1          # full passes over the training set
learning_rate = 1e-4    # step size handed to the optimizer

### Data Loading

In [9]:
# MNIST is fetched into data/ on first use (download=True); ToTensor converts
# each image to a tensor before it is batched.
train_dataset = datasets.MNIST(root='data/', train=True, transform=transforms.ToTensor(), download=True)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='data/', train=False, transform=transforms.ToTensor(), download=True)
# Accuracy does not depend on sample order, so the held-out set is left
# unshuffled to keep evaluation batches reproducible between runs.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Initialize the Model

In [10]:
# Build a fresh network from the configured sizes and move it to the selected device.
# This rebinds `model`, replacing the instance created for the random-data check.
model = NN(input_size=input_size,num_classes=num_classes).to(device)

### Loss and Optimizer

In [11]:
# CrossEntropyLoss operates on unnormalized class scores, which is what
# NN.forward returns (no softmax is applied there).
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of `model` with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train Network

In [12]:
# Each epoch is one pass over train_loader; every mini-batch produces exactly
# one optimizer update.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the chosen device and collapse each sample into a
        # single row so it matches the first linear layer.
        data = data.to(device).reshape(data.size(0), -1)
        targets = targets.to(device)

        # Drop gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss on the raw class scores.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backpropagate, then apply the parameter update.
        loss.backward()
        optimizer.step()

### Checking the accuracy of the Model

In [13]:
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    The model is switched to evaluation mode for the duration of the check and
    put back into whichever mode it was in when the function was called.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches, e.g. a
            ``DataLoader``. If it exposes ``loader.dataset.train``, that flag
            selects the "training" / "test" banner printed first.
        model: ``nn.Module`` that maps a ``(batch, features)`` tensor to one
            score per class for each row.

    Returns:
        None. The result is reported with ``print``.
    """
    # Not every loader/dataset carries a `train` flag, so look it up
    # defensively instead of raising AttributeError.
    is_train = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train is None:
        print("Checking accuracy on data")
    elif is_train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    # Follow the device the model actually lives on rather than depending on a
    # notebook-level `device` variable being defined and in sync with it.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0

    # Evaluation mode matters for layers such as dropout and batch norm.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if model_device is not None:
                    x = x.to(device=model_device)
                    y = y.to(device=model_device)

                # Collapse every sample into a single feature row.
                x = x.reshape(x.shape[0], -1)

                scores = model(x)

                # Index of the highest score in each row is the predicted class.
                _, predictions = scores.max(1)
                # .item() keeps the running total a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    if num_samples == 0:
        # Avoid dividing by zero when the loader yields no batches.
        print("Got 0/0: no samples to evaluate")
        return

    print(f'Got {num_correct}/{num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')

# Report accuracy on both loaders with the trained model.
check_accuracy(train_loader,model)
check_accuracy(test_loader,model)

Checking accuracy on training data
Got 54907/60000 with accuracy 91.51
Checking accuracy on test data
Got 9165/10000 with accuracy 91.65
