In [3]:
import torch
import torch.nn as nn
import torchvision as tv

train_dataset = torchvision.datasets.MNIST(root='./data',train=True, transform=tv.transforms.ToTensor(), download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',train=False, transform=tv.transforms.ToTensor(),download = True)


train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=100, shuffle=False)

In [57]:
class MyMax(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache arbitrary
        objects for use in the backward pass using the ctx.save_for_backward method.
        """
        # indx cols
        idx = torch.argmax(input, 1)
        ctx.save_for_backward(idx)
        # each row
        each_row = torch.arange(input.size(0)).long()
        
        # result
        result = torch.zeros_like(input)
        result[each_row, idx] = input[each_row, idx]      
        
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        idx, = ctx.saved_tensors
        each_row = torch.arange(idx.size(0)).long()
    
        # result
        grad_input = torch.zeros_like(grad_output)
        grad_input[each_row, idx] = 1         
    
        #grad_input[idx] = 1
        return grad_input
    

class MyReLU(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache arbitrary
        objects for use in the backward pass using the ctx.save_for_backward method.
        """
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

In [58]:
class NeuralNet(nn.Module):
    def __init__(self, input_size,hidden_size,output_size):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        #self.relu = MyReLU.apply #MyMax.apply#nn.ReLU()
        self.relu = MyMax.apply
        
    def forward(self, x):
        output = self.layer1(x)
        output = self.relu(output)
        output = self.layer2(output)
        return output

input_size = 784
hidden_size = 500
output_size = 10
num_epochs = 5

learning_rate = 0.001

model = NeuralNet(input_size,hidden_size, output_size)

lossFunction = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images,labels) in enumerate(train_loader):
        images = images.reshape(-1,28*28)
        out = model(images)
        loss = lossFunction(out,labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/5], Step [100/600], Loss: 2.3079
Epoch [1/5], Step [200/600], Loss: 2.3121
Epoch [1/5], Step [300/600], Loss: 2.2931
Epoch [1/5], Step [400/600], Loss: 2.2854
Epoch [1/5], Step [500/600], Loss: 2.4191
Epoch [1/5], Step [600/600], Loss: 2.3321
Epoch [2/5], Step [100/600], Loss: 2.3530
Epoch [2/5], Step [200/600], Loss: 2.4463
Epoch [2/5], Step [300/600], Loss: 2.3184
Epoch [2/5], Step [400/600], Loss: 2.3454
Epoch [2/5], Step [500/600], Loss: 2.3323
Epoch [2/5], Step [600/600], Loss: 2.4103
Epoch [3/5], Step [100/600], Loss: 2.3575
Epoch [3/5], Step [200/600], Loss: 2.3725
Epoch [3/5], Step [300/600], Loss: 2.3530
Epoch [3/5], Step [400/600], Loss: 2.3912
Epoch [3/5], Step [500/600], Loss: 2.3484
Epoch [3/5], Step [600/600], Loss: 2.4042
Epoch [4/5], Step [100/600], Loss: 2.3175
Epoch [4/5], Step [200/600], Loss: 2.4359
Epoch [4/5], Step [300/600], Loss: 2.3764
Epoch [4/5], Step [400/600], Loss: 2.4108
Epoch [4/5], Step [500/600], Loss: 2.3643
Epoch [4/5], Step [600/600], Loss:

In [59]:
with torch.no_grad():
    correct = 0
    total = 0
    for images,labels in test_loader:
        images = images.reshape(-1,28*28)
        out = model(images)
        _,predicted = torch.max(out.data,1)
        total += labels.size(0)
        correct += (predicted==labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 12.73 %
