<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Installing-and-verifying-PyTorch" data-toc-modified-id="Installing-and-verifying-PyTorch-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Installing and verifying PyTorch</a></span><ul class="toc-item"><li><span><a href="#Install-PyTorch" data-toc-modified-id="Install-PyTorch-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Install PyTorch</a></span></li><li><span><a href="#Import-PyTorch" data-toc-modified-id="Import-PyTorch-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Import PyTorch</a></span></li><li><span><a href="#Verify-PyTorch-install" data-toc-modified-id="Verify-PyTorch-install-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Verify PyTorch install</a></span></li></ul></li><li><span><a href="#Tensors" data-toc-modified-id="Tensors-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Tensors</a></span><ul class="toc-item"><li><span><a href="#Creating-a-Tensor" data-toc-modified-id="Creating-a-Tensor-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Creating a Tensor</a></span></li><li><span><a href="#Tensors-on-GPGPU" data-toc-modified-id="Tensors-on-GPGPU-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Tensors on GPGPU</a></span></li><li><span><a href="#Operations-with-Tensors" data-toc-modified-id="Operations-with-Tensors-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Operations with Tensors</a></span></li><li><span><a href="#Parts-of-a-Tensor" data-toc-modified-id="Parts-of-a-Tensor-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Parts of a Tensor</a></span></li></ul></li><li><span><a href="#Automatic-Differentiation" data-toc-modified-id="Automatic-Differentiation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Automatic Differentiation</a></span><ul class="toc-item"><li><span><a href="#y-as-vector-value" data-toc-modified-id="y-as-vector-value-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>y as vector value</a></span></li></ul></li><li><span><a 
href="#Machine-Learning---LR-for-Image-Classification" data-toc-modified-id="Machine-Learning---LR-for-Image-Classification-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Machine Learning - LR for Image Classification</a></span></li><li><span><a href="#Deep-Learning---MLP-for-Image-Classification" data-toc-modified-id="Deep-Learning---MLP-for-Image-Classification-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Deep Learning - MLP for Image Classification</a></span></li></ul></div>

## Installing and verifying PyTorch

In [1]:
import sys

# Only the leading 23 characters of sys.version are shown, which drops the
# trailing build details from the banner.
version_prefix = sys.version[:23]
print(f'Python version:{version_prefix}')

Python version:3.6.6 |Anaconda, Inc.| 


### Install PyTorch

In [2]:
# Modify the path to check CUDA version on your system
cuda_version_file = '/usr/local/cuda/version.txt'
try:
    with open(cuda_version_file) as f:
        print(f.read())
except FileNotFoundError:
    # The file is absent when the CUDA toolkit is not installed at this
    # location; report that instead of aborting the whole notebook run.
    print('CUDA version file not found: {}'.format(cuda_version_file))

CUDA Version 9.2.148



In [3]:
!conda install --yes --prefix {sys.prefix} pytorch torchvision cuda92 -c pytorch
# or install with pip into the running kernel's interpreter. On PyPI the
# package is named `torch` (not `pytorch`), and `cuda92` is a conda package;
# NOTE(review): pick the CUDA-specific wheel per the pytorch.org install page.
# !{sys.executable} -m pip install torch torchvision

Solving environment: done

# All requested packages already installed.



### Import PyTorch

In [32]:
import torch

print(f'Torch version:{torch.__version__}')
# Fix the global RNG seed so the random tensors below are repeatable; the
# returned generator is displayed as the cell's output.
torch.manual_seed(123)

Torch version:0.4.1


<torch._C.Generator at 0x7f8a6b69a930>

### Verify PyTorch install

In [2]:
# Smoke test: build a random 3x5 tensor on the CPU and, when CUDA is
# available, copy it to the GPU.
cpu_tensor = torch.rand(3, 5)
print('Tensor on CPU:\n', cpu_tensor)
if not torch.cuda.is_available():
    print('Torch on CUDA not available')
else:
    gpu_tensor = cpu_tensor.cuda()
    print('Tensor on GPGPU:\n', gpu_tensor)

Tensor on CPU:
 tensor([[0.4962, 0.1538, 0.0902, 0.7859, 0.6387],
        [0.0409, 0.6671, 0.2795, 0.8469, 0.9989],
        [0.5040, 0.0473, 0.4016, 0.3811, 0.9353]])
Tensor on GPGPU:
 tensor([[0.4962, 0.1538, 0.0902, 0.7859, 0.6387],
        [0.0409, 0.6671, 0.2795, 0.8469, 0.9989],
        [0.5040, 0.0473, 0.4016, 0.3811, 0.9353]], device='cuda:0')


## Tensors

### Creating a Tensor

In [6]:
# A tensor can be built directly from a plain Python list.
python_list = [1, 2, 3, 4]
print(python_list)

tensor_from_list = torch.tensor(python_list)
print(tensor_from_list)

[1, 2, 3, 4]
tensor([1, 2, 3, 4])


In [7]:
import numpy as np

# Wrap an existing NumPy array as a tensor with torch.from_numpy.
numpy_array = np.array([5, 6, 7, 8])
print(numpy_array)

tensor_from_numpy_array = torch.from_numpy(numpy_array)
print(tensor_from_numpy_array)

[5 6 7 8]
tensor([5, 6, 7, 8])


In [8]:
# Convert the tensor back to a NumPy array and show its type and contents.
numpy_array_from_tensor = tensor_from_numpy_array.numpy()
print(type(numpy_array_from_tensor), numpy_array_from_tensor)

<class 'numpy.ndarray'> [5 6 7 8]


In [9]:
# 3x4 tensor filled with ones
all_ones = torch.ones((3, 4))
print(all_ones)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


In [10]:
# 3x4 tensor filled with zeros
all_zeros = torch.zeros((3, 4))
print(all_zeros)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [11]:
# 3x4 tensor of random values drawn by torch.rand
all_random = torch.rand((3, 4))
print(all_random)

tensor([[0.8669, 0.5278, 0.7756, 0.0596],
        [0.2067, 0.7052, 0.0145, 0.0369],
        [0.7459, 0.5661, 0.1640, 0.1375]])


In [12]:
# Allocate a 3x4 float32 tensor WITHOUT initializing it: the printed values
# are whatever happened to be in memory (this is why the saved output shows
# arbitrary numbers and nan). torch.empty is the factory-function
# replacement for the legacy torch.FloatTensor(3, 4) constructor.
all_float = torch.empty(3, 4, dtype=torch.float32)
print(all_float)

tensor([[-1.3369e-16,  3.0816e-41,  1.6816e-44,  0.0000e+00],
        [        nan,  0.0000e+00,  6.4076e+07,  2.0706e-19],
        [ 7.3909e+22,  3.0492e-41,  2.6279e+30,  4.5656e-41]])


In [13]:
# slice and dice examples: row at index 1, every column
second_row = all_random[1, :]
print(second_row)

tensor([0.2067, 0.7052, 0.0145, 0.0369])


In [14]:
# every row, column at index 2
third_column = all_random[:, 2]
print(third_column)

tensor([0.7756, 0.0145, 0.1640])


In [15]:
# top-left 2x2 block: rows 0-1, columns 0-1
top_left_block = all_random[:2, :2]
print(top_left_block)

tensor([[0.8669, 0.5278],
        [0.2067, 0.7052]])


### Tensors on GPGPU

In [16]:
# Copy the CPU tensor to the GPU; the printed tensor carries a device tag.
gpgpu_tensor = all_random.cuda()

print(gpgpu_tensor)

tensor([[0.8669, 0.5278, 0.7756, 0.0596],
        [0.2067, 0.7052, 0.0145, 0.0369],
        [0.7459, 0.5661, 0.1640, 0.1375]], device='cuda:0')


In [17]:
# Bring the GPU tensor back into host (CPU) memory.
cpu_tensor = gpgpu_tensor.cpu()

print(cpu_tensor)

tensor([[0.8669, 0.5278, 0.7756, 0.0596],
        [0.2067, 0.7052, 0.0145, 0.0369],
        [0.7459, 0.5661, 0.1640, 0.1375]])


In [18]:
# Operands of one operation must live on the same device. The error is the
# point of this cell, so it is caught and displayed rather than left to
# abort a "Restart & Run All".
try:
    print(gpgpu_tensor+cpu_tensor)
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #3 'other'

In [19]:
# Double the values on the CPU, then move the result to the first CUDA
# device with the generic .to() call.
cpu_tensor = 2 * cpu_tensor
target_device = 'cuda:0'
gpgpu_tensor = cpu_tensor.to(target_device)
print(gpgpu_tensor)

tensor([[1.7338, 1.0556, 1.5512, 0.1193],
        [0.4135, 1.4105, 0.0290, 0.0738],
        [1.4919, 1.1323, 0.3281, 0.2749]], device='cuda:0')


In [20]:
# Every tensor reports the device it lives on. Use this section's
# gpgpu_tensor; gpu_tensor only exists if the install-verification cell
# took its CUDA branch.
print(cpu_tensor.device)
print(gpgpu_tensor.device)

cpu
cuda:0


In [21]:
# Create the random tensor directly on the GPU instead of copying it there.
cuda_device = torch.device('cuda')
gpgpu_tensor = torch.rand((3, 4), device=cuda_device)
print(gpgpu_tensor)

tensor([[0.4679, 0.1410, 0.1218, 0.8473],
        [0.0770, 0.1401, 0.6140, 0.4993],
        [0.8017, 0.0937, 0.8986, 0.4896]], device='cuda:0')


### Operations with Tensors

In [22]:
# Two random 3x4 operands for the element-wise examples that follow.
x = torch.rand((3, 4))
y = torch.rand((3, 4))
print(x, y)

tensor([[0.2508, 0.9035, 0.6209, 0.9725],
        [0.7348, 0.2872, 0.7109, 0.9939],
        [0.6496, 0.5582, 0.5093, 0.4237]]) tensor([[0.9911, 0.6459, 0.1771, 0.6210],
        [0.3322, 0.3387, 0.8688, 0.8867],
        [0.3151, 0.1992, 0.1532, 0.9190]])


In [23]:
# Element-wise product through the functional API
elementwise_product = torch.mul(x, y)
print(elementwise_product)

tensor([[0.2485, 0.5836, 0.1100, 0.6040],
        [0.2441, 0.0973, 0.6176, 0.8813],
        [0.2047, 0.1112, 0.0780, 0.3894]])


In [24]:
# The * operator gives the same element-wise product
print(x * y)

tensor([[0.2485, 0.5836, 0.1100, 0.6040],
        [0.2441, 0.0973, 0.6176, 0.8813],
        [0.2047, 0.1112, 0.0780, 0.3894]])


In [25]:
# Methods ending in an underscore modify the tensor in place: x is printed
# before and after being overwritten with x * y.
print(x)

x.mul_(y)

print(x)

tensor([[0.2508, 0.9035, 0.6209, 0.9725],
        [0.7348, 0.2872, 0.7109, 0.9939],
        [0.6496, 0.5582, 0.5093, 0.4237]])
tensor([[0.2485, 0.5836, 0.1100, 0.6040],
        [0.2441, 0.0973, 0.6176, 0.8813],
        [0.2047, 0.1112, 0.0780, 0.3894]])


### Parts of a Tensor

In [26]:
# .data: the tensor's values
x_values = x.data
print(x_values)

tensor([[0.2485, 0.5836, 0.1100, 0.6040],
        [0.2441, 0.0973, 0.6176, 0.8813],
        [0.2047, 0.1112, 0.0780, 0.3894]])


In [27]:
# .grad: the accumulated gradient (None until a backward pass fills it)
x_gradient = x.grad
print(x_gradient)

None


In [28]:
# .grad_fn: the operation that produced the tensor, if any
x_grad_fn = x.grad_fn
print(x_grad_fn)

None


In [29]:
# Switch on gradient tracking for x in place; the call returns x, which is
# displayed as the cell output.
x.requires_grad_(requires_grad=True)

tensor([[0.2485, 0.5836, 0.1100, 0.6040],
        [0.2441, 0.0973, 0.6176, 0.8813],
        [0.2047, 0.1112, 0.0780, 0.3894]], requires_grad=True)

In [30]:
# A result computed from a gradient-tracking tensor tracks gradients too.
z = x + y
z_tracks_gradients = z.requires_grad
print(z_tracks_gradients)

True


In [31]:
# z was produced by an addition, which is recorded as its grad_fn
z_grad_fn = z.grad_fn
print(z_grad_fn)

<ThAddBackward object at 0x7f450c075518>


## Automatic Differentiation

In [21]:
# Input, weight and bias as single-element random tensors; none of them
# tracks gradients yet. Drawn in the order x, w, b.
x, w, b = (torch.rand(1) for _ in range(3))

for label, tensor in (('x:', x), ('w:', w), ('b:', b)):
    print(label, tensor)

x: tensor([0.3227])
w: tensor([0.8098])
b: tensor([0.7103])


In [22]:
# Linear model output
weighted_input = w * x
y = weighted_input + b
print('y:', y)

y: tensor([0.9716])


In [17]:
# Inspect gradient, producing function and tracking flag of every tensor.
# Each line is labelled so the output can be matched to its tensor, as in
# the later cells of this section.
for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

None None False
None None False
None None False
None None False


In [23]:
# None of the inputs tracks gradients, so there is no graph to walk back
# through. The error is the demonstration; catch and show it so the rest of
# the notebook still runs under "Restart & Run All".
try:
    y.backward()
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [33]:
# This time the weight and bias are created with gradient tracking enabled;
# the input x is left untracked.
x = torch.rand(1)
w = torch.rand(1, requires_grad=True)
b = torch.rand(1, requires_grad=True)

for label, tensor in (('x:', x), ('w:', w), ('b:', b)):
    print(label, tensor)

x: tensor([0.2961])
w: tensor([0.5166], requires_grad=True)
b: tensor([0.2517], requires_grad=True)


In [34]:
# y is computed from tracked tensors, so it carries a grad_fn
weighted_input = w * x
y = weighted_input + b
print(y)

tensor([0.4046], grad_fn=<ThAddBackward>)


In [35]:
# Gradient, producing function and tracking flag before any backward pass
for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

None None False
None None True
None None True
None <ThAddBackward object at 0x7f8a286284a8> True


In [36]:
# Backpropagate from y, then inspect the same attributes: the tracked leaf
# tensors now hold gradients.
y.backward()

for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

None None False
tensor([0.2961]) None True
tensor([1.]) None True
None <ThAddBackward object at 0x7f8a286294e0> True


In [37]:
# A second backward pass through the same graph fails because the first
# pass was not asked to retain it (retain_graph=True). The error is the
# demonstration; catch and show it so "Restart & Run All" keeps going.
try:
    y.backward()
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [43]:
# Fresh tensors and a two-step computation: y is an intermediate result and
# z = y * y is the final output.
x = torch.rand(1)
w = torch.rand(1, requires_grad=True)
b = torch.rand(1, requires_grad=True)

weighted_input = w * x
y = weighted_input + b
z = y * y

In [44]:
# State of every tensor in the graph before backpropagation
for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y), ('z:', z)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

None None False
None None True
None None True
None <ThAddBackward object at 0x7f8a28628ac8> True
None <ThMulBackward object at 0x7f8a28628b38> True


In [45]:
# Backpropagate from z. The leaves w and b receive gradients; the
# intermediate y does not keep one.
z.backward()

for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y), ('z:', z)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

None None False
tensor([0.0541]) None True
tensor([0.3961]) None True
None <ThAddBackward object at 0x7f8a286289e8> True
None <ThMulBackward object at 0x7f8a28628ac8> True


In [48]:
# Rebuild the graph, but call retain_grad() on the intermediate tensor y
# before the backward pass so that its gradient is kept as well.
x = torch.rand(1)
w = torch.rand(1, requires_grad=True)
b = torch.rand(1, requires_grad=True)

y = w * x + b
y.retain_grad()

z = y * y
z.backward()

for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y), ('z:', z)):
    print(label, tensor.grad, tensor.grad_fn, tensor.requires_grad)

x: None None False
w: tensor([0.7031]) None True
b: tensor([1.7501]) None True
y: tensor([1.7501]) <ThAddBackward object at 0x7f8a2862a5c0> True
z: None <ThMulBackward object at 0x7f8a2862a6d8> True


### y as vector value

In [52]:
# Same linear model with three-element tensors, so y is a vector.
x = torch.rand(3)
w = torch.rand(3, requires_grad=True)
b = torch.rand(3, requires_grad=True)
y = w * x + b

for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y)):
    print(label, tensor.data, tensor.grad, tensor.grad_fn, tensor.requires_grad)

x: tensor([0.8573, 0.8993, 0.0390]) None None False
w: tensor([0.9268, 0.7388, 0.7179]) None None True
b: tensor([0.7058, 0.9156, 0.4340]) None None True
y: tensor([1.5004, 1.5800, 0.4620]) None <ThAddBackward object at 0x7f8a2862a5c0> True


In [53]:
# backward() without arguments only works for scalar outputs; y is a vector
# here. The error is the demonstration; catch and show it so the notebook
# still runs under "Restart & Run All".
try:
    y.backward()
except RuntimeError as err:
    print('RuntimeError:', err)

RuntimeError: grad can be implicitly created only for scalar outputs

In [54]:
# For a vector output, pass backward() a gradient tensor of the same shape;
# a tensor of ones weights every element of y equally.
upstream_gradient = torch.ones_like(y)
y.backward(upstream_gradient)

for label, tensor in (('x:', x), ('w:', w), ('b:', b), ('y:', y)):
    print(label, tensor.data, tensor.grad, tensor.grad_fn, tensor.requires_grad)

x: tensor([0.8573, 0.8993, 0.0390]) None None False
w: tensor([0.9268, 0.7388, 0.7179]) tensor([0.8573, 0.8993, 0.0390]) None True
b: tensor([0.7058, 0.9156, 0.4340]) tensor([1., 1., 1.]) None True
y: tensor([1.5004, 1.5800, 0.4620]) None <ThAddBackward object at 0x7f8a2862c208> True


## Machine Learning - LR for Image Classification

In [56]:
# load dslib
DSLIB_HOME = '../datasetslib'
import sys
if not DSLIB_HOME in sys.path:
    sys.path.append(DSLIB_HOME)
%reload_ext autoreload
%autoreload 2
import datasetslib as dslib

# set the datasets root folder before you do the datasetslib import
import os
dslib.dsroot = os.path.join(os.path.expanduser('~'),'datasets')

from datasetslib.utils import imutil
from datasetslib.utils import nputil
from datasetslib.mnist import MNIST

In [57]:
# create an object of class datasetslib.mnist.MNIST
mnist = MNIST()

# load images in x and labels in y for train and test sets
mnist.y_onehot = True
mnist.x_layout = imutil.LAYOUT_NP

x_train, y_train, x_test, y_test = mnist.load_data()

# turn what load_data() returned into image arrays and one-hot label arrays
x_train = mnist.load_images(x_train)
y_train = nputil.onehot(y_train)
x_test = mnist.load_images(x_test)
y_test = nputil.onehot(y_test)

print('Loaded x and y')
for split_name, images, labels in (('Train', x_train, y_train),
                                   ('Test', x_test, y_test)):
    print('{}: x:{}, y:{}'.format(split_name, images.shape, labels.shape))

Already exists: /home/armando/datasets/mnist/train-images-idx3-ubyte.gz
Already exists: /home/armando/datasets/mnist/train-labels-idx1-ubyte.gz
Already exists: /home/armando/datasets/mnist/t10k-images-idx3-ubyte.gz
Already exists: /home/armando/datasets/mnist/t10k-labels-idx1-ubyte.gz
Extracting and rearchiving as jpg files...
/home/armando/datasets/mnist/train-images-idx3-ubyte.gz
Reading from  /home/armando/datasets/mnist/train-images-idx3-ubyte.gz
Reading from  /home/armando/datasets/mnist/train-labels-idx1-ubyte.gz
Saving  train
Zip file not modified
/home/armando/datasets/mnist/train-images-idx3-ubyte.gz
Reading from  /home/armando/datasets/mnist/t10k-images-idx3-ubyte.gz
Reading from  /home/armando/datasets/mnist/t10k-labels-idx1-ubyte.gz
Saving  test
Zip file not modified
Loading in x and y... start
Loading in x and y... done
Loaded x and y
Train: x:(60000, 784), y:(60000, 10)
Test: x:(10000, 784), y:(10000, 10)


In [58]:
# define hyperparameters
learning_rate = 0.0000001
n_epochs = 50
# bound used to keep predicted probabilities away from exactly 0 and 1
epsilon = 1e-6

# define input images
x = torch.tensor(x_train,dtype=torch.float32,requires_grad=False)
# define output labels
y = torch.tensor(y_train,dtype=torch.float32,requires_grad=False)

# model parameters: zero-initialized leaf tensors that track gradients.
# They are created directly with torch.zeros instead of copy-constructing a
# tensor from another tensor via torch.tensor(torch.zeros(...)).
w = torch.zeros([mnist.n_features, mnist.n_classes], requires_grad=True)
b = torch.zeros([mnist.n_classes], requires_grad=True)



In [62]:
# Train softmax (multinomial logistic) regression with full-batch gradient
# descent and report the training-set accuracy after every forward pass.

# The true class indices do not change between epochs: compute them once.
_, y_idx = torch.max(y, 1)

for epoch in range(n_epochs):
    # Forward propagation
    logits = torch.add(torch.matmul(x, w), b)
    y_hat = torch.nn.functional.softmax(logits, dim=1)

    # Calculate loss. Probabilities are clamped so log() never sees 0.
    y_hat_clipped = torch.clamp(y_hat, epsilon, 1 - epsilon)
    y_hat_log = torch.log(y_hat_clipped)
    # NOTE: the cross entropy is SUMMED over all samples and classes, not
    # averaged (the original torch.mean() of this scalar was a no-op).
    cross_entropy = -torch.sum(y * y_hat_log)
    loss = cross_entropy

    # set the gradients to zero (they are None before the first backward)
    for param in [w, b]:
        if param.grad is not None:
            param.grad.zero_()

    # Backward propagation
    loss.backward()

    # update the parameters; no_grad keeps the in-place update itself out of
    # the autograd graph
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad

    # Predict and print train set accuracy
    _, y_hat_idx = torch.max(y_hat, 1)

    # cast the element-wise comparison to float so it can be averaged
    predictions_check = torch.eq(y_hat_idx, y_idx).float()
    accuracy_score = torch.mean(predictions_check)
    print('epoch {0:04d}  accuracy={1:.8f}'
          .format(epoch, accuracy_score.item()))

epoch 0000  accuracy=0.09871667
epoch 0001  accuracy=0.66996664
epoch 0002  accuracy=0.72711664
epoch 0003  accuracy=0.74848336
epoch 0004  accuracy=0.76136667
epoch 0005  accuracy=0.76831669
epoch 0006  accuracy=0.77590001
epoch 0007  accuracy=0.78108335
epoch 0008  accuracy=0.78500003
epoch 0009  accuracy=0.78796667
epoch 0010  accuracy=0.78986669
epoch 0011  accuracy=0.79083335
epoch 0012  accuracy=0.79110003
epoch 0013  accuracy=0.79430002
epoch 0014  accuracy=0.79560000
epoch 0015  accuracy=0.79746670
epoch 0016  accuracy=0.79883331
epoch 0017  accuracy=0.80084997
epoch 0018  accuracy=0.80145001
epoch 0019  accuracy=0.80255002
epoch 0020  accuracy=0.80281669
epoch 0021  accuracy=0.80328333
epoch 0022  accuracy=0.80133331
epoch 0023  accuracy=0.80051666
epoch 0024  accuracy=0.79311669
epoch 0025  accuracy=0.78544998
epoch 0026  accuracy=0.75555003
epoch 0027  accuracy=0.77738333
epoch 0028  accuracy=0.77209997
epoch 0029  accuracy=0.79746670
epoch 0030  accuracy=0.80400002
epoch 00

## Deep Learning - MLP for Image Classification

In [127]:
# Multi-layer perceptron (784 -> 512 -> 256 -> 10) built with nn.Sequential
# and trained full-batch with Adam. Note: this cell overrides the
# learning_rate and n_epochs set for the logistic-regression example.
learning_rate = 0.001
n_epochs = 50
model = torch.nn.Sequential(torch.nn.Linear(784,512),
                          torch.nn.ReLU(),
                          torch.nn.Linear(512,256),
                          torch.nn.ReLU(),
                          torch.nn.Linear(256,10))
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

# CrossEntropyLoss is given class indices, so recover them from the one-hot
# labels. They never change, so this is done once outside the loop.
_, y_idx = torch.max(y, 1)

for epoch in range(n_epochs):
    # Forward prop
    y_hat = model(x)

    # calculate loss
    loss = loss_f(y_hat, y_idx)

    # Backward prop (clear gradients left over from the previous step first)
    optimizer.zero_grad()
    loss.backward()

    # Apply gradients
    optimizer.step()

    # Check accuracy of the predictions made before this update
    _, y_hat_idx = torch.max(y_hat, 1)
    # cast the element-wise comparison to float so it can be averaged
    predictions_check = torch.eq(y_hat_idx, y_idx).float()
    accuracy_score = torch.mean(predictions_check)
    print('epoch {0:04d}  accuracy={1:.8f}'
          .format(epoch, accuracy_score.item()))

epoch 0000  accuracy=0.08321667
epoch 0001  accuracy=0.31131667
epoch 0002  accuracy=0.21960001
epoch 0003  accuracy=0.34123334
epoch 0004  accuracy=0.37403333
epoch 0005  accuracy=0.49971667
epoch 0006  accuracy=0.52168334
epoch 0007  accuracy=0.52668333
epoch 0008  accuracy=0.52846664
epoch 0009  accuracy=0.58356667
epoch 0010  accuracy=0.69476664
epoch 0011  accuracy=0.74675000
epoch 0012  accuracy=0.72381669
epoch 0013  accuracy=0.69466668
epoch 0014  accuracy=0.70195001
epoch 0015  accuracy=0.73746669
epoch 0016  accuracy=0.76611668
epoch 0017  accuracy=0.78046668
epoch 0018  accuracy=0.79783332
epoch 0019  accuracy=0.81875002
epoch 0020  accuracy=0.83628333
epoch 0021  accuracy=0.84516668
epoch 0022  accuracy=0.84745002
epoch 0023  accuracy=0.84791666
epoch 0024  accuracy=0.84890002
epoch 0025  accuracy=0.85294998
epoch 0026  accuracy=0.85921669
epoch 0027  accuracy=0.86693335
epoch 0028  accuracy=0.87440002
epoch 0029  accuracy=0.88108331
epoch 0030  accuracy=0.88630003
epoch 00

In [64]:
class MLPModel(torch.nn.Module):
    """Fully connected classifier with two hidden layers.

    Layer sizes are n_features -> 512 -> 256 -> n_labels, with a ReLU after
    each of the two hidden linear layers. The output is the raw result of
    the last linear layer.
    """

    def __init__(self, n_features, n_labels):
        """Create the layers.

        Args:
            n_features: size of each input vector.
            n_labels: number of output values per input.
        """
        super().__init__()
        hidden_1, hidden_2 = 512, 256
        self.linear_in = torch.nn.Linear(n_features, hidden_1)
        self.relu_in_1 = torch.nn.ReLU()
        self.linear_h1 = torch.nn.Linear(hidden_1, hidden_2)
        self.relu_1_out = torch.nn.ReLU()
        self.linear_out = torch.nn.Linear(hidden_2, n_labels)

    def forward(self, x):
        """Run x through the layers in order and return the final output."""
        hidden = self.relu_in_1(self.linear_in(x))
        hidden = self.relu_1_out(self.linear_h1(hidden))
        return self.linear_out(hidden)
    
# Train the MLPModel class full-batch with Adam, using the same settings as
# the nn.Sequential version.
learning_rate = 0.001
n_epochs = 50
model = MLPModel(784,10)

loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

# CrossEntropyLoss is given class indices, so recover them from the one-hot
# labels. They never change, so this is done once outside the loop.
_, y_idx = torch.max(y, 1)

for epoch in range(n_epochs):
    # Forward prop
    y_hat = model(x)

    # calculate loss
    loss = loss_f(y_hat, y_idx)

    # Backward prop (clear gradients left over from the previous step first)
    optimizer.zero_grad()
    loss.backward()

    # Apply gradients
    optimizer.step()

    # Check accuracy of the predictions made before this update
    _, y_hat_idx = torch.max(y_hat, 1)
    # cast the element-wise comparison to float so it can be averaged
    predictions_check = torch.eq(y_hat_idx, y_idx).float()
    accuracy_score = torch.mean(predictions_check)
    print('epoch {0:04d}  accuracy={1:.8f}'
          .format(epoch, accuracy_score.item()))

epoch 0000  accuracy=0.11005000
epoch 0001  accuracy=0.25784999
epoch 0002  accuracy=0.34143335
epoch 0003  accuracy=0.45956665
epoch 0004  accuracy=0.57298332
epoch 0005  accuracy=0.59201664
epoch 0006  accuracy=0.62438333
epoch 0007  accuracy=0.63319999
epoch 0008  accuracy=0.66495001
epoch 0009  accuracy=0.72753334
epoch 0010  accuracy=0.77231669
epoch 0011  accuracy=0.70683336
epoch 0012  accuracy=0.68645000
epoch 0013  accuracy=0.72438335
epoch 0014  accuracy=0.77261668
epoch 0015  accuracy=0.80868334
epoch 0016  accuracy=0.83238333
epoch 0017  accuracy=0.84060001
epoch 0018  accuracy=0.83436668
epoch 0019  accuracy=0.82796669
epoch 0020  accuracy=0.83329999
epoch 0021  accuracy=0.84573334
epoch 0022  accuracy=0.86026669
epoch 0023  accuracy=0.87271667
epoch 0024  accuracy=0.88348335
epoch 0025  accuracy=0.88905001
epoch 0026  accuracy=0.89166665
epoch 0027  accuracy=0.89295000
epoch 0028  accuracy=0.89331669
epoch 0029  accuracy=0.89406669
epoch 0030  accuracy=0.89636666
epoch 00