# Installation of PyTorch
The PyTorch binary can be installed with pip, or compiled from source by following the instructions at https://github.com/pytorch/pytorch#from-source.

Here we will use pip.

## Define a Set of Computation Nodes

| &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Name  &nbsp;&nbsp;&nbsp;&nbsp;  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;    |   &nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Formula &nbsp;&nbsp;&nbsp;&nbsp;   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;    |   &nbsp;&nbsp;  Gradients  |
|:-------------:|:------------- |:----- |
| Linear      | $y=x^T\cdot W+b$ | $\frac{\partial \mathcal{L}}{\partial x}=W\cdot\frac{\partial \mathcal{L}}{\partial y}\\\frac{\partial \mathcal{L}}{\partial W}=x^T\cdot\frac{\partial \mathcal{L}}{\partial y}\\\frac{\partial \mathcal{L}}{\partial b}=\frac{\partial \mathcal{L}}{\partial y}$ |
| Sigmoid     | $y=\frac{1}{1+e^{-x}}$  | $\frac{\partial \mathcal{L}}{\partial x}=\frac{\partial \mathcal{L}}{\partial y}(1-y)y$ |
| Softmax     | $y_j=\frac{e^{x_j}}{\sum\limits_i e^{x_i}}$ | $\frac{\partial \mathcal{L}}{\partial x_j}=\frac{\partial \mathcal{L}}{\partial y_j}y_j-y_j\sum\limits_i \frac{\partial \mathcal{L}}{\partial y_i}y_i$ |
| CrossEntropy | $y=-\sum\limits_i p_i \log(x_i)$ | $\frac{\partial \mathcal{L}}{\partial x_i}=-\frac{\partial \mathcal{L}}{\partial y}\frac{p_i}{x_i}$ |
| Mean  | $y=\frac{1}{N}\sum\limits_i x_i$ | $\frac{\partial \mathcal{L}}{\partial x_i}=\frac{1}{N}\frac{\partial \mathcal{L}}{\partial y}$ |

In [10]:
try:
    import torch
except:
    !pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
    import torch

In [11]:
from torchvision import datasets, transforms

# Data loaders: one over the MNIST training split, one over the test split.
batch_size = 200

# Shared preprocessing: convert each image to a tensor, then normalise it
# with mean 0.1307 and standard deviation 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split (downloaded into ../data on first use), reshuffled each epoch.
train_set = datasets.MNIST(
    '../data', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True)

# Test split, read from the same ../data directory.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True)

In [12]:
import torch.nn as nn
import torch.nn.functional as F

# create a neural network
class Net(nn.Module):
    """Single linear layer followed by log-softmax over 10 outputs.

    The input's last dimension must hold 28 * 28 = 784 features; the output's
    last dimension holds 10 log-probabilities.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(28 * 28, 10)

    def forward(self, x):
        """Return log-probabilities for ``x`` (shape ``(..., 784)``)."""
        # Normalise explicitly over the last (class) dimension. Omitting `dim`
        # is deprecated, emits a warning, and falls back to a legacy rule that
        # picks axis 0 for 3-D input; `dim=-1` is identical for 1-D/2-D input.
        return F.log_softmax(self.linear(x), dim=-1)


net = Net()
print(net)

Net(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


In [None]:
import torch.optim as optim

def train(learning_rate=0.5, epochs=10):
    """Train the global ``net`` on ``train_loader``, then report test metrics.

    Runs SGD without momentum against a negative log-likelihood loss and
    prints the loss of the last mini-batch after every epoch. Afterwards it
    makes one pass over ``test_loader`` and prints the per-sample average
    loss together with the number and percentage of correct predictions.

    Args:
        learning_rate: Step size passed to ``optim.SGD``.
        epochs: Number of full passes over ``train_loader``.
    """
    # create a stochastic gradient descent optimizer
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0)
    # create a loss function (by default it averages over the mini-batch)
    loss_func = nn.NLLLoss()

    # run the main training loop
    for epoch in range(epochs):
        for data, target in train_loader:
            # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
            data = data.view(-1, 28 * 28)
            optimizer.zero_grad()
            net_out = net(data)
            loss = loss_func(net_out, target)
            loss.backward()
            optimizer.step()
        # `.item()` reads the 0-dim loss as a Python float; indexing it with
        # `[0]` raises on current PyTorch releases.
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))

    # run a test loop
    test_loss = 0.0
    correct = 0
    # No gradients are needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for data, target in test_loader:
            data = data.view(-1, 28 * 28)
            net_out = net(data)
            # The loss is a per-batch mean; weight it by the batch size so the
            # running total is a sum over samples (the last batch may be
            # smaller than the others).
            test_loss += loss_func(net_out, target).item() * data.size(0)
            # get the index of the max log-probability
            pred = net_out.max(1)[1]
            # keep the counter a plain int so it prints as a number
            correct += int(pred.eq(target).sum().item())

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))

In [None]:
train()

  # This is added back by InteractiveShellApp.init_path()


Train Epoch: 0 	Loss: 0.418251
Train Epoch: 1 	Loss: 0.389646
