# Installation of Pytorch
Pytorch binary can be installed by pip, or complied following instructions in https://github.com/pytorch/pytorch#from-source.

Here we will use pip.

In [1]:
try:
    import torch
except:
    !pip install torch torchvision
    import torch

# Introducing Pytorch
Pytoch is composed of three parts: Tensors, Autograd and Module.

For more detailed tutorial, refer to http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html.

## Tensors
They are like numpy arrays, you can do basic operations on them, like:

In [2]:
import numpy as np
import torch

a = torch.Tensor([1,2,3])
b = torch.Tensor([3,4,5])

c = a+b # add.

a = torch.ones(4,4)
b = torch.ones(4)

c = torch.mv(a,b) # matrix multiply vector.

a = torch.randn(4,4)
b = torch.randn(4,3)

c = torch.mm(a,b) # matrix multiply matrix.

# indexing Tensor is the same as indexing numpy array.

c[:,0]

a_np = a.numpy() # convert tensor to numpy array.

d = torch.from_numpy(a_np) # convert numpy array to tensor.

## Autograd
Also, after pytorch 0.4, tensors can be used as a node in computation graph(or called Variable in 0.3 and before). They can be used to calculate gradients, like:

In [3]:
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
print(out) # the result of above computation.
print(out.grad_fn) # print the former point of out in computation graph.
out.backward() # do backward of this computation graph.
print(x.grad) # the gradient of original X.

print(x.detach()) # detach() gives a copy of tensor called on and this copy doesn't need gradients.
print(out.detach().numpy()) # note that after used in computation graph you have to detach to call numpy().

tensor(27.)
<MeanBackward1 object at 0x111cad2b0>
tensor([[ 4.5000,  4.5000],
        [ 4.5000,  4.5000]])
tensor([[ 1.,  1.],
        [ 1.,  1.]])
27.0


## Module
To define a neural network, we need Module. Module is the base class for all neural network modules.

The following code define a neural network same as the neural network wrote using numpy in BP tutorial

In [4]:
import torch.nn as nn
import torch.nn.functional as F

# create a neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear = nn.Linear(28 * 28, 10)

    def forward(self, x):
        return F.log_softmax(self.linear(x))


net = Net()
print(net)

Net(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


# Demo: A Digit Classification Problem using Pytorch



In [5]:
from torchvision import datasets, transforms

# data loader: we split the dataset into training set and test set.
batch_size = 200
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

In [6]:
import torch.optim as optim

def train(learning_rate=0.5, epochs=10):

    # create a stochastic gradient descent optimizer
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0)
    # create a loss function
    loss_func = nn.NLLLoss()

    # run the main training loop
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
            data = data.view(-1, 28 * 28)
            optimizer.zero_grad()
            net_out = net(data)
            loss = loss_func(net_out, target)
            loss.backward()
            optimizer.step()
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.data[0]))

    # run a test loop
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        net_out = net(data)
        # sum up batch loss
        test_loss += loss_func(net_out, target).data[0]
        # get the index of the max log-probability
        pred = net_out.data.max(1)[1]
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [7]:
train()

  # This is added back by InteractiveShellApp.init_path()


Train Epoch: 0 	Loss: 0.557232
Train Epoch: 1 	Loss: 0.517485
Train Epoch: 2 	Loss: 0.279360
Train Epoch: 3 	Loss: 0.644049
Train Epoch: 4 	Loss: 0.521326
Train Epoch: 5 	Loss: 1.066420
Train Epoch: 6 	Loss: 1.029705
Train Epoch: 7 	Loss: 0.623573
Train Epoch: 8 	Loss: 0.221929
Train Epoch: 9 	Loss: 0.986599





Test set: Average loss: 0.0052, Accuracy: 8380/10000 (83%)

