# Intro to Pytorch

In [103]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

# Tensors

In [2]:
# create uninitialized tensor
x = torch.Tensor(5, 3)
# create initilalized tensor
y = torch.ones(5)
z = torch.zeros(5)

# addition
x = torch.add(y, z)
torch.add(y, z, out=x)

# modify in place - any function that modifies has a trailing _
x.add_(1)


 2
 2
 2
 2
 2
[torch.FloatTensor of size 5]

In [3]:
# moving to and from numpy - they still share the same memory
a = np.ones(5)
b = torch.from_numpy(a)
b.add_(1)
print(b, a)

c = torch.ones(3)
d = c.numpy()
d += 5
print(c, d)


 2
 2
 2
 2
 2
[torch.DoubleTensor of size 5]
 [ 2.  2.  2.  2.  2.]

 6
 6
 6
[torch.FloatTensor of size 3]
 [ 6.  6.  6.]


In [4]:
# do we have a GPU?
print(torch.cuda.is_available())
x = torch.ones(5)
y = torch.zeros(5) + 7

# move things onto the GPU (this will fail if we don't have a GPU I think)
x.cuda()
y.cuda()
print(x + y)

True

 8
 8
 8
 8
 8
[torch.FloatTensor of size 5]



# Variables
http://pytorch.org/docs/master/_modules/torch/autograd/variable.html

In [54]:
# create a tensor of length 5 (column vector)
x = torch.randn(5)

# we can wrap this as a variable which adds some features we will explore in autograd
var = autograd.Variable(x)
print(type(x), type(var))

# get back the tensor
assert id(var.data) == id(x)

<class 'torch.FloatTensor'> <class 'torch.autograd.variable.Variable'>


# Autograd

In [6]:
# Variable "Wraps a tensor and records the operations applied to it."
# http://pytorch.org/docs/0.3.0/autograd.html#torch.autograd.Variable
x = autograd.Variable(torch.ones(2, 2), requires_grad=True)
print(x)

y = x + 2
print(y)

z = y * y * 3
print(z)

out = z.mean()
print(out)

# Variable.backward "Computes the gradient of current variable w.r.t. graph leaves."
# http://pytorch.org/docs/0.3.0/autograd.html#torch.autograd.Variable.backward
out.backward()
print(x.grad) # d(out)/dx. Note that you can't do this for y or z

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]

Variable containing:
 27  27
 27  27
[torch.FloatTensor of size 2x2]

Variable containing:
 27
[torch.FloatTensor of size 1]

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]



In [7]:
x = torch.randn(3)
x = autograd.Variable(x, requires_grad=True)

y = x * 2
n = 1
while y.data.norm() < 1000:
    n += 1
    y = y * 2

print(n)
print(y)
# y = 2^n * x
# dy/dx = 2^n
gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
y.backward(gradients)
# x.grad = 2^n scaled by the tensor we passed to y.backward. Unclear why we do this
print(x.grad)

9
Variable containing:
 -155.0617
-1419.7878
 -276.7879
[torch.FloatTensor of size 3]

Variable containing:
  51.2000
 512.0000
   0.0512
[torch.FloatTensor of size 3]



# Neural Networks

Convolutions, pooling, non-linear activation layers (relu?), linear, loss.

In [55]:
# Linear Layer

infeatures, outfeatures = 3, 5
m = nn.Linear(infeatures, outfeatures)
print("NN layer weights:", m.weight)
print("NN layer biases:", m.bias)

inp = autograd.Variable(torch.randn(2, 3))
print("Input:", inp)

out = m(inp)
print("Output:", out)

# out = m.weight * inp + m.bias; note that weight is a 5x3 and bias is a 5x1 so that 
# (weight) 5x3 * 3x1 = 5x1 + (bias) 5x1 = 5x1 output

# Sanity check that we know what is going on
# First row of weights is dotted again column input vector to get first element of column output vector.
# Add weight to this.
print(np.dot(m.weight.data[0].numpy(), inp.data[0].numpy()) + m.bias[0].data.numpy(), out.data[0][0])

NN layer weights: Parameter containing:
 0.5076 -0.4729 -0.5471
-0.5363  0.2122  0.4230
 0.1265 -0.3034 -0.4931
 0.3578 -0.0830  0.1755
-0.1496  0.0076 -0.2130
[torch.FloatTensor of size 5x3]

NN layer biases: Parameter containing:
-0.3478
 0.0717
-0.3775
-0.0733
-0.5364
[torch.FloatTensor of size 5]

Input: Variable containing:
-0.8302  1.2876  0.5830
-0.6887 -0.1046 -0.0343
[torch.FloatTensor of size 2x3]

Output: Variable containing:
-1.6970  1.0368 -1.1607 -0.3749 -0.5266
-0.6291  0.4043 -0.4160 -0.3171 -0.4268
[torch.FloatTensor of size 2x5]

[-1.69702673] -1.6970267295837402


In [85]:
# Convolution layer

# "channel" is ~ attribute of data
# 1 channel - grayscale image. 2 channels - height and weight of person. 3 channels - RGB image
# 1d here means that the vector is 1d (just has a length)
# This mostly just changes the number of channels, but also changes the size of the data based off how many
# full convolutions it can fit

in_channels, out_channels, kernel_size = 2, 3, 4
m = nn.Conv1d(in_channels, out_channels, kernel_size)
assert m.weight.shape == torch.Size([out_channels, in_channels, kernel_size])
assert m.bias.shape == torch.Size([out_channels])

# 4 variables that have 2 channel of 8 items
var_num, var_channels, var_length = 4, in_channels, 8
inp = autograd.Variable(torch.randn(var_num, var_channels, var_length))

out = m(inp)
assert out.shape == torch.Size([var_num, out_channels, var_length - kernel_size + 1])

print(
    np.dot(inp.data[0][0][:4], m.weight.data[0][0]) + # 0 channel with x, 0 weight
    np.dot(inp.data[0][1][:4], m.weight.data[0][1]) + # 1 channel with x, 1 weight
    m.bias.data[0], # x bias
    out.data[0][0][0]) # x channel out. (x == 0 for this example)

-0.570194363594 -0.5701944231987


In [100]:
# Pool layer

kernel_size = 2
m = nn.MaxPool1d(2) # Can also average pool etc

var_num, var_channels, var_length = 4, 2, 8
inp = autograd.Variable(torch.randn(var_num, var_channels, var_length))

out = m(inp)

assert out.data[0][0][0] == np.max(inp.data[0][0][0:kernel_size].numpy())
assert out.data[3][1][3] == np.max(inp.data[3][1][3*kernel_size:3*kernel_size+kernel_size].numpy())

In [102]:
# Non linear layers

m = nn.ReLU() # can also sigmoid, softmax etc
inp = autograd.Variable(torch.randn(2, 3))
out = m(inp)
print(inp, out) # just coverts elementwise to max(0, x)

Variable containing:
 0.1145 -1.8109  1.0012
-0.0845 -0.0879  0.0550
[torch.FloatTensor of size 2x3]
 Variable containing:
 0.1145  0.0000  1.0012
 0.0000  0.0000  0.0550
[torch.FloatTensor of size 2x3]



# Functional

In the NN layer we have been defining full layers. These are things that are trained (have weights that will be updated). We also just want to do purely arithmetic things (actually the ReLU was like that). For these purely arithmetic things there is `torch.nn.Functional`. See discussion https://discuss.pytorch.org/t/how-to-choose-between-torch-nn-functional-and-torch-nn-module/2800 for a bit more.

In [111]:
# Functional layers

inp = autograd.Variable(torch.randn(2, 3))
out = F.relu(inp)
print(inp, out) # just coverts elementwise to max(0, x)

Variable containing:
 1.3510 -0.7928  0.7239
 0.3445  0.0012 -0.4069
[torch.FloatTensor of size 2x3]
 Variable containing:
 1.3510  0.0000  0.7239
 0.3445  0.0012  0.0000
[torch.FloatTensor of size 2x3]

