#### PyTorch is what's called a define-by-run framework
##### every time you run a piece of code, it puts it on the GPU, runs it, sends it back, all in one go.

In [6]:
%matplotlib inline
import importlib
import torch.utils; importlib.reload(torch.utils)
from torch.utils import *

In [33]:
import torch 
from torch.autograd import Variable
from torch import nn, optim
import torch.nn.functional as F

#### Basically PyTorch looks a lot like Numpy

In [9]:
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
import operator

### Tensors are similar to numpy's ndarrays, but Tensors can be used on a GPU

In [11]:
# torch.Tensor(5, 3) only allocates a 5x3 tensor; it is not initialized, so the values displayed are arbitrary.
x = torch.Tensor(5,3); x


 0.0000e+00  0.0000e+00  4.2039e-43
 0.0000e+00  7.0065e-45  0.0000e+00
-2.8447e+07  4.3440e-44  0.0000e+00
 0.0000e+00  7.0065e-45  0.0000e+00
-2.8458e+07  4.3440e-44  0.0000e+00
[torch.FloatTensor of size 5x3]

In [13]:
x = torch.rand(5,3); x


 0.8349  0.2083  0.0479
 0.8238  0.1103  0.2421
 0.3275  0.6918  0.0474
 0.9417  0.1275  0.5774
 0.5206  0.9133  0.6976
[torch.FloatTensor of size 5x3]

In [14]:
x.size()  # where NumPy has the .shape attribute, a tensor has the .size() method

torch.Size([5, 3])

In [15]:
y = torch.rand(5,3); y


 0.0836  0.2216  0.0986
 0.9607  0.9257  0.1007
 0.5616  0.0950  0.3664
 0.5247  0.7780  0.6622
 0.1651  0.2745  0.1383
[torch.FloatTensor of size 5x3]

In [16]:
x + y #infix version 
#unlike in TensorFlow or Theano, we can just say "x+y" and there it is 


 0.9184  0.4299  0.1465
 1.7845  1.0360  0.3428
 0.8891  0.7868  0.4138
 1.4664  0.9055  1.2397
 0.6857  1.1878  0.8359
[torch.FloatTensor of size 5x3]

In [17]:
torch.add(x,y) #prefix version


 0.9184  0.4299  0.1465
 1.7845  1.0360  0.3428
 0.8891  0.7868  0.4138
 1.4664  0.9055  1.2397
 0.6857  1.1878  0.8359
[torch.FloatTensor of size 5x3]

In [18]:
result = torch.Tensor(5,3)
torch.add(x,y, out=result) #You can often add an "out=" and that puts the results in this pre-allocated memory
#It's particularly important on GPUs.


 0.9184  0.4299  0.1465
 1.7845  1.0360  0.3428
 0.8891  0.7868  0.4138
 1.4664  0.9055  1.2397
 0.6857  1.1878  0.8359
[torch.FloatTensor of size 5x3]

In [19]:
y.add_(x)  # a trailing underscore on most operations makes them work in-place
# This is basically "y += x": y itself is overwritten with the sum.


 0.9184  0.4299  0.1465
 1.7845  1.0360  0.3428
 0.8891  0.7868  0.4138
 1.4664  0.9055  1.2397
 0.6857  1.1878  0.8359
[torch.FloatTensor of size 5x3]

In [20]:
x[:,1] #slicing just like Numpy


 0.2083
 0.1103
 0.6918
 0.1275
 0.9133
[torch.FloatTensor of size 5]

### Converting torch Tensor to numpy Array

In [21]:
a = torch.ones(5); a


 1
 1
 1
 1
 1
[torch.FloatTensor of size 5]

In [23]:
b = a.numpy(); b
# Be very aware that "a" and "b" now refer to the same underlying data:
# an in-place change made through one of them is visible through the other.

array([ 1.,  1.,  1.,  1.,  1.], dtype=float32)

In [24]:
#if I now add underscore to in-place a plus equals 1, it also changes b.
a.add_(1); a, b

(
  2
  2
  2
  2
  2
 [torch.FloatTensor of size 5],
 array([ 2.,  2.,  2.,  2.,  2.], dtype=float32))

### Converting numpy Array to torch Tensor 

In [25]:
a = np.ones(5)
b = torch.from_numpy(a)  # b is built on a's data, not on a copy of it
# The in-place NumPy add below therefore shows up in both a and b.
np.add(a, 1, out=a); a, b

(array([ 2.,  2.,  2.,  2.,  2.]), 
  2
  2
  2
  2
  2
 [torch.DoubleTensor of size 5])

### Autograd: automatic differentiation

In [42]:
#you just take Torch tensor, wrap it in Variable and add requires_grad parameter to it
x = Variable(torch.ones(2,2), requires_grad=True); x 

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

In [28]:
#From now on, anything I do to x, it's going to remember what I did so that it can take the gradient of it
y = x+2; y

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]

In [36]:
y.grad_fn

<AddBackward0 at 0x1fcf0a42b0>

In [37]:
z = y*y*3; z

Variable containing:
 27  27
 27  27
[torch.FloatTensor of size 2x2]

In [38]:
out = z.mean(); out

Variable containing:
 27
[torch.FloatTensor of size 1]

In [39]:
out.backward
#critical difference between a tensor and a variable.  
#They have exactly the same API, except variable also has .backward and .grad gets you the gradient.

<bound method Variable.backward of Variable containing:
 27
[torch.FloatTensor of size 1]
>

In [43]:
x.grad; x

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

#### Gradients of loops

In [44]:
x = torch.randn(3)
x = Variable(x, requires_grad=True)

In [47]:
y = x*2
# Keep doubling y until its norm reaches 1000. How many times the loop runs depends
# on the random values in x, and the operations are still recorded for the gradient.
while y.data.norm()<1000:
    y=y*2

In [48]:
y

Variable containing:
  60.7797
 729.1001
 916.0372
[torch.FloatTensor of size 3]

In [51]:
# y holds 3 values rather than a single scalar, so backward() is given
# one weight per element of y; x.grad ends up scaled by these weights.
gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
y.backward(gradients)

In [52]:
x.grad

Variable containing:
  102.4000
 1024.0000
    0.1024
[torch.FloatTensor of size 3]

### Neural Networks

In [54]:
class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    ``fc1`` takes 16*5*5 input features. With the 5x5 kernels and 2x2 pooling used
    here, that corresponds to 1-channel 32x32 inputs (32 -> 28 -> 14 -> 10 -> 5).
    The network outputs 10 values per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)  # 1 input channel, 6 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 input channels, 16 output channels, 5x5 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # like keras' Dense()
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network and return the raw outputs of ``fc3``.

        No activation is applied after the last layer.
        """
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the first (batch) dimension for the linear layers.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first.

        The initial value 1 makes this return 1 (instead of raising TypeError)
        when ``x`` has no dimensions beyond the first.
        """
        return reduce(operator.mul, x.size()[1:], 1)

In [55]:
net = Net(); net

Net(
  (conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d (6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120)
  (fc2): Linear(in_features=120, out_features=84)
  (fc3): Linear(in_features=84, out_features=10)
)