## Introduction to PyTorch

In [2]:
import torch
import numpy as np

## 1. PyTorch = Numpy + GPU + (other features)
It is easy to convert numpy tensors to pytorch tensors and vice versa.

In [4]:
t1 = np.arange(5)
t1

array([0, 1, 2, 3, 4])

In [6]:
t2 = torch.Tensor([1,2,3])
t2


 1
 2
 3
[torch.FloatTensor of size 3]

In [11]:
# Convert Numpy array to PyTorch Tensor
t3 = torch.Tensor(t1)
print(t3)
# and back again
t4 = t3.numpy()
print(t4)


 0
 1
 2
 3
 4
[torch.FloatTensor of size 5]

[0. 1. 2. 3. 4.]


## 2. Switching between GPU and CPU is simple

In [13]:
# Bring PyTorch tensor on gpu by calling cuda()
#gpu_t3 = t3.cuda() 

# and back to the cpu
cpu_t3 = gpu_t3.cpu()

NameError: name 'gpu_t3' is not defined

## 3. Matrix multiplication 

In [17]:
t1 = torch.rand(4,5)
t2 = torch.rand(5,10)
t1.mm(t2)


 1.1730  1.5206  0.9610  1.7638  0.8436  1.1653  1.3729  1.0137  0.7185  1.3678
 0.6372  1.1591  0.8026  1.3920  0.4620  0.8159  0.8937  0.8693  0.5985  1.0270
 1.0437  1.7046  1.2585  1.5093  0.7270  1.0349  1.0800  1.0654  1.0014  1.5985
 1.0852  1.6536  1.1415  1.5994  0.7474  1.0597  1.1966  1.0838  0.9169  1.5548
[torch.FloatTensor of size 4x10]

## 4. Automatic gradient calculation
Pytorch can calculate the gradient for you after you have done arbitrary computations. You only need to make sure to use torch Variables for your computations instead of torch Tensors.

### Variable = Tensor + gradient features

In [18]:
from torch.autograd import Variable

In [26]:
v1 = Variable(t1) # creates a Variable from a Tensor
v1

Variable containing:
 0.4517  0.9266  0.6610  0.1498  0.2513
 0.3308  0.3854  0.4631  0.0153  0.7253
 0.9569  0.5930  0.1220  0.0519  0.8824
 0.7728  0.6558  0.1840  0.1647  0.7962
[torch.FloatTensor of size 4x5]

In [30]:
v1.data # contains out tensor


 3.4517  3.9266  3.6610  3.1498  3.2513
 3.3308  3.3854  3.4631  3.0153  3.7253
 3.9569  3.5930  3.1220  3.0519  3.8824
 3.7728  3.6558  3.1840  3.1647  3.7962
[torch.FloatTensor of size 4x5]

In [33]:
print(v1.grad) # contains out gradient but it's currently none

None


In [37]:
# define inputs
x_tensor = torch.randn(10, 20)
y_tensor = torch.randn(10, 5)

x = Variable(x_tensor, requires_grad=False) # no gradient will be calculated for this variable
y = Variable(y_tensor, requires_grad=False) # no gradient will be calculated for this variable

# define some weights
w = Variable(torch.randn(20, 5), requires_grad=True) # gradient will be calculated for this variable

# get variable tensor
print(type(w.data))  # torch.FloatTensor
# get variable gradient
print(w.grad)  # None

<class 'torch.FloatTensor'>
None


In [69]:
# now we need a function for which we want to calculate a gradient 
# => This will always be our cost function

loss = torch.mean(y - x.mm(w))

### HERE IS ALL THE MAGIC - calculate all gradients in one line ###
loss.backward()

# results can be found in the grad attribute of each variable
print(w.grad)  # some gradients

# See what happends when you run loss.backward() several times (just execute the cell several times)

Variable containing:
 1.5055  1.5055  1.5055  1.5055  1.5055
-1.3779 -1.3779 -1.3779 -1.3779 -1.3779
 0.9115  0.9115  0.9115  0.9115  0.9115
-1.3805 -1.3805 -1.3805 -1.3805 -1.3805
-0.3577 -0.3577 -0.3577 -0.3577 -0.3577
 1.5802  1.5802  1.5802  1.5802  1.5802
-0.6379 -0.6379 -0.6379 -0.6379 -0.6379
 0.3137  0.3137  0.3137  0.3137  0.3137
-0.2449 -0.2449 -0.2449 -0.2449 -0.2449
 0.6991  0.6991  0.6991  0.6991  0.6991
-2.1097 -2.1097 -2.1097 -2.1097 -2.1097
-0.3521 -0.3521 -0.3521 -0.3521 -0.3521
 0.4066  0.4066  0.4066  0.4066  0.4066
-0.1653 -0.1653 -0.1653 -0.1653 -0.1653
-1.6496 -1.6496 -1.6496 -1.6496 -1.6496
-0.3047 -0.3047 -0.3047 -0.3047 -0.3047
-0.7570 -0.7570 -0.7570 -0.7570 -0.7570
-0.2626 -0.2626 -0.2626 -0.2626 -0.2626
 1.2828  1.2828  1.2828  1.2828  1.2828
-0.4467 -0.4467 -0.4467 -0.4467 -0.4467
[torch.FloatTensor of size 20x5]



In [59]:
# We have to actively set the gradients to zero after each backpropagation step.
w.grad.data.zero_();

### 5. Build and train our first neural network

In [71]:
# nn is called the "Neural Network Package" and contains all modules which are related to NNs.
import torch.nn.functional as F

# define x and y trainings data
x = Variable(torch.randn(10, 20), requires_grad=False)
y = Variable(torch.randn(10, 3), requires_grad=False)

# define some weights
w1 = Variable(torch.randn(20, 5), requires_grad=True)
w2 = Variable(torch.randn(5, 3), requires_grad=True)

learning_rate = 0.1
loss_fn = torch.nn.MSELoss() # F.mse_loss() functional interface
optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)


for step in range(5):
    
    # forward pass
    pred = F.sigmoid(x.mm(w1))
    pred = F.sigmoid(pred.mm(w2))
    
    # calculate loss
    loss = loss_fn(pred, y)

    # instead of setting all gradients to zero manually,
    # we can use an optimizer function
    optimizer.zero_grad()
    
    # backward pass => calculate gradients
    loss.backward()
    
    # do one step of st. gradient descent update
    optimizer.step()

#### And now how can I use it to make predictions?  
You have to do it "by hand" meaning you need to call the function sigmoid twice with w1 und w2.  
But there is also a more convenient way: Modules

### 6. Building a neural network model

In [77]:
#### 1. Method: Sequential #### 
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(10,5),
    nn.ReLU(),
    nn.Linear(5,1),
    nn.Sigmoid()
)

In [78]:
model()

TypeError: forward() missing 1 required positional argument: 'input'

In [86]:
model(Variable(torch.rand(10)))

Variable containing:
 0.5016
[torch.FloatTensor of size 1]

In [113]:
model[0].weight

Parameter containing:
 0.2451  0.1578 -0.0938  0.2572 -0.2049 -0.1006  0.0939 -0.2246  0.2534 -0.0706
-0.2203 -0.0450 -0.2360  0.1251  0.1969  0.1551  0.0931 -0.2479 -0.2977  0.2848
-0.0695  0.2488 -0.0392  0.2784 -0.2997 -0.1833 -0.2859 -0.1568 -0.0128  0.1833
 0.0865  0.0096 -0.0898  0.0710  0.1341 -0.2149 -0.1091 -0.0093 -0.0908 -0.2079
-0.2374  0.2335 -0.2905  0.2679  0.2320  0.0981  0.1184  0.3135  0.1505  0.0948
[torch.FloatTensor of size 5x10]

In [115]:
model[2].weight

Parameter containing:
-0.0079 -0.2133 -0.0082 -0.3442 -0.1578
[torch.FloatTensor of size 1x5]

In [125]:
class RoboRacer(nn.Module):
    def __init__(self):
        super().__init__() # always first line
        
        # define all layers you want to use later here
        self.linear1 = nn.Linear(10,5)
        self.linear2 = nn.Linear(5,1)

    def forward(self, x):
        # call all layers in the right order here and return the result
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = F.sigmoid(x)
        return x

In [126]:
robo = RoboRacer()

In [127]:
robo(Variable(torch.rand(10)))

Variable containing:
 0.4070
[torch.FloatTensor of size 1]

In [129]:
print(robo) # useful to get a summary

RoboRacer(
  (linear1): Linear(in_features=10, out_features=5)
  (linear2): Linear(in_features=5, out_features=1)
)
