# PyTorch basics

In [1]:
import torch
from torch.autograd import Variable

#### PyTorch has Tensors, too

In [2]:
# Tensor type used for this demo; torch.FloatTensor is the CPU float tensor type.
dtype = torch.FloatTensor
#dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# 3x4 tensor of random normal samples, converted to `dtype`.
X = torch.randn(3, 4).type(dtype)
# A bare name on the last line of a cell makes the notebook display its value.
X


-0.9806  1.5010 -0.7010  0.1588
 0.5944  1.1888  1.2275  0.8646
 0.2313 -1.5468  1.0748 -0.4441
[torch.FloatTensor of size 3x4]

#### Tensors are wrapped in Variables that will also store gradients

In [3]:
# Wrap the tensor in an autograd Variable, using the name imported at the top of the notebook.
X_Var = Variable(X)

### Porting the numpy network to PyTorch

In [4]:
# Problem size: n samples with num_features inputs each, a hidden layer of
# hidden_dim units, and output_dim outputs per sample.
n, num_features = 64, 1000
hidden_dim, output_dim = 100, 10

# Gradient-descent settings: step size and number of passes of the training loop.
learning_rate, num_epochs = 1e-6, 500

In [5]:
# Re-binds dtype to the same value as in the earlier demo cell; the cells below
# use it when creating the data and weight tensors.
dtype = torch.FloatTensor

In [6]:
# Inputs and targets are fixed data, so autograd does not have to compute
# gradients with respect to them during the backward pass.
X = Variable(
    torch.randn(n, num_features).type(dtype),
    requires_grad=False,
)
y = Variable(
    torch.randn(n, output_dim).type(dtype),
    requires_grad=False,
)

In [7]:
# Display the raw tensor held inside the Variable X.
X.data


-1.5663e+00 -2.9332e-01 -4.5436e-01  ...  -7.4544e-01  3.2587e-02 -2.8738e-01
 8.0001e-01  6.7066e-01  1.0794e+00  ...  -3.6639e-01 -6.4698e-01  8.1326e-01
-8.4159e-01  1.2083e-01  5.1612e-01  ...   1.2136e+00 -4.6920e-01 -1.3981e-01
                ...                   ⋱                   ...                
 7.8464e-01 -6.7403e-01  9.8211e-01  ...   2.3518e-01  2.1589e-03  5.1040e-01
 2.4325e-01  3.0046e-01  2.3849e-02  ...   7.0949e-01  9.0595e-01  6.0408e-01
 9.7909e-01  7.8501e-01  5.5957e-01  ...  -1.2479e-01 -2.6334e-01 -2.6671e-02
[torch.FloatTensor of size 64x1000]

In [8]:
# The weights are what we optimise, so autograd must compute gradients for them.
W1 = Variable(
    torch.randn(num_features, hidden_dim).type(dtype),
    requires_grad=True,
)
W2 = Variable(
    torch.randn(hidden_dim, output_dim).type(dtype),
    requires_grad=True,
)

In [9]:
for epoch in range(num_epochs):

  # Forward pass: linear layer -> ReLU (clamp at 0) -> linear layer.
  # We do not need to keep references to intermediate values
  # since we are not implementing the backward pass by hand.
  y_pred = X.mm(W1).clamp(min=0).mm(W2)

  # Sum-of-squares loss between predictions and targets.
  loss = (y_pred - y).pow(2).sum()

  # Since PyTorch 0.4 the summed loss is a 0-dim tensor: indexing it with [0]
  # was deprecated there and raises IndexError on current releases.
  # .item() returns the value as a plain Python float instead.
  print(epoch, loss.item())

  # Use autograd to compute the backward pass.
  # After this call W1.grad and W2.grad hold the gradient
  # of the loss with respect to W1 and W2 respectively.
  loss.backward()

  # Update weights using gradient descent. Working on .data changes the
  # weight values in place without the update being tracked by autograd.
  W1.data -= learning_rate * W1.grad.data
  W2.data -= learning_rate * W2.grad.data

  # backward() accumulates into .grad, so zero the gradients manually
  # after each update; otherwise the next epoch would add to stale values.
  W1.grad.data.zero_()
  W2.grad.data.zero_()


0 33635448.0
1 32699266.0
2 36027228.0
3 36942800.0
4 31178436.0
5 20375876.0
6 10777758.0
7 5240346.0
8 2743513.0
9 1678928.5
10 1192125.5
11 932193.625
12 768378.375
13 651148.625
14 560275.4375
15 486753.15625
16 425803.3125
17 374572.5625
18 331054.03125
19 293790.9375
20 261732.46875
21 233997.375
22 209869.59375
23 188847.09375
24 170416.265625
25 154206.890625
26 139866.921875
27 127145.34375
28 115805.109375
29 105672.875
30 96595.0546875
31 88443.328125
32 81094.5
33 74471.859375
34 68481.59375
35 63055.7421875
36 58133.234375
37 53675.73046875
38 49616.37890625
39 45911.17578125
40 42525.5234375
41 39429.078125
42 36587.1796875
43 33977.75390625
44 31581.806640625
45 29376.203125
46 27342.87109375
47 25468.095703125
48 23737.8515625
49 22139.232421875
50 20660.029296875
51 19290.8203125
52 18022.02734375
53 16845.61328125
54 15753.8974609375
55 14740.6298828125
56 13798.8369140625
57 12922.8310546875
58 12108.51171875
59 11350.06640625
60 10643.232421875
61 9984.2919921875
62

384 0.0034147179685533047
385 0.003292033215984702
386 0.0031708278693258762
387 0.003054441185668111
388 0.0029439374338835478
389 0.002835300285369158
390 0.0027364420238882303
391 0.0026398752816021442
392 0.002546808449551463
393 0.002456151880323887
394 0.002369202906265855
395 0.002285125432536006
396 0.0022054899018257856
397 0.002128449035808444
398 0.0020564028527587652
399 0.0019854202400892973
400 0.0019191065803170204
401 0.0018517763819545507
402 0.0017885416746139526
403 0.0017289473908022046
404 0.0016689448384568095
405 0.001612377935089171
406 0.0015593618154525757
407 0.0015075599076226354
408 0.0014581759460270405
409 0.0014071529731154442
410 0.0013623512350022793
411 0.0013172876788303256
412 0.0012762716505676508
413 0.001236027106642723
414 0.0011954953661188483
415 0.0011590278008952737
416 0.001121258595958352
417 0.0010859729954972863
418 0.0010521786753088236
419 0.0010196126531809568
420 0.000988935586065054
421 0.0009572082781232893
422 0.000928837980609387