In [1]:
%%capture
import numpy as np
import torch
from torch.nn.init import normal_
from torch.autograd import Variable
import torch.optim as optim

# pytorch optimization

In [5]:

# variable init
x = torch.empty(1, requires_grad=True)
print("empty x: ", x)
normal_(x)
print("random x: ", x)

# create function to optimize
y = 2 * x
print("y: ",y)

# calculate the gradient and feed it back
print("gradient of x before backward: ", x.grad)

y.backward()

print("gradient of x after backward: ", x.grad)

# optimize with backprop information
optimizer = optim.Adam([x], lr=0.1)

y = 2 * x


optimizer.zero_grad()
y.backward()

print("x before optimize: ", x)
print("y: ", 2 * x)

optimizer.step()

print("x after optimize: ", x)
print("y: ", 2 * x)

empty x:  tensor([0.0521], requires_grad=True)
random x:  tensor([-1.7507], requires_grad=True)
y:  tensor([-3.5015], grad_fn=<MulBackward0>)
gradient of x before backward:  None
gradient of x after backward:  tensor([2.])
x before optimize:  tensor([-1.7507], requires_grad=True)
y:  tensor([-3.5015], grad_fn=<MulBackward0>)
x after optimize:  tensor([-1.8507], requires_grad=True)
y:  tensor([-3.7015], grad_fn=<MulBackward0>)


Conclusion:
- torch.Tensor.backward() transfer the gradient information to the variable(weights).
- torch.nn.optim.Adam() update the variable(weights) based on transfer the gradient information.

Hence we use this train a model as following: 

# pytorch parameter training

In [None]:
# 1. basic train
# data:
datagen = zip([torch.rand(1, 4) for _ in range(12)], [torch.zeros(1) for _ in range(12)])

# variable(weights)
W = Variable(torch.randn(4, 1), requires_grad=True)
b = Variable(torch.randn(1), requires_grad=True)
model = lambda x: torch.matmul(x, W) + b

optimizer = optim.Adam([W, b])

for x, y in datagen:
    optimizer.zero_grad()

    pred = model(x)
    loss = (pred - y) ** 2

    loss.backward()
    optimizer.step()

    print(loss)

# initialize variables(or tensors, or parameters in model)

In [None]:
from torch.nn.init import xavier_uniform_

# Normal: (just like before)
x = torch.empty(5, 3, requires_grad=True)
print("empty", x)

xavier_uniform_(x)
print("initialized", x)

In [16]:

# Direct:（if want it as variable, set requires_grad=True）
x = torch.tensor([5.5, 3])
print(x)

x = torch.zeros(5, 3, dtype=torch.long)
print("zeros", x)

x = torch.ones(5, 3, dtype=torch.long)
print("ones", x)

x = torch.rand(5, 3)
print("rand", x)


empty(single): tensor([-3.0596e+13], requires_grad=True)
empty: tensor([[-3.0596e+13,  4.5577e-41,  2.8648e-36],
        [ 0.0000e+00,  4.4842e-44,  0.0000e+00],
        [ 1.5695e-43,  0.0000e+00,  2.8642e-36],
        [ 0.0000e+00,  7.7052e+31,  7.2148e+22],
        [ 2.5226e-18,  1.0372e-08,  1.0470e-11]])
zeros: tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
rand: tensor([[0.3606, 0.0407, 0.1474],
        [0.1027, 0.4070, 0.7913],
        [0.2934, 0.8099, 0.8565],
        [0.9631, 0.2912, 0.1615],
        [0.7747, 0.2285, 0.4873]])
tensor([5.5000, 3.0000])


In [None]:
# Indirect:
# from numpy
a = np.ones(5)
b = torch.from_numpy(a)
print(b)
print(b.numpy())

# from torch tensor
x = torch.randn_like(b, dtype=torch.float)  # override dtype!
print(x)

x = x.new_ones(5, 3)      # new_* methods take in sizes
print(x)


# operation

In [24]:
x = torch.randn(4, 4)
x2 = torch.randn(4, 4)
x3 = torch.randn(3, 2)

### operation: self-operation

In [26]:
y = x.view(16)    # : reshape
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions

# get size
print(x.size(), y.size(), z.size())

# get value
print(x.numpy())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
[[-1.0876321   0.31195012  0.9559226  -0.77233636]
 [-0.77088886  0.33271268 -1.6035181   0.6393377 ]
 [ 0.8281358   1.2581091   0.69183874  0.4478285 ]
 [ 0.6002358   0.55723464 -0.53133804  0.19073093]]


In [31]:
print(x.argmax(1).numpy())
print(x.sum(1).numpy())

[2 3 1 0]
[-0.59209573 -1.4023566   3.225912    0.81686336]


### operation: two

In [6]:
y = torch.add(x,x2)
print(y)

tensor([[1.2781, 1.2587, 0.3948],
        [0.3611, 1.3786, 1.5138],
        [1.3472, 1.1214, 1.0918],
        [0.6161, 1.2757, 0.5494],
        [0.4003, 1.6258, 1.7759]])


In [None]:
y = torch.matmul(x3, x3.T)
print(y)

In [33]:
print(x == x)
print(x == x2)

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


# get gradient

In [13]:
x3 = torch.randn(3, 2)
y = torch.matmul(x3, x3.T)

print(y.grad_fn)
y.backward()
print(x3.grad)   # grad is more like delta x

<MmBackward object at 0x7fcc13e8a128>
tensor([[2., 2.]])
