In [1]:
import torch
import numpy as np

x = torch.rand(3, 2)
y = torch.ones(3, 2, )

z = x + 2 * y
z

tensor([[2.1945, 2.0771],
        [2.1152, 2.0848],
        [2.5610, 2.0509]])

In [2]:
print(z[2, :])
print(z[:, 1])
print(z[:, :])

tensor([2.5610, 2.0509])
tensor([2.0771, 2.0848, 2.0509])
tensor([[2.1945, 2.0771],
        [2.1152, 2.0848],
        [2.5610, 2.0509]])


Converting ndarray to tensor and other operations. Tested with CPU version of torch, with GPU mb we can have different result.

In [3]:
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]


In [4]:
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [5]:
a += 1
print(a)
print(b)

tensor([3., 3., 3., 3., 3.])
[3. 3. 3. 3. 3.]


В ячейке ниже можно видеть, как можно отвязать переменную а от изначальных данных, теперь в a и b у нас лежит разная информация.

In [6]:
a = a + 1
print(a)
print(b)

tensor([4., 4., 4., 4., 4.])
[3. 3. 3. 3. 3.]


In [7]:
a = np.ones(5)
print(a)
b = torch.from_numpy(a)
print(b)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [8]:
a += 1
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


Если хотим у тензора высчитывать градиент в будущем, то явно указываем это:

In [9]:
x = torch.randn(3, requires_grad=True)
print(x)

y = x + 2
print(y)

tensor([-1.3985,  0.6900,  0.6473], requires_grad=True)
tensor([0.6015, 2.6900, 2.6473], grad_fn=<AddBackward0>)


In [10]:
z = y * y * 2  # вектор длины 3
z = z.mean()  # скаляр
print(z)

tensor(9.7374, grad_fn=<MeanBackward0>)


In [11]:
z.backward()  # будет работать только если z это скаляр
print(x.grad)

tensor([0.8020, 3.5866, 3.5298])


Чтобы можно было делать backward не только для скаляра - можно прописать ещё один вектор и передать его в аргументы

In [12]:
z = y * y * 2
v = torch.tensor([1, 1, 0.003], dtype=torch.float32)
z.backward(v)
print(x.grad)

tensor([ 3.2080, 14.3465,  3.5616])


Когда берём градиенты (например в цикле), то помним, что значения начнут суммироваться

In [13]:
weights = torch.ones(4, requires_grad=True)

for epochh in range(3):
    model_output = (weights * 3).sum()

    model_output.backward()

    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


Поэтому нужно градиенты очищать

In [14]:
weights = torch.ones(4, requires_grad=True)

for epochh in range(3):
    model_output = (weights * 3).sum()

    model_output.backward()

    print(weights.grad)
    weights.grad.zero_()


tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Напишем линейную регрессию через numpy:

In [15]:
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0


# linear regression
def forward(x):
    return w * x


# MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()


#gradient
def gradient(x, y, y_pred):
    return np.dot(2 * x, y_pred - y).mean()


print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients
    dw = gradient(X, Y, y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 3: w = 1.872, loss = 0.76800019
epoch 5: w = 1.980, loss = 0.01966083
epoch 7: w = 1.997, loss = 0.00050331
epoch 9: w = 1.999, loss = 0.00001288
epoch 11: w = 2.000, loss = 0.00000033
epoch 13: w = 2.000, loss = 0.00000001
epoch 15: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


А теперь избавимся от отдельно прописанной функции градиента и воспользуемся torch

In [16]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


# linear regression
def forward(x):
    return w * x


# MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()


# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients. That's the difference!!! gradients = backward pass
    l.backward()

    # Нам нужно обновить вес, при этом не поменяв функцию градиента, поэтому пишем так:
    with torch.no_grad():
        w -= learning_rate * w.grad

    w.grad.zero_()

    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 3: w = 0.772, loss = 15.66018772
epoch 5: w = 1.113, loss = 8.17471695
epoch 7: w = 1.359, loss = 4.26725292
epoch 9: w = 1.537, loss = 2.22753215
epoch 11: w = 1.665, loss = 1.16278565
epoch 13: w = 1.758, loss = 0.60698116
epoch 15: w = 1.825, loss = 0.31684780
epoch 17: w = 1.874, loss = 0.16539653
epoch 19: w = 1.909, loss = 0.08633806
epoch 21: w = 1.934, loss = 0.04506890
epoch 23: w = 1.952, loss = 0.02352631
epoch 25: w = 1.966, loss = 0.01228084
epoch 27: w = 1.975, loss = 0.00641066
epoch 29: w = 1.982, loss = 0.00334642
epoch 31: w = 1.987, loss = 0.00174685
epoch 33: w = 1.991, loss = 0.00091188
epoch 35: w = 1.993, loss = 0.00047601
epoch 37: w = 1.995, loss = 0.00024848
epoch 39: w = 1.996, loss = 0.00012971
epoch 41: w = 1.997, loss = 0.00006770
epoch 43: w = 1.998, loss = 0.00003534
epoch 45: w = 1.999, loss = 0.00001845
epoch 47: w = 1.999, loss = 0.00000963
epoch 49: w = 1.999, loss = 0.00000503
epoch 51: w = 1.999, loss = 

Теперь сделаем ещё и автоматизированный loss и также вставим pytorch optimizer

In [17]:
import torch.nn as nn

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# linear regression
def forward(x):
    return w * x

# Training
learning_rate = 0.01
n_iters = 100

# loss больше сами не определяем!!!
loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients. That's the difference!!! gradients = backward pass
    l.backward()

    # сами веса больше не обновляем!!! Optimizer делает всё за нас
    optimizer.step()

    optimizer.zero_grad()

    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 3: w = 0.772, loss = 15.66018772
epoch 5: w = 1.113, loss = 8.17471695
epoch 7: w = 1.359, loss = 4.26725292
epoch 9: w = 1.537, loss = 2.22753215
epoch 11: w = 1.665, loss = 1.16278565
epoch 13: w = 1.758, loss = 0.60698116
epoch 15: w = 1.825, loss = 0.31684780
epoch 17: w = 1.874, loss = 0.16539653
epoch 19: w = 1.909, loss = 0.08633806
epoch 21: w = 1.934, loss = 0.04506890
epoch 23: w = 1.952, loss = 0.02352631
epoch 25: w = 1.966, loss = 0.01228084
epoch 27: w = 1.975, loss = 0.00641066
epoch 29: w = 1.982, loss = 0.00334642
epoch 31: w = 1.987, loss = 0.00174685
epoch 33: w = 1.991, loss = 0.00091188
epoch 35: w = 1.993, loss = 0.00047601
epoch 37: w = 1.995, loss = 0.00024848
epoch 39: w = 1.996, loss = 0.00012971
epoch 41: w = 1.997, loss = 0.00006770
epoch 43: w = 1.998, loss = 0.00003534
epoch 45: w = 1.999, loss = 0.00001845
epoch 47: w = 1.999, loss = 0.00000963
epoch 49: w = 1.999, loss = 0.00000503
epoch 51: w = 1.999, loss = 

Заменим forward на встроенную функцию

In [22]:
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)
n_samples, n_features = X.shape

X_test = torch.tensor([5], dtype=torch.float32)

input_size = n_features
output_size = n_features

# forward сами больше не пишем!
model = nn.Linear(input_size, output_size)


# Training
learning_rate = 0.01
n_iters = 100

# loss больше сами не определяем!!!
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # predict = forward pass
    y_pred = model(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients. That's the difference!!! gradients = backward pass
    l.backward()

    # сами веса больше не обновляем!!! Optimizer делает всё за нас
    optimizer.step()

    optimizer.zero_grad()

    if epoch % 2 == 0:
        [w, b] = model.parameters() # unpack parameters
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

epoch 1: w = -0.345, loss = 46.72698975
epoch 3: w = 0.227, loss = 22.67167282
epoch 5: w = 0.624, loss = 11.08773613
epoch 7: w = 0.901, loss = 5.50839233
epoch 9: w = 1.094, loss = 2.82009029
epoch 11: w = 1.229, loss = 1.52375674
epoch 13: w = 1.323, loss = 0.89763534
epoch 15: w = 1.390, loss = 0.59422266
epoch 17: w = 1.436, loss = 0.44620743
epoch 19: w = 1.470, loss = 0.37303400
epoch 21: w = 1.494, loss = 0.33591712
epoch 23: w = 1.511, loss = 0.31618312
epoch 25: w = 1.524, loss = 0.30484039
epoch 27: w = 1.534, loss = 0.29755986
epoch 29: w = 1.542, loss = 0.29225692
epoch 31: w = 1.548, loss = 0.28792739
epoch 33: w = 1.553, loss = 0.28408769
epoch 35: w = 1.557, loss = 0.28050479
epoch 37: w = 1.561, loss = 0.27706641
epoch 39: w = 1.564, loss = 0.27371776
epoch 41: w = 1.567, loss = 0.27043256
epoch 43: w = 1.570, loss = 0.26719803
epoch 45: w = 1.573, loss = 0.26400745
epoch 47: w = 1.576, loss = 0.26085749
epoch 49: w = 1.578, loss = 0.25774643
epoch 51: w = 1.581, loss 