In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


# Базовые операции с тензорами

In [2]:
a = torch.tensor([1, 2, 3, 4])

In [3]:
a = torch.randn(3, 4)
b = torch.randn(3, 4)

In [4]:
a + b, a * b

(tensor([[-0.3216,  0.0389,  0.5557, -2.5285],
         [-0.7835, -2.1300, -0.3477, -2.3530],
         [ 0.8177,  0.7918,  0.2258,  0.5612]]),
 tensor([[-2.1210, -0.4343,  0.0593,  1.5974],
         [-0.1318,  1.1173, -0.6370,  0.8822],
         [ 0.0543, -0.2449, -0.0691, -0.1002]]))

In [6]:
torch.randn(3, 4) @ torch.randn(4, 5)

tensor([[-0.7964, -0.2296, -1.3110,  0.6267, -0.9610],
        [-1.0236, -1.1118, -3.1181,  5.9850, -2.3383],
        [ 0.7304, -0.6055, -2.7391,  3.4069, -0.8725]])

In [7]:
a

tensor([[ 1.3044, -0.6399,  0.1440, -1.2351],
        [ 0.1424, -1.1948,  0.6430, -1.8850],
        [ 0.0729,  1.0297,  0.3989, -0.1425]])

In [11]:
k = torch.softmax(a, dim=1)
k

tensor([[0.6513, 0.0932, 0.2041, 0.0514],
        [0.3285, 0.0863, 0.5420, 0.0433],
        [0.1726, 0.4492, 0.2391, 0.1391]])

In [12]:
sum(k[0])

tensor(1.)

In [13]:
k.sigmoid()

tensor([[0.6573, 0.5233, 0.5508, 0.5128],
        [0.5814, 0.5216, 0.6323, 0.5108],
        [0.5430, 0.6105, 0.5595, 0.5347]])

In [15]:
a = torch.tensor([2.], requires_grad=True)

In [16]:
a

tensor([2.], requires_grad=True)

In [17]:
b = a ** 2
b

tensor([4.], grad_fn=<PowBackward0>)

In [18]:
b.backward()

In [21]:
a.grad

tensor([4.])

In [25]:
x = torch.ones(2, 3, requires_grad=True)
y = torch.randn(2, 3, 4, requires_grad=True)

In [48]:
l = torch.exp(y.sum(dim=-1) + x.tanh()).sum()
l

tensor(22.0742, grad_fn=<SumBackward0>)

In [49]:
l.backward()

In [50]:
x.grad, y.grad

(tensor([[ 0.1679, 25.3872,  1.4410],
         [12.4639,  4.6538,  2.2392]]),
 tensor([[[ 0.3998,  0.3998,  0.3998,  0.3998],
          [60.4494, 60.4494, 60.4494, 60.4494],
          [ 3.4312,  3.4312,  3.4312,  3.4312]],
 
         [[29.6778, 29.6778, 29.6778, 29.6778],
          [11.0811, 11.0811, 11.0811, 11.0811],
          [ 5.3318,  5.3318,  5.3318,  5.3318]]]))

## Обработка данных на видео-карте

In [16]:
import torch
torch.cuda.is_available()

False

In [27]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [74]:
x = torch.randn(10, 10).to(device)
x

tensor([[ 1.0133,  2.2951,  0.1564, -0.8994,  1.2518,  0.9480,  1.4142,  0.2733,
         -1.2717, -1.7488],
        [ 1.1000,  1.4003, -1.0463, -0.8161, -0.2172, -0.5606, -0.4878, -1.4092,
         -0.8391, -0.1817],
        [ 0.7837, -1.0966, -1.5988,  0.5730,  0.7737, -0.7053,  1.2997,  2.0156,
         -0.8985,  0.2825],
        [-0.8509, -0.8276,  0.3509,  1.1644,  0.1593,  0.6787, -0.0110,  0.3005,
          0.0664,  2.1634],
        [-1.1586,  1.0047, -0.3055,  0.6105, -0.1194, -1.2009,  0.2188,  1.1019,
          0.1078, -0.2118],
        [-0.0994,  0.6950, -0.0192,  0.9584,  1.0210,  0.4380, -1.0775, -1.1662,
          0.3537,  0.0031],
        [ 0.1761, -0.4339,  1.2335,  0.4187, -0.0485,  0.2978, -1.9483,  0.4359,
         -0.0937, -0.0639],
        [ 1.2862,  1.2164,  0.6486,  0.2483,  1.1167,  0.5022, -0.4988,  0.6403,
         -1.6048, -0.2109],
        [-0.2527, -0.9390,  1.4258, -1.8111, -0.0990, -0.6578, -0.3679, -0.0316,
          1.1640,  0.1331],
        [-0.6454,  

In [77]:
x.device

device(type='cpu')

In [20]:
a = torch.randn(10000, 10000)

## Сравнение скорости работы девайсов

In [21]:
a_python = a.tolist()

In [22]:
%%time
sum(a_python[i][j] for i in range(10000) for j in range(10000))

CPU times: total: 4.17 s
Wall time: 4.17 s


6830.530867846355

In [23]:
a_numpy = a.numpy()

In [24]:
%%time
a_numpy.sum()

CPU times: total: 78.1 ms
Wall time: 67 ms


6830.537

In [25]:
%%time
a.sum()

CPU times: total: 172 ms
Wall time: 15.2 ms


tensor(6830.5298)

In [28]:
a = a.to(device)

In [29]:
%%time
a.sum()

CPU times: total: 219 ms
Wall time: 11.3 ms


tensor(6830.5298)

In [36]:
x = torch.rand(5, 5, requires_grad=True)
y = torch.rand(5, 5, requires_grad=False)

In [31]:
l = (x * y).sum()
l.backward()

In [32]:
x * y

tensor([[0.1216, 0.0869, 0.0305, 0.3752, 0.1609],
        [0.0359, 0.5004, 0.0328, 0.0988, 0.4072],
        [0.2827, 0.7174, 0.5519, 0.0791, 0.1350],
        [0.2198, 0.0013, 0.3635, 0.0623, 0.5847],
        [0.0980, 0.4371, 0.2908, 0.4955, 0.2580]], grad_fn=<MulBackward0>)

In [33]:
x

tensor([[0.3389, 0.2705, 0.5717, 0.9134, 0.1743],
        [0.5636, 0.5042, 0.2520, 0.1704, 0.6449],
        [0.9023, 0.9032, 0.8216, 0.3677, 0.3050],
        [0.8639, 0.1186, 0.4876, 0.0968, 0.8431],
        [0.3809, 0.4582, 0.2969, 0.9623, 0.7782]], requires_grad=True)

In [35]:
y.grad

In [37]:
# no grad используется, когда нет
# необходимости считать градиенты.
# например, делаем предикт
with torch.no_grad():
    l = (x * y).sum()
    l.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [39]:
with torch.inference_mode():
    l = (x * y).sum()
    l.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

# Обучение модели

In [40]:
from torchvision.datasets import MNIST
import torchvision.transforms as T

In [77]:
train_set = MNIST('.MNIST', transform=T.ToTensor(), train=True, download=True)
test_set = MNIST('.MNIST', transform=T.ToTensor(), train=False, download=True)

In [78]:
from torch.utils.data import Dataset, DataLoader

In [79]:
len(train_set)

60000

In [80]:
out = train_set[357]

In [81]:
out

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [82]:
image, label = out

In [84]:
image.shape

torch.Size([1, 28, 28])

In [57]:
train_set.classes

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [59]:
from torch import nn

In [85]:
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [87]:
for images, labels in train_loader:
    print(images.shape, labels.shape)
    break

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [66]:
class MLP(nn.Module):
    def __init__(self, in_features, num_classes, hidden_size):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=hidden_size),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size, bias=False),
            nn.LeakyReLU(0.1),
            nn.Linear(in_features=hidden_size, out_features=num_classes)
        )


    def forward(self, x):
        return self.model(x)

In [109]:
IMG_SIZE = 28
NUM_CLASSES = 10
HIDDEN_SIZE = 64 
NUM_EPOCHS = 10

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = MLP (in_features=IMG_SIZE**2, num_classes=NUM_CLASSES, hidden_size=HIDDEN_SIZE).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [63]:
model.model[0].weight.shape

torch.Size([64, 784])

In [65]:
model.model[0].bias.shape

torch.Size([64])

In [64]:
model.model[0]

Linear(in_features=784, out_features=64, bias=True)

In [107]:
torch.flatten(images, start_dim=1).shape

torch.Size([64, 784])

In [110]:
for epoch in range(1, NUM_EPOCHS+1):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(torch.flatten(images, start_dim=1))     # model.forward() == model() 
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        print(loss.item())

2.286445379257202
2.3193089962005615
2.3108127117156982
2.301104784011841
2.3056583404541016
2.314990520477295
2.3106305599212646
2.3042776584625244
2.285606861114502
2.301708459854126
2.2817466259002686
2.2844057083129883
2.283963918685913
2.275278329849243
2.2831413745880127
2.29099440574646
2.2780637741088867
2.2716052532196045
2.2774171829223633
2.268918752670288
2.2604594230651855
2.2592368125915527
2.2571959495544434
2.2495269775390625
2.273214817047119
2.2262871265411377
2.249816656112671
2.2538535594940186
2.2541372776031494
2.2363710403442383
2.243070125579834
2.2308642864227295
2.223224639892578
2.231257438659668
2.229835271835327
2.219832181930542
2.190051794052124
2.191153049468994
2.2233827114105225
2.18151593208313
2.1982290744781494
2.175621271133423
2.1673877239227295
2.172550916671753
2.138643980026245
2.2019405364990234
2.1556313037872314
2.1478140354156494
2.1316006183624268
2.14875864982605
2.1908581256866455
2.1926426887512207
2.0397965908050537
2.038421392440796
2