In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from PIL import Image

## Простой пример сверточной сети - AlexNet

Структуру в сети задаем через Sequential

[Sequential](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html)

**Сверточный слой**

3, 64 - количество входных и выходных каналов

kernel_size - высота и ширина сверточного фильтра (можно использовать скаляр или кортеж, например (3, 5))

stride - значение в пикселях, на которое смещается сверточный фильтр. Можно кортеж, если мы хотим двигаться не только в одном измерении

padding - отступ, которым мы дополняем кадр по краям, если не хватает пикселей для полного прохода фильтром

[Conv2d](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html)

**Пуллинг**

В нашем случае используется макспулинг: из каждого окна берется максимальное значение. Еще вариант - использовать среднее, а также адаптивное среднее и адаптивный максимум для данных, приводящих к входным тензорам разного размера.

[MaxPool2d](https://pytorch.org/docs/master/generated/torch.nn.MaxPool2d.html)

**Дропаут**
[Dropout](https://pytorch.org/docs/master/generated/torch.nn.Dropout.html)

**Пакетная нормализация**

BatchNorm снижает влияние взрывных и затухающих градиентов [подробнее](https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm2d.html)

In [28]:
class CNNNet(nn.Module):
    """AlexNet-style convolutional image classifier.

    The model has three parts:

    * ``features``   - convolution / ReLU / max-pooling stack,
    * ``avgpool``    - adaptive average pooling to a 6x6 map,
    * ``classifier`` - three fully connected layers with dropout.

    Args:
        num_classes: Number of outputs of the last linear layer (one logit per class).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # The position of every layer in the list determines its state_dict key
        # (features.0, features.3, ...), so the order must not change.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # 2-D convolution
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # pooling
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Pools whatever spatial size the feature stack produced down/up to 6x6,
        # which is what the first linear layer below is sized for.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_size = 256 * 6 * 6  # channels * height * width after avgpool
        head = [
            nn.Dropout(),
            nn.Linear(flat_size, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images.

        Args:
            x: Batched image tensor; the first convolution expects 3 input channels.

        Returns:
            Tensor with ``num_classes`` logits per batch element.
        """
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, collapse everything else into one vector.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

In [29]:
# Instantiate the network with its default two-class output layer.
cnnnet = CNNNet(num_classes=2)

In [30]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    """Train ``model`` and print loss/accuracy on ``val_loader`` after every epoch.

    Args:
        model: ``nn.Module`` to optimise. Only the batches are moved to
            ``device`` here, so the model must already be on that device.
        optimizer: Optimizer constructed over the model's parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar tensor.
            It is assumed to be the mean loss over the batch (as with the
            default ``CrossEntropyLoss``); the running totals re-weight it by
            the batch size.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None. One summary line per epoch is written to stdout.
    """

    def _mean(total, count):
        # An empty loader has no meaningful average; report NaN instead of
        # raising ZeroDivisionError in the middle of a run.
        return total / count if count else float("nan")

    for epoch in range(epochs):
        # ---- training pass -------------------------------------------------
        training_loss = 0.0
        train_examples = 0
        model.train()
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            batch_len = inputs.size(0)
            training_loss += loss.item() * batch_len
            train_examples += batch_len
        # Normalise by the samples actually seen, so the average stays correct
        # with drop_last / samplers and with datasets that define no __len__.
        training_loss = _mean(training_loss, train_examples)

        # ---- validation pass -----------------------------------------------
        valid_loss = 0.0
        num_correct = 0
        num_examples = 0
        model.eval()
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                batch_len = inputs.size(0)
                valid_loss += loss.item() * batch_len
                # softmax is monotonic, so argmax over the raw logits selects
                # the same class as argmax over the probabilities.
                predicted = output.argmax(dim=1)
                num_correct += (predicted == targets).sum().item()
                num_examples += batch_len
        valid_loss = _mean(valid_loss, num_examples)
        accuracy = _mean(num_correct, num_examples)

        print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}'.format(
            epoch, training_loss, valid_loss, accuracy))

In [32]:
def check_image(path):
    """Return True when PIL can open ``path`` as an image, False otherwise.

    Passed as ``is_valid_file`` to ``ImageFolder`` so that files PIL cannot
    open are left out of the dataset.

    Args:
        path: Path of the candidate file.

    Returns:
        bool: Whether ``Image.open`` succeeded.
    """
    try:
        # The context manager releases the file handle that Image.open holds.
        with Image.open(path):
            return True
    except Exception:
        # Best-effort filter: any failure to open means "not a usable image".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long directory scan can still be stopped.
        return False
# Per-channel mean / std used to normalise the image tensors
# (these match the commonly quoted ImageNet channel statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 64x64, convert to a tensor, then normalise
# every channel with the statistics above.
img_transforms = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

def _image_folder(root):
    """Build an ``ImageFolder`` over ``root`` with the shared transforms and file filter."""
    return torchvision.datasets.ImageFolder(
        root=root,
        transform=img_transforms,
        is_valid_file=check_image,
    )


train_data_path = "pytorch_learning/train/"
train_data = _image_folder(train_data_path)

val_data_path = "pytorch_learning/val/"
val_data = _image_folder(val_data_path)

# not used
test_data_path = "pytorch_learning/test/"
test_data = _image_folder(test_data_path)

batch_size = 64

# Only the training loader reshuffles the samples on every pass.
train_data_loader = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True)
# Loss and accuracy do not depend on sample order, so the evaluation loaders
# iterate in a fixed order: results are reproducible and no random numbers are
# drawn from the global generator during evaluation.
val_data_loader = torch.utils.data.DataLoader(val_data,
                                              batch_size=batch_size,
                                              shuffle=False)
test_data_loader = torch.utils.data.DataLoader(test_data,
                                               batch_size=batch_size,
                                               shuffle=False)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
# Move the model to the selected device first, then hand its parameters to
# Adam (nn.Module.to moves the module in place and returns the same module).
cnnnet = cnnnet.to(device)
optimizer = optim.Adam(cnnnet.parameters(), lr=1e-3)

In [34]:
# Ten epochs with cross-entropy loss; train() prints one metrics line per epoch.
train(
    model=cnnnet,
    optimizer=optimizer,
    loss_fn=torch.nn.CrossEntropyLoss(),
    train_loader=train_data_loader,
    val_loader=val_data_loader,
    epochs=10,
    device=device,
)

Epoch: 0, Training Loss: 0.88, Validation Loss: 0.70, accuracy = 0.35
Epoch: 1, Training Loss: 0.69, Validation Loss: 0.74, accuracy = 0.54
Epoch: 2, Training Loss: 0.65, Validation Loss: 0.66, accuracy = 0.52
Epoch: 3, Training Loss: 0.55, Validation Loss: 0.40, accuracy = 0.84
Epoch: 4, Training Loss: 0.54, Validation Loss: 0.49, accuracy = 0.75
Epoch: 5, Training Loss: 0.50, Validation Loss: 0.42, accuracy = 0.77
Epoch: 6, Training Loss: 0.46, Validation Loss: 0.62, accuracy = 0.68
Epoch: 7, Training Loss: 0.41, Validation Loss: 0.40, accuracy = 0.80
Epoch: 8, Training Loss: 0.41, Validation Loss: 0.36, accuracy = 0.82
Epoch: 9, Training Loss: 0.36, Validation Loss: 0.56, accuracy = 0.73


In [37]:
# Class names indexed by the network's output position.
# NOTE(review): presumably this must match the class order of the training
# dataset (ImageFolder sorts class folders by name) - confirm.
labels = ['cat','fish']


def predictor(image_for_predict):
    """Classify one image file with ``cnnnet`` and print the predicted label.

    Args:
        image_for_predict: Path (or file object) accepted by ``Image.open``.

    Returns:
        None. The label is written to stdout.
    """
    # Convert to RGB so grayscale / palette / RGBA files also yield the three
    # channels that the normalisation and the first convolution expect; the
    # context manager closes the file once the tensor has been produced.
    with Image.open(image_for_predict) as img:
        tensor = img_transforms(img.convert("RGB"))

    # The network expects a 4-D tensor because it works on batches (the first
    # dimension is the number of images in the batch). There is no batch here,
    # so unsqueeze(0) adds a leading dimension to create a batch of length 1.
    batch = tensor.unsqueeze(0).to(device)

    # Run in eval mode (dropout disabled) regardless of the mode the model was
    # left in, and restore that mode afterwards.
    was_training = cnnnet.training
    cnnnet.eval()
    try:
        with torch.no_grad():  # inference only, no autograd graph needed
            logits = cnnnet(batch)
    finally:
        cnnnet.train(was_training)

    # softmax is monotonic, so argmax over the logits picks the same class;
    # .item() turns the 0-dim tensor into a plain int list index.
    class_index = logits.argmax(dim=1).item()
    print(labels[class_index])

In [38]:
# Expected label: cat (the file is taken from the test/cat folder)
predictor("pytorch_learning/test/cat/2041806579_e4a7f31b32.jpg")

cat


In [39]:
# Expected label: fish (the file is taken from the test/fish folder)
predictor("pytorch_learning/test/fish/1609947018_bfa1fcd6b2.jpg")

cat


In [40]:
# Expected label: fish (the file is taken from the test/fish folder)
predictor("pytorch_learning/test/fish/2869804396_c4127ccec6.jpg")

fish


## Загрузка готовой сети из хаба

На самом деле AlexNet уже определен в torchvision, и его вызов занимает две строки (в данном случае мы загружаем предварительно натренированный AlexNet)

Тут также доступны VGG, ResNet, Inception, DenseNet, SqueezeNet и т.д. 

[подробнее про доступные сети тут](https://pytorch.org/docs/stable/torchvision/models.html)

In [3]:
import torchvision.models as models

# Build torchvision's AlexNet with its pretrained weights; the checkpoint is
# downloaded on first use (see the download message in the cell output).
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` - confirm against the installed version before changing it.
alexnet = models.alexnet(pretrained=True, num_classes=1000)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /home/konstantin/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




In [5]:
# Print the module tree to inspect the layers of the loaded AlexNet.
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

Другой способ получить готовую модель - запросить ее на [pytorch.org/hub](https://pytorch.org/hub/)

[Подробности тут](https://pytorch.org/docs/stable/hub.html)

In [16]:
# List the model entry points published by the pytorch/vision hub repository.
# force_reload=True is passed, and the output below shows the repository
# archive being downloaded again.
torch.hub.list('pytorch/vision', force_reload=True)

Downloading: "https://github.com/pytorch/vision/archive/master.zip" to /home/konstantin/.cache/torch/hub/master.zip


['alexnet',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'fcn_resnet101',
 'fcn_resnet50',
 'googlenet',
 'inception_v3',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'resnet101',
 'resnet152',
 'resnet18',
 'resnet34',
 'resnet50',
 'resnext101_32x8d',
 'resnext50_32x4d',
 'shufflenet_v2_x0_5',
 'shufflenet_v2_x1_0',
 'squeezenet1_0',
 'squeezenet1_1',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn',
 'wide_resnet101_2',
 'wide_resnet50_2']

In [17]:
# Load the 'resnet50' entry point from the pytorch/vision hub repository
# (the output below shows the locally cached copy of the repo being used).
# NOTE(review): no `pretrained` argument is passed - presumably the weights
# are randomly initialised; confirm if pretrained weights are intended.
resnet50 = torch.hub.load('pytorch/vision', 'resnet50')

Using cache found in /home/konstantin/.cache/torch/hub/pytorch_vision_master


In [18]:
# Print the module tree to inspect the layers of the loaded ResNet-50.
print(resnet50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 