In [None]:
import torch
from collections import OrderedDict

### Стандартная свёрточная сеть

In [None]:
class IncorrectKernelSizeException(Exception):
    """Signals a convolution kernel size that is not odd.

    Attributes:
        kernel_size: the rejected kernel size, reported in the message.
    """

    def __init__(self, kernel_size):
        # Remember the offending value so __str__ can report it.
        self.kernel_size = kernel_size

    def __str__(self):
        message = f"Incorrect kernel size: {self.kernel_size}. It must be odd."
        return message

In [None]:
class ResidualConvBlock(torch.nn.Module):
    """2-D convolution with a residual shortcut, followed by an activation.

    The block computes ``activation(shortcut(x) + conv(x))``. The convolution
    is zero-padded with ``(kernel_size - 1) // 2`` pixels per side, which keeps
    the spatial size unchanged for the default stride of 1. The shortcut is an
    identity when ``in_c == out_c`` and a 1x1 convolution otherwise.

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        kernel_size: size of the square convolution kernel; must be odd.
        activation: zero-argument callable (e.g. a module class) producing
            the activation module.
        use_bias: keyword-only; whether the main convolution has a bias term.
            The 1x1 shortcut convolution keeps its default bias regardless.

    Raises:
        IncorrectKernelSizeException: if ``kernel_size`` is even.
    """

    def __init__(
        self,
        in_c,
        out_c,
        kernel_size: int = 3,
        activation = torch.nn.ReLU,
        *, # keyword-only parameters below
        use_bias = True
    ):
        super().__init__()
        self.activation = activation()

        # Guard clause: an even kernel cannot be padded symmetrically.
        if kernel_size % 2 == 0:
            raise IncorrectKernelSizeException(kernel_size)
        half_kernel = (kernel_size - 1) // 2

        # Main branch.
        self.conv = torch.nn.Conv2d(
            in_c,
            out_c,
            kernel_size,
            padding = half_kernel,
            padding_mode = 'zeros',
            bias = use_bias
        )

        # Shortcut branch: project with a 1x1 convolution only when the
        # channel count changes, so the two branches can be summed.
        needs_projection = in_c != out_c
        self.in_layer = (
            torch.nn.Conv2d(in_c, out_c, kernel_size = 1)
            if needs_projection
            else torch.nn.Identity()
        )

    def forward(self, x):
        shortcut = self.in_layer(x)
        convolved = self.conv(x)
        return self.activation(shortcut + convolved)

In [None]:
class GlobalMaxPooling(torch.nn.Module):
    """Reduces the two trailing dimensions of a tensor to their maximum."""

    def forward(self, x):
        # Collapse the last axis, then the axis that became last.
        per_row_max, _ = x.max(-1)
        overall_max, _ = per_row_max.max(-1)
        return overall_max

In [None]:
class GlobalAvgPolling(torch.nn.Module):
    """Reduces the two trailing dimensions of a tensor to their mean.

    The class name keeps its historical spelling so existing references
    continue to work.
    """

    def forward(self, x):
        # Tensor.mean returns a plain tensor (unlike max(dim), there is no
        # ``.values`` field to unpack), so both axes are reduced in one call.
        return x.mean(dim=(-2, -1))

In [None]:
from collections import OrderedDict
class ResConv2d(torch.nn.Module):
    """Residual convolutional classifier.

    Layout: groups of ``ResidualConvBlock`` separated by ``AvgPool2d(2)``,
    then ``GlobalMaxPooling`` over the spatial axes, then a stack of linear
    layers ending in (Log)Softmax over ``classes_num`` outputs.

    Args:
        classes_num: number of outputs, i.e. classes to predict.
        conv_params: one tuple per group of blocks:
            ``(blocks between poolings, block output channels, kernel size,
            activation class)``.
        linear_params: one tuple per hidden linear layer:
            ``(number of neurons, activation class)``. The input and output
            layers are implied and must not be listed.
        channels: number of channels of the input images.
        use_Softmax: finish with ``Softmax`` when true, otherwise with
            ``LogSoftmax``.
    """

    def __init__(
        self,
        classes_num: int, # number of outputs / classes to predict
        conv_params: list[tuple], # per group: (blocks between poolings, block output channels, kernel size, activation class)
        linear_params: list[tuple] = [], # per hidden layer: (neurons, activation class); only read, never mutated
        channels = 3,
        use_Softmax: bool = False
    ):

        super().__init__()

        conv_layers = []
        linear_layers = []

        # Convolutional part

        self.channels = channels
        in_channels = channels
        for group_num, (blocks_amount, channels_num, kernel_size, activation) in enumerate(conv_params):
            for block_num in range(blocks_amount):
                conv_layers.append(
                    (
                        f'ResidualBlock {group_num} {block_num}',
                        ResidualConvBlock(
                            in_c = in_channels,
                            out_c = channels_num,
                            kernel_size = kernel_size,
                            activation = activation
                        )
                    )
                )
                # Carry the channel count forward after every block, so the
                # first block of the next group receives the right width even
                # when this group consists of a single block.
                in_channels = channels_num

            # Pool between groups, but not after the last one.
            if group_num < len(conv_params) - 1:
                conv_layers.append(
                    (
                        f'Pooling {group_num}',
                        torch.nn.AvgPool2d(2)
                    )
                )

        # Width of the vector entering the linear part: whatever the conv
        # stack actually outputs (the raw channel count if it is empty).
        linear_in = in_channels

        # Global max pooling

        gm_pooling = [('Global Max Pooling', GlobalMaxPooling())]

        # Linear part: linear_in -> hidden sizes... -> classes_num

        layer_sizes = [linear_in] + [size for size, _ in linear_params] + [classes_num]
        hidden_activations = [act for _, act in linear_params]

        for i, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            linear_layers.append((f'Linear {i}', torch.nn.Linear(in_size, out_size)))
            if i < len(hidden_activations):
                layer_activation = hidden_activations[i]()
            else:
                # Output layer: probabilities or log-probabilities.
                layer_activation = torch.nn.Softmax(-1) if use_Softmax else torch.nn.LogSoftmax(-1)
            linear_layers.append((f'Activation {i}', layer_activation))

        all_layers = conv_layers + gm_pooling + linear_layers
        self.final_model = torch.nn.Sequential(OrderedDict(all_layers))

    def forward(self, x): # bs x h x w x c
        # Multi-channel input is channel-last and is moved to channel-first;
        # single-channel input gets a channel axis inserted instead.
        x = x.permute(0, 3, 1, 2) if self.channels > 1 else x.unsqueeze(1) # bs x c x h x w
        return self.final_model(x)

### Генерация bbox'ов

In [None]:
class BboxGen(torch.nn.Module):
    """Work-in-progress bounding-box generator on a residual conv backbone.

    Planned pipeline (author's notes):
      1. Convolution.
      2. Generate some number of bboxes for every (or not every) point of
         the feature map.
      3. Take the features of that point and predict the bbox offsets
         relative to its anchor.
      4. Keep the good ones.

    Only the backbone and the linear head are built so far; ``forward``
    raises ``NotImplementedError`` until the bbox logic is written.

    Args:
        channels: number of channels of the input tensor.
        conv_params: one tuple per group of blocks:
            ``(blocks between poolings, block output channels, kernel size,
            activation class)``.
        ll_sizes: output size of every linear layer of the head; must not
            be empty.
        ll_activations: one activation module instance per linear layer.
        bbox_sizes: anchor sizes (stored, not used yet).
        bbox_ratios: anchor ratios as ``(h, w)`` (stored, not used yet).
        min_conf: stored, not used yet; presumably the minimal confidence
            for keeping a bbox -- TODO confirm.
        max_IoU: stored, not used yet; presumably the IoU threshold for
            dropping overlapping bboxes -- TODO confirm.

    Raises:
        ValueError: if ``ll_sizes`` is empty or its length differs from
            that of ``ll_activations``.
    """

    def __init__(
        self,
        channels: int,
        conv_params: list[tuple], # per group: (blocks between poolings,
                                  # block output channels,
                                  # kernel size, activation class)
        ll_sizes: list[int],
        ll_activations: list[torch.nn.Module],
        bbox_sizes: list[int],
        bbox_ratios: list[tuple[int, int]], # (h, w)
        min_conf: float,
        max_IoU: float
    ):
        super().__init__()

        if len(ll_sizes) == 0:
            raise ValueError("ll_sizes must contain at least one layer size")
        if len(ll_activations) != len(ll_sizes):
            raise ValueError(
                f"ll_activations must have one entry per linear layer: "
                f"got {len(ll_activations)} activations for {len(ll_sizes)} layers"
            )

        self.channels = channels
        self.bbox_sizes = bbox_sizes
        self.bbox_ratios = bbox_ratios
        self.min_conf = min_conf
        self.max_IoU = max_IoU

        # Convolutional backbone

        conv_layers = []
        in_channels = channels

        for group_num, (blocks_amount, channels_num, kernel_size, activation) in enumerate(conv_params):
            for block_num in range(blocks_amount):
                conv_layers.append(
                    (
                        f'ResidualBlock {group_num} {block_num}',
                        ResidualConvBlock(
                            in_c = in_channels,
                            out_c = channels_num,
                            kernel_size = kernel_size,
                            activation = activation
                        )
                    )
                )
                # Carry the channel count forward after every block, so the
                # first block of the next group receives the right width even
                # when this group consists of a single block.
                in_channels = channels_num

            # Pool between groups, but not after the last one.
            if group_num < len(conv_params) - 1:
                conv_layers.append(
                    (
                        f'Pooling {group_num}',
                        torch.nn.AvgPool2d(2)
                    )
                )

        self.conv = torch.nn.Sequential(OrderedDict(conv_layers))

        # Linear head: the per-point feature vector has as many entries as
        # the backbone outputs channels.

        linear_in = in_channels
        layer_sizes = [linear_in] + list(ll_sizes)

        linear_layers = []
        for i, (in_size, out_size, activation) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:], ll_activations)):
            linear_layers.append((f'Linear {i}', torch.nn.Linear(in_size, out_size)))
            linear_layers.append((f'Activation {i}', activation))

        self.linear = torch.nn.Sequential(OrderedDict(linear_layers))

    def forward(self, x): # bs x c x h x w
        feature_map = self.conv(x)
        # permute is not in-place: keep the returned channel-last view.
        feature_map = feature_map.permute(0, 2, 3, 1) # bs x h x w x c

        # The intended iteration is: for every sample, every feature-map
        # point, every size in self.bbox_sizes and every ratio in
        # self.bbox_ratios, produce one bbox from that point's features.
        # TODO: add different sizes and ratios of bboxes (and send them into linear too!!!)
        # TODO: add evaluating of bboxes' conf and IoU
        # TODO: add bool parameter that shows if we should count bbox offset or its position
        raise NotImplementedError("BboxGen.forward: bbox generation is not implemented yet")

In [None]:
# Scratch check: iterating a channel-last view step by step ends up at one
# 1-D vector per spatial position.
x = torch.randn(1, 5, 9, 9)   # bs x c x h x w
y = x.permute(0, 2, 3, 1)     # bs x h x w x c
for i in y:                   # one sample: h x w x c
    for ii in i:              # one row: w x c
        for iii in ii:        # one position: c
            print(iii.shape, type(iii), sep='\n')

In [34]:
# Scratch cell: builds a tensor from an empty Python list.
a = torch.tensor([])

Exception: a