In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

from neck.attention_bifpn import BiFPN
from heads.dynamic_head import DynamicHead

In [3]:
torch.cuda.is_available()

True

In [4]:
bifpn = BiFPN(
    backbone='resnet18',
    out_channels=256,
    num_layers=2,
    use_lateral_cbam=True,
    use_td_cbam=True,
    use_skconv=True,
    use_aac=True
    )

In [5]:
x = torch.randn(1, 3, 640, 480)
y = bifpn(x)
# for i in y:
#     print(i.shape)
#     print(i.requires_grad)
#     print()

In [6]:
y

(tensor([[[[ 1.2612e-02,  1.1865e-02,  1.2128e-02,  ...,  1.4165e-02,
             5.5325e-03,  9.7289e-03],
           [ 1.1071e-02,  9.2240e-03,  9.4279e-03,  ...,  8.1283e-03,
            -8.0190e-04,  8.4616e-03],
           [ 1.0692e-02,  9.8105e-03,  1.3091e-02,  ...,  5.0467e-03,
             1.8950e-03,  1.1640e-02],
           ...,
           [ 1.0930e-02,  6.2795e-03,  1.0500e-03,  ...,  1.2371e-02,
             4.9634e-03,  8.0262e-03],
           [ 9.6139e-03,  6.3554e-03,  4.3816e-03,  ...,  7.1352e-03,
             5.0257e-03,  9.4785e-03],
           [ 9.6591e-03,  7.7190e-03,  7.0970e-03,  ...,  4.1322e-03,
             9.3570e-03,  9.9227e-03]],
 
          [[ 4.3538e-03,  3.6122e-03,  2.3658e-03,  ...,  5.2624e-03,
             4.9462e-03,  4.4635e-03],
           [ 5.0158e-03,  5.3370e-03,  3.5851e-03,  ...,  3.4328e-03,
             7.2592e-03,  3.8904e-03],
           [ 3.2967e-03,  1.5093e-03,  3.5654e-03,  ...,  3.1109e-03,
             1.0496e-02,  6.9217e-03],


In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bifpn.to(device)
bifpn.train()

dummy_input = torch.randn(1, 3, 640, 480, device=device)
n_runs = 20

for _ in range(10):
    _ = bifpn(dummy_input)

# Запускаем профайлер
with torch.profiler.profile(
    activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA],
    record_shapes=True,      # Записывать информацию о формах тензоров
    profile_memory=True,     # Отслеживать использование памяти
    with_stack=True           # Сохранять стек вызовов
) as prof:
    with torch.profiler.record_function("model_inference"):
        for _ in range(n_runs):
            out = bifpn(dummy_input)
            if device.type == 'cuda':
                torch.cuda.synchronize()  # Для корректного измерения на GPU

# Выводим сводную таблицу по операциям, сортируя по времени выполнения на CUDA (если GPU)
print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=20))

# Если хотите сохранить результаты для визуализации в TensorBoard:
prof.export_chrome_trace("trace.json")

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                  Torch-Compiled Region        55.53%        6.108s        69.14%        7.606s      63.380ms        6.080s        55.28%        8.280s      69.002ms           0 b      -3.28 Kb      19.21 Mb    -714.00 K

In [3]:
timm.list_models()

['aimv2_1b_patch14_224',
 'aimv2_1b_patch14_336',
 'aimv2_1b_patch14_448',
 'aimv2_3b_patch14_224',
 'aimv2_3b_patch14_336',
 'aimv2_3b_patch14_448',
 'aimv2_huge_patch14_224',
 'aimv2_huge_patch14_336',
 'aimv2_huge_patch14_448',
 'aimv2_large_patch14_224',
 'aimv2_large_patch14_336',
 'aimv2_large_patch14_448',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'beitv2_base_patch16_224',
 'beitv2_large_patch16_224',
 'botnet26t_256',
 'botnet50ts_256',
 'caformer_b36',
 'caformer_m36',
 'caformer_s18',
 'caformer_s36',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_medium',
 'coat_lite_medium_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_small',
 'coat_tiny',
 'coatnet_0_224',
 'coatnet_0_rw_224',
 'coa

In [37]:
model = timm.create_model(
    # 'resnet50',
    'convnextv2_base',
    # 'tf_efficientnetv2_s.in21k_ft_in1k',
    # 'efficientvit_b0',
    features_only=True,
    out_indices=(-4, -3, -2, -1),  # C2, C3, C4, C5
    pretrained=True
)

In [38]:
# Пример входных данных (batch_size=1, 3 канала, 224x224)
x = torch.randn(1, 3, 224, 224)

# Получаем признаки
features = model(x)  # Возвращает список [C2, C3, C4, C5]
print([f.shape for f in features])  # Пример: [torch.Size([1, 256, 56, 56]), ...]

[torch.Size([1, 128, 56, 56]), torch.Size([1, 256, 28, 28]), torch.Size([1, 512, 14, 14]), torch.Size([1, 1024, 7, 7])]


In [39]:
model.feature_info.channels()

[128, 256, 512, 1024]

In [15]:
class FPN(nn.Module):
    def __init__(self, backbone_name='resnet50', out_channels=256):
        super().__init__()
        # Создаем бэкбон с выводом C2-C5
        self.backbone = timm.create_model(
            backbone_name,
            features_only=True,
            out_indices=(1, 2, 3, 4),  # C2, C3, C4, C5
            pretrained=True
        )
        # Получаем количество каналов для каждого уровня
        self.channels = self.backbone.feature_info.channels()
        
        # Lateral 1x1 convolutions для выравнивания каналов
        self.lateral_c2 = nn.Conv2d(self.channels[0], out_channels, kernel_size=1)
        self.lateral_c3 = nn.Conv2d(self.channels[1], out_channels, kernel_size=1)
        self.lateral_c4 = nn.Conv2d(self.channels[2], out_channels, kernel_size=1)
        self.lateral_c5 = nn.Conv2d(self.channels[3], out_channels, kernel_size=1)
        
        # Upsampling
        self.up = nn.Upsample(scale_factor=2, mode='nearest')
        
        # 3x3 convolutions для сглаживания
        self.smooth_p2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.smooth_p3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.smooth_p4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
    
    def forward(self, x):
        # Получаем признаки C2-C5 из бэкбона
        c2, c3, c4, c5 = self.backbone(x)
        
        # Top-down pathway и lateral connections
        p5 = self.lateral_c5(c5)
        p4 = self.lateral_c4(c4) + self.up(p5)
        p4 = self.smooth_p4(p4)
        
        p3 = self.lateral_c3(c3) + self.up(p4)
        p3 = self.smooth_p3(p3)
        
        p2 = self.lateral_c2(c2) + self.up(p3)
        p2 = self.smooth_p2(p2)
        
        return p2, p3, p4, p5

In [16]:
class PANet(nn.Module):
    def __init__(self, backbone_name='resnet50', out_channels=256):
        super().__init__()
        # Инициализация FPN
        self.fpn = FPN(backbone_name, out_channels)
        
        # Дополнительные свёртки для Bottom-Up Pathway
        self.down_conv_p2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1)
        self.down_conv_p3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1)
        self.down_conv_p4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1)
    
    def forward(self, x):
        # Получаем признаки из FPN
        p2, p3, p4, p5 = self.fpn(x)
        
        # Bottom-Up Pathway (передача признаков вниз)
        n2 = p2
        n3 = p3 + self.down_conv_p2(n2)
        n4 = p4 + self.down_conv_p3(n3)
        n5 = p5 + self.down_conv_p4(n4)
        
        return n2, n3, n4, n5

In [17]:
class BiFPN(nn.Module):
    def __init__(
        self,
        backbone_name='resnet50',
        out_channels=256,
        num_layers=1
    ):
        super().__init__()
        # Инициализация бэкбона через timm
        self.backbone = timm.create_model(
            backbone_name,
            features_only=True,
            out_indices=(1, 2, 3, 4),  # C2-C5 для ResNet
            pretrained=True
        )
        
        # Получаем список каналов из feature_info
        self.in_channels_list = self.backbone.feature_info.channels()
        
        # Адаптационные свёртки
        self.lateral_p2 = nn.Conv2d(self.in_channels_list[0], out_channels, 1)
        self.lateral_p3 = nn.Conv2d(self.in_channels_list[1], out_channels, 1)
        self.lateral_p4 = nn.Conv2d(self.in_channels_list[2], out_channels, 1)
        self.lateral_p5 = nn.Conv2d(self.in_channels_list[3], out_channels, 1)
        
        # BiFPN параметры
        self.num_layers = num_layers
        self.weights_top_down = nn.ParameterList([
            nn.Parameter(torch.ones(2, dtype=torch.float32)) for _ in range(3)
        ])
        self.weights_bottom_up = nn.ParameterList([
            nn.Parameter(torch.ones(2, dtype=torch.float32)) for _ in range(3)
        ])
        
        # Обрабатывающие свёртки
        self.conv_p2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.conv_p3 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.conv_p4 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.conv_p5 = nn.Conv2d(out_channels, out_channels, 3, padding=1)

    def forward(self, x):
        # Извлекаем признаки из бэкбона
        features = self.backbone(x)
        p2 = self.lateral_p2(features[0])
        p3 = self.lateral_p3(features[1])
        p4 = self.lateral_p4(features[2])
        p5 = self.lateral_p5(features[3])
        
        # BiFPN обработка
        for _ in range(self.num_layers):
            # Top-down pathway
            p5_td = p5
            p4_td = self._weighted_sum(
                p4,
                F.interpolate(p5_td, size=p4.shape[2:], mode='nearest'),
                self.weights_top_down[0]
            )
            p3_td = self._weighted_sum(
                p3,
                F.interpolate(p4_td, size=p3.shape[2:], mode='nearest'),
                self.weights_top_down[1]
            )
            p2_td = self._weighted_sum(
                p2,
                F.interpolate(p3_td, size=p2.shape[2:], mode='nearest'),
                self.weights_top_down[2]
            )
            
            # Bottom-up pathway
            p2_out = self.conv_p2(p2_td)
            p3_out = self.conv_p3(self._weighted_sum(
                p3_td,
                F.interpolate(p2_out, size=p3_td.shape[2:], mode='nearest'),
                self.weights_bottom_up[2]
            ))
            p4_out = self.conv_p4(self._weighted_sum(
                p4_td,
                F.interpolate(p3_out, size=p4_td.shape[2:], mode='nearest'),
                self.weights_bottom_up[1]
            ))
            p5_out = self.conv_p5(self._weighted_sum(
                p5_td,
                F.interpolate(p4_out, size=p5_td.shape[2:], mode='nearest'),
                self.weights_bottom_up[0]
            ))
            
            p2, p3, p4, p5 = p2_out, p3_out, p4_out, p5_out
        
        return p2_out, p3_out, p4_out, p5_out

    def _weighted_sum(self, x, y, weights):
        w = torch.softmax(weights, dim=0)
        return w[0] * x + w[1] * y

In [None]:
class CBAM(nn.Module):
    def __init__(self, channels, ratio=16):
        super().__init__()
        self.channel_att = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels//ratio, 1),
            nn.ReLU(),
            nn.Conv2d(channels//ratio, channels, 1),
            nn.Sigmoid()
        )
        self.spatial_att = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        channel_mask = self.channel_att(x)
        x = x * channel_mask
        
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out = torch.max(x, dim=1, keepdim=True)[0]
        spatial_input = torch.cat([avg_out, max_out], dim=1)
        spatial_mask = self.spatial_att(spatial_input)
        x = x * spatial_mask
        return x

In [19]:
x = torch.randn(1, 3, 224, 224)
fpn = FPN()
p2, p3, p4, p5 = fpn(x)
print(p2.shape, p3.shape, p4.shape, p5.shape)

torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 28, 28]) torch.Size([1, 256, 14, 14]) torch.Size([1, 256, 7, 7])


In [20]:
panet = PANet()
n2, n3, n4, n5 = panet(x)
print(n2.shape, n3.shape, n4.shape, n5.shape)

torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 28, 28]) torch.Size([1, 256, 14, 14]) torch.Size([1, 256, 7, 7])


In [21]:
bifpn = BiFPN(backbone_name='resnet50', out_channels=256, num_layers=2)
x = torch.randn(1, 3, 224, 224)
b2, b3, b4, b5 = bifpn(x)
print(b2.shape, b3.shape, b4.shape, b5.shape)

torch.Size([1, 256, 56, 56]) torch.Size([1, 256, 28, 28]) torch.Size([1, 256, 14, 14]) torch.Size([1, 256, 7, 7])


In [22]:
cbam = CBAM(256)
cbam(p2).shape

torch.Size([1, 256, 56, 56])