In [1]:
import torch
import torch.nn as nn

# Define SELayer (as provided)
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

# Create a sample input tensor
batch_size, channels, height, width = 2, 64, 32, 32
x = torch.randn(batch_size, channels, height, width)  # Simulated feature map

# Instantiate SELayer
se_layer = SELayer(channel=channels, reduction=16)

# Forward pass
output = se_layer(x)

# Print shapes and sample attention weights
print(f"Input shape: {x.shape}")
print(f"Output shape: {output.shape}")

# Extract attention weights for inspection
with torch.no_grad():
    y = se_layer.avg_pool(x).view(batch_size, channels)
    y = se_layer.fc(y)
    print(f"Attention weights shape: {y.shape}")
    print(f"Sample attention weights for first sample:\n{y[0][:5]}")  # First 5 channels

Input shape: torch.Size([2, 64, 32, 32])
Output shape: torch.Size([2, 64, 32, 32])
Attention weights shape: torch.Size([2, 64])
Sample attention weights for first sample:
tensor([0.4997, 0.5000, 0.5000, 0.5002, 0.4997])


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define NonLocalBlock (as provided)
class NonLocalBlock(nn.Module):
    def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
        super(NonLocalBlock, self).__init__()
        self.sub_sample = sub_sample
        self.in_channels = in_channels
        self.inter_channels = inter_channels
        if self.inter_channels is None:
            self.inter_channels = in_channels // 2
            if self.inter_channels == 0:
                self.inter_channels = 1
        self.g = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                           kernel_size=1, stride=1, padding=0)
        if bn_layer:
            self.W = nn.Sequential(
                nn.Conv2d(in_channels=self.inter_channels, out_channels=self.in_channels,
                          kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(self.in_channels)
            )
            nn.init.constant_(self.W[1].weight, 0)
            nn.init.constant_(self.W[1].bias, 0)
        else:
            self.W = nn.Conv2d(in_channels=self.inter_channels, out_channels=self.in_channels,
                               kernel_size=1, stride=1, padding=0)
            nn.init.constant_(self.W.weight, 0)
            nn.init.constant_(self.W.bias, 0)
        self.theta = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                               kernel_size=1, stride=1, padding=0)
        self.phi = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                             kernel_size=1, stride=1, padding=0)
        if sub_sample:
            self.g = nn.Sequential(self.g, nn.MaxPool2d(kernel_size=(2, 2)))
            self.phi = nn.Sequential(self.phi, nn.MaxPool2d(kernel_size=(2, 2)))

    def forward(self, x):
        batch_size = x.size(0)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)
        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
        f = torch.matmul(theta_x, phi_x)
        f_div_C = F.softmax(f, dim=-1)
        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        z = W_y + x
        return z

# Create a sample input tensor
batch_size, in_channels, height, width = 2, 64, 32, 32
x = torch.randn(batch_size, in_channels, height, width)  # Simulated feature map

# Instantiate NonLocalBlock
non_local_block = NonLocalBlock(in_channels=in_channels, inter_channels=None, sub_sample=True, bn_layer=True)

# Forward pass
output = non_local_block(x)

# Print shapes and sample attention weights
print(f"Input shape: {x.shape}")
print(f"Output shape: {output.shape}")

# Extract attention map for inspection
with torch.no_grad():
    g_x = non_local_block.g(x).view(batch_size, non_local_block.inter_channels, -1)
    theta_x = non_local_block.theta(x).view(batch_size, non_local_block.inter_channels, -1).permute(0, 2, 1)
    phi_x = non_local_block.phi(x).view(batch_size, non_local_block.inter_channels, -1)
    f = torch.matmul(theta_x, phi_x)
    f_div_C = F.softmax(f, dim=-1)
    print(f"Attention map shape: {f_div_C.shape}")
    print(f"Sample attention weights (first 5x5 for first sample):\n{f_div_C[0, :5, :5]}")

Input shape: torch.Size([2, 64, 32, 32])
Output shape: torch.Size([2, 64, 32, 32])
Attention map shape: torch.Size([2, 1024, 256])
Sample attention weights (first 5x5 for first sample):
tensor([[0.0052, 0.0056, 0.0017, 0.0021, 0.0009],
        [0.0016, 0.0070, 0.0040, 0.0005, 0.0004],
        [0.0097, 0.0216, 0.0051, 0.0020, 0.0051],
        [0.0087, 0.0036, 0.0016, 0.0019, 0.0107],
        [0.0001, 0.0020, 0.0030, 0.0100, 0.0002]])


In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define HPPF (as provided)
class HPPF(nn.Module):
    def __init__(self, in_channels):
        super(HPPF, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels, in_channels // 16, 1, 1), nn.ReLU(inplace=True))
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels, in_channels // 64, 1, 1), nn.ReLU(inplace=True))
        self.avg   = nn.AdaptiveAvgPool2d(1)
        self.max1  = nn.AdaptiveMaxPool2d(4)
        self.max2  = nn.AdaptiveMaxPool2d(8)
        self.mlp   = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // 8, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // 8, in_channels, kernel_size=1),
            nn.Sigmoid())
        self.feat_conv = nn.Sequential(nn.Conv2d(in_channels, in_channels // 3, 3, 1, 1),
                                       nn.BatchNorm2d(in_channels // 3),
                                       nn.ReLU(inplace=True))

    def forward(self, x1, x2, x3):
        print(f'x1.shape: {x1.shape}, x2.shape: {x2.shape}, x3.shape: {x3.shape}')
        x2 = F.interpolate(x2, size=x1.size()[2:], mode='bilinear', align_corners=True)
        print(f'x2.shape: {x2.shape}')
        x3 = F.interpolate(x3, size=x1.size()[2:], mode='bilinear', align_corners=True)
        print(f'x3.shape: {x3.shape}')
        feat = torch.cat((x1, x2, x3), 1)
        print(f'feat.shape: {feat.shape}')
        b, c, h, w = feat.size()
        y1 = self.avg(feat)
        print(f"y1.shape: {y1.shape}")
        y2 = self.conv1(self.max1(feat)) # (2, 192, 4, 4)
        print(f"y2.shape before reshape: {y2.shape}")
        y3 = self.conv2(self.max2(feat))
        print(f"y3.shape before reshape: {y3.shape}")
        y2 = y2.reshape(b, c, 1, 1)
        print(f"y2.shape after reshape: {y2.shape}")
        y3 = y3.reshape(b, c, 1, 1)
        print(f"y3.shape after reshape: {y3.shape}")
        z = (y1 + y2 + y3) // 3
        print(f'z.shape: {z.shape}')
        attention = self.mlp(z)
        print(f"attention.shape: {attention.shape}")
        output1 = attention * feat
        print(f"output1.shape: {output1.shape}")
        output2 = self.feat_conv(output1)
        print(f'output2.shape: {output2.shape}')
        return output2

# Create sample input tensors
batch_size, in_channels, height, width = 2, 3, 352, 352
x1 = torch.randn(batch_size, in_channels, height, width)  # Feature map at highest resolution
x2 = torch.randn(batch_size, in_channels, height//2, width//2)  # Lower resolution
x3 = torch.randn(batch_size, in_channels, height//4, width//4)  # Lowest resolution

# Instantiate HPPF
hppf = HPPF(in_channels=in_channels)

# Forward pass
output = hppf(x1, x2, x3)

# Extract attention weights for inspection
with torch.no_grad():
    x2_up = F.interpolate(x2, size=x1.size()[2:], mode='bilinear', align_corners=True)
    x3_up = F.interpolate(x3, size=x1.size()[2:], mode='bilinear', align_corners=True)
    feat = torch.cat((x1, x2_up, x3_up), 1)
    y1 = hppf.avg(feat)
    y2 = hppf.conv1(hppf.max1(feat)).reshape(batch_size, 3 * in_channels, 1, 1)
    y3 = hppf.conv2(hppf.max2(feat)).reshape(batch_size, 3 * in_channels, 1, 1)
    z = (y1 + y2 + y3) // 3
    attention = hppf.mlp(z)
    print(f"Attention weights shape: {attention.shape}")
    print(f"Sample attention weights (first 5 channels):\n{attention[0, :5, 0, 0]}")

x1.shape: torch.Size([2, 3, 352, 352]), x2.shape: torch.Size([2, 3, 176, 176]), x3.shape: torch.Size([2, 3, 88, 88])
x2.shape: torch.Size([2, 3, 352, 352])
x3.shape: torch.Size([2, 3, 352, 352])
feat.shape: torch.Size([2, 9, 352, 352])
y1.shape: torch.Size([2, 9, 1, 1])


RuntimeError: Given groups=1, expected weight to be at least 1 at dimension 0, but got weight of size [0, 3, 1, 1] instead