In [20]:
import torch
import torch.nn as nn

class DecoderBlock(nn.Module):
    """One upsampling stage: a transposed convolution followed by ReLU.

    The transposed convolution uses kernel_size=4, stride=2, padding=1, which
    maps a spatial size of (H, W) to (2H, 2W).

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the transposed convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        upsample = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
        )
        activation = nn.ReLU(inplace=True)
        # Attribute name and layer order are kept so that state_dict keys
        # remain "decoder.0.weight" / "decoder.0.bias".
        self.decoder = nn.Sequential(upsample, activation)

    def forward(self, x):
        """Return `x` after the transposed convolution and ReLU."""
        upsampled = self.decoder(x)
        return upsampled

class Decoder(nn.Module):
    """Stack of five 2x upsampling stages ending in a 3-channel output.

    Channels go 1024 -> 512 -> 256 -> 128 -> 64 -> 3. Each stage doubles the
    spatial size, so a 20x20 input grows to 40, 80, 160, 320 and finally
    640x640. The last stage is a bare transposed convolution with no
    activation after it.
    """

    def __init__(self):
        super().__init__()
        # Four upsampling blocks (transposed conv + ReLU); channels halve each time.
        self.up1 = DecoderBlock(1024, 512)
        self.up2 = DecoderBlock(512, 256)
        self.up3 = DecoderBlock(256, 128)
        self.up4 = DecoderBlock(128, 64)
        # Final upsampling straight to 3 channels, no activation.
        self.up5 = nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        """Run `x` through up1..up5 in order and return the result."""
        for stage in (self.up1, self.up2, self.up3, self.up4, self.up5):
            x = stage(x)
        return x

# Create a Decoder instance
decoder_model = Decoder()

# Input tensor
input_tensor = torch.randn(1, 1024, 20, 20)  # random tensor with batch size 1, 1024 channels, 20x20 spatial size

# Forward pass
output_tensor = decoder_model(input_tensor)

print(output_tensor.shape)  # prints torch.Size([1, 3, 640, 640])


torch.Size([1, 3, 640, 640])


In [21]:
import torch
import torch.nn as nn

from nets.backbone import Backbone, Multi_Concat_Block, Conv, SiLU, Transition_Block, autopad

class DecoderBlock(nn.Module):
    """One upsampling stage: a transposed convolution followed by ReLU.

    The transposed convolution uses kernel_size=4, stride=2, padding=1, which
    maps a spatial size of (H, W) to (2H, 2W).

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the transposed convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        upsample = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
        )
        activation = nn.ReLU(inplace=True)
        # Attribute name and layer order are kept so that state_dict keys
        # remain "decoder.0.weight" / "decoder.0.bias".
        self.decoder = nn.Sequential(upsample, activation)

    def forward(self, x):
        """Return `x` after the transposed convolution and ReLU."""
        upsampled = self.decoder(x)
        return upsampled
class YoloUnet(nn.Module):
    """Backbone feature extractor followed by a five-stage upsampling decoder.

    Only the last of the three feature maps returned by the backbone is
    decoded. Decoder channels go 1024 -> 512 -> 256 -> 128 -> 64 -> 3, each
    stage doubling the spatial size; the final stage has no activation.
    """

    def __init__(self):
        super().__init__()
        self.backbone = Backbone(32, 32, 4, phi='l')
        # Upsampling path; for a 20x20 feature map the sizes are
        # 40x40, 80x80, 160x160, 320x320 and finally 640x640 with 3 channels.
        self.up1 = DecoderBlock(1024, 512)
        self.up2 = DecoderBlock(512, 256)
        self.up3 = DecoderBlock(256, 128)
        self.up4 = DecoderBlock(128, 64)
        self.up5 = nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        """Extract backbone features from `x` and upsample the deepest map.

        Prints the shape of the deepest backbone feature map on every call.
        """
        # The backbone returns three feature maps. Per the original author's
        # notes they are 80x80x512, 40x40x1024 and 20x20x1024 (H, W, C) for a
        # 640x640 input; only the last one is used here.
        _, _, deepest = self.backbone(x)
        print("经过backbone：", deepest.shape)
        out = deepest
        for stage in (self.up1, self.up2, self.up3, self.up4, self.up5):
            out = stage(out)
        return out


# Shape smoke test: build the model once and run the forward pass on that
# same instance, instead of constructing a second throwaway YoloUnet.
decoder_model = YoloUnet()
x = torch.randn(1, 3, 640, 640)
print("x.shape:", x.shape)
out1 = decoder_model(x)
print("out1.shape:", out1.shape)

x.shape: torch.Size([1, 3, 640, 640])
经过backbone： torch.Size([1, 1024, 20, 20])
out1.shape: torch.Size([1, 3, 640, 640])


In [4]:
import torch
import torch.nn as nn
def getMask(x, maskRatio):
    """Draw a random 0/1 mask over the flattened elements of each sample.

    All dimensions of ``x`` after the first are flattened into one axis of
    length ``l``. In every row, ``int(l * maskRatio)`` randomly chosen
    positions are set to 0 and the remaining ``l - int(l * maskRatio)``
    positions are set to 1. Multiplying the flattened input by the mask
    therefore zeroes out the masked elements.

    Args:
        x: tensor with at least two dimensions; only its shape and device
            are used, its values are never read.
        maskRatio: fraction of elements per sample to set to 0, in [0, 1].

    Returns:
        Tensor of shape ``(b, l)`` on ``x.device`` containing only 0s and 1s.

    Raises:
        ValueError: if ``maskRatio`` is outside [0, 1]. Without this check a
            ratio above 1 produces a negative keep length, which the slice
            below would interpret as "all but the last k" and silently
            return a wrong mask.
    """
    if not 0 <= maskRatio <= 1:
        raise ValueError(f"maskRatio must be in [0, 1], got {maskRatio!r}")

    b, l = torch.flatten(x, start_dim=1, end_dim=-1).shape  # [batch, elements per sample]

    # Rank of each position under a random ordering: argsort of random noise
    # gives a permutation, argsort of that permutation gives each position's rank.
    noise = torch.randn(b, l, device=x.device)
    ids_sort = torch.argsort(noise, dim=1)
    ids_restore = torch.argsort(ids_sort, dim=1)

    len_keep = l - int(l * maskRatio)
    # Allocate directly on the target device instead of CPU-then-copy.
    mask = torch.zeros(b, l, device=x.device)
    mask[:, :len_keep] = 1
    # Scatter the leading block of ones to the positions whose rank is < len_keep.
    mask = torch.gather(mask, dim=1, index=ids_restore)
    return mask
# Demo: draw a mask for a random batch and fold it back to the input's shape.
x = torch.randn(3, 640, 640, 3)
print(x.shape)
mask = getMask(x, 0.3)  # flat mask, one row per sample
mask = mask.reshape(x.shape)  # follow x's shape instead of repeating its dimensions
print(mask.shape)  # same shape as x: [3, 640, 640, 3]

torch.Size([3, 640, 640, 3])
torch.Size([3, 640, 640, 3])
