Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions print_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ultralytics.nn import SegmentationModel

model = SegmentationModel(r"ultralytics\cfg\models\11\FS-ITMR.yaml",ch=13,nc=1)
print(model)
model = SegmentationModel(r"ultralytics\cfg\models\11\FS-ITMR.yaml", ch=13, nc=1)
print(model)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ dev = [
"minijinja>=2.0.0", # render docs macros without mkdocs-macros-plugin
]
export = [
"numpy<2.0.0", # TF 2.20 compatibility
"numpy<3.0.0", # TF 2.20 compatibility
"onnx>=1.12.0; platform_system != 'Darwin'", # ONNX export
"onnx>=1.12.0,<1.18.0; platform_system == 'Darwin'", # TF inference hanging on MacOS (tested up to onnx==1.20.0)
"onnxslim>=0.1.80",
Expand Down
2 changes: 1 addition & 1 deletion ultralytics/nn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
"parse_model",
"torch_safe_load",
"yaml_model_load",
)
)
21 changes: 10 additions & 11 deletions ultralytics/nn/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@
from .block import (
C1,
C2,
C2PDS,
C2PSA,
C3,
C3TR,
CIB,
DFL,
ELAN1,
IN,
PSA,
S2AFM,
SPP,
SPPELAN,
SPPF,
Expand All @@ -52,18 +55,15 @@
HGStem,
ImagePoolingAttn,
MaxSigmoidAttnBlock,
MultiIn,
Proto,
RepC3,
RepNCSPELAN4,
RepVGGDW,
ResNetLayer,
SCDown,
SPDConv,
TorchVision,
IN,
MultiIn,
S2AFM,
C2PDS,
SPDConv
)
from .conv import (
CBAM,
Expand Down Expand Up @@ -107,26 +107,23 @@
TransformerLayer,
)


__all__ = (
"IN",
"MultiIn",
"S2AFM",
"C2PDS",
"SPDConv",
"AIFI",
"C1",
"C2",
"C2PDS",
"C2PSA",
"C3",
"C3TR",
"CBAM",
"CIB",
"DFL",
"ELAN1",
"IN",
"MLP",
"OBB",
"PSA",
"S2AFM",
"SPP",
"SPPELAN",
"SPPF",
Expand Down Expand Up @@ -171,6 +168,7 @@
"MLPBlock",
"MSDeformAttn",
"MaxSigmoidAttnBlock",
"MultiIn",
"Pose",
"Proto",
"RTDETRDecoder",
Expand All @@ -180,6 +178,7 @@
"RepVGGDW",
"ResNetLayer",
"SCDown",
"SPDConv",
"Segment",
"SpatialAttention",
"TorchVision",
Expand Down
123 changes: 43 additions & 80 deletions ultralytics/nn/modules/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
__all__ = (
"C1",
"C2",
"C2PDS",
"C2PSA",
"C3",
"C3TR",
"CIB",
"DFL",
"ELAN1",
"IN",
"PSA",
"S2AFM",
"SPP",
"SPPELAN",
"SPPF",
Expand All @@ -45,20 +48,18 @@
"HGBlock",
"HGStem",
"ImagePoolingAttn",
"MultiIn",
"Proto",
"RepC3",
"RepNCSPELAN4",
"RepVGGDW",
"ResNetLayer",
"SCDown",
"TorchVision",
"IN",
"MultiIn",
"S2AFM",
"SPDConv",
"C2PDS"
"TorchVision",
)


class IN(nn.Module):
def __init__(self):
super().__init__()
Expand All @@ -74,13 +75,14 @@ def __init__(self, img_idx=1, img1_bands=3):
self.img1_bands = img1_bands

def forward(self, x):
x1, x2 = x[:, :self.img1_bands, :, :], x[:, self.img1_bands:, :, :]
x1, x2 = x[:, : self.img1_bands, :, :], x[:, self.img1_bands :, :, :]
if self.img_idx == 1:
x = x1
else:
x = x2
return x


class AFA(nn.Module):
def __init__(self, channels, reduction=16):
super().__init__()
Expand All @@ -93,9 +95,7 @@ def __init__(self, channels, reduction=16):

def forward(self, f_rgb, f_msi):
# 上采样 MSI
f_msi_up = F.interpolate(
f_msi, size=f_rgb.shape[2:], mode='bilinear', align_corners=False
)
f_msi_up = F.interpolate(f_msi, size=f_rgb.shape[2:], mode="bilinear", align_corners=False)

rgb_conv = self.conv_rgb(f_rgb)
msi_conv = self.conv_msi(f_msi_up)
Expand Down Expand Up @@ -132,7 +132,7 @@ def __init__(self, channels):
self.k_msi = nn.Conv2d(channels, channels, 1)
self.v_msi = nn.Conv2d(channels, channels, 1)

self.scale = channels ** -0.5
self.scale = channels**-0.5

def forward(self, f_rgb, f_msi):
B, C, H, W = f_rgb.shape
Expand All @@ -146,23 +146,20 @@ def reshape(x):
k_msi = reshape(self.k_msi(f_msi))
v_msi = reshape(self.v_msi(f_msi))

attn_rgb = torch.softmax(
torch.bmm(q_rgb, k_msi.transpose(1, 2)) * self.scale, dim=-1
)
attn_rgb = torch.softmax(torch.bmm(q_rgb, k_msi.transpose(1, 2)) * self.scale, dim=-1)
f_rgb_hat = torch.bmm(attn_rgb, v_msi).permute(0, 2, 1).view(B, C, H, W)

# MSI ← RGB
q_msi = reshape(self.q_msi(f_msi))
k_rgb = reshape(self.k_rgb(f_rgb))
v_rgb = reshape(self.v_rgb(f_rgb))

attn_msi = torch.softmax(
torch.bmm(q_msi, k_rgb.transpose(1, 2)) * self.scale, dim=-1
)
attn_msi = torch.softmax(torch.bmm(q_msi, k_rgb.transpose(1, 2)) * self.scale, dim=-1)
f_msi_hat = torch.bmm(attn_msi, v_rgb).permute(0, 2, 1).view(B, C, H, W)

return f_rgb_hat, f_msi_hat


class CA(nn.Module):
def __init__(self, channels, reduction=16):
super().__init__()
Expand All @@ -183,6 +180,7 @@ def mlp(x):
w = torch.sigmoid(mlp(gap) + mlp(gmp))
return f * w


class S2AFM(nn.Module):
def __init__(self, channels):
super().__init__()
Expand Down Expand Up @@ -210,17 +208,16 @@ def forward(self, x: list[torch.Tensor]):


class PDSConv(nn.Module):
"""
Partial Depthwise Separable Convolution (PDSConv)
"""
"""Partial Depthwise Separable Convolution (PDSConv)."""

def __init__(
self,
in_channels,
out_channels,
kernel_size=3,
stride=1,
ratio_d=0.5, # 深度可分离卷积分支比例
ratio_s=0.25 # 标准卷积分支比例
ratio_d=0.5, # 深度可分离卷积分支比例
ratio_s=0.25, # 标准卷积分支比例
):
super().__init__()
padding = kernel_size // 2
Expand All @@ -236,55 +233,35 @@ def __init__(

# 深度可分离卷积分支(DWConv)
self.dw_conv = nn.Conv2d(
c_d, c_d,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=c_d,
bias=False
c_d, c_d, kernel_size=kernel_size, stride=stride, padding=padding, groups=c_d, bias=False
)

# 标准卷积分支(Conv)
self.std_conv = nn.Conv2d(
c_s, c_s,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=False
)
self.std_conv = nn.Conv2d(c_s, c_s, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)

# 逐点卷积(PWConv)
self.pw_conv = nn.Conv2d(
c_d + c_s + c_i,
out_channels,
kernel_size=1,
bias=False
)
self.pw_conv = nn.Conv2d(c_d + c_s + c_i, out_channels, kernel_size=1, bias=False)

self.bn = nn.BatchNorm2d(out_channels)
self.act = nn.ReLU(inplace=True)

def forward(self, x):
# 通道拆分
x_d, x_s, x_i = torch.split(
x, [self.c_d, self.c_s, self.c_i], dim=1
)
x_d, x_s, x_i = torch.split(x, [self.c_d, self.c_s, self.c_i], dim=1)

# 三分支计算
y_d = self.dw_conv(x_d) if self.c_d > 0 else None
y_s = self.std_conv(x_s) if self.c_s > 0 else None
y_i = x_i # Identity

# 拼接
y = torch.cat(
[t for t in (y_d, y_s, y_i) if t is not None],
dim=1
)
y = torch.cat([t for t in (y_d, y_s, y_i) if t is not None], dim=1)

# PWConv + BN + ReLU
y = self.act(self.bn(self.pw_conv(y)))
return y


class SPDConv(nn.Module):
def __init__(self, in_channels, out_channels, scale=2, kernel_size=3, act=True):
super().__init__()
Expand Down Expand Up @@ -1351,9 +1328,7 @@ def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int =


class C2PDS(C2f):
"""
Cross Stage Partial with Partial Depthwise Separable Convolution (C2PDS)
"""
"""Cross Stage Partial with Partial Depthwise Separable Convolution (C2PDS)."""

def __init__(
self,
Expand All @@ -1366,53 +1341,41 @@ def __init__(
shortcut: bool = True,
ratio_d: float = 0.5,
ratio_s: float = 0.25,
k: int = 3
k: int = 3,
):
super().__init__(c1, c2, n, shortcut, g, e)

self.m = nn.ModuleList(
C3PDS(
self.c,
self.c,
n=2,
shortcut=shortcut,
g=g,
e=1.0,
k=k,
ratio_d=ratio_d,
ratio_s=ratio_s
)
C3PDS(self.c, self.c, n=2, shortcut=shortcut, g=g, e=1.0, k=k, ratio_d=ratio_d, ratio_s=ratio_s)
if c3k
else PDSConv(
in_channels=self.c,
out_channels=self.c,
kernel_size=k,
stride=1,
ratio_d=ratio_d,
ratio_s=ratio_s
in_channels=self.c, out_channels=self.c, kernel_size=k, stride=1, ratio_d=ratio_d, ratio_s=ratio_s
)
for _ in range(n)
)


class C3PDS(C3):
"""
C3k with PDSConv replacing Bottleneck
"""
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3, ratio_d: float = 0.5, ratio_s: float = 0.25):
"""C3k with PDSConv replacing Bottleneck."""

def __init__(
self,
c1: int,
c2: int,
n: int = 1,
shortcut: bool = True,
g: int = 1,
e: float = 0.5,
k: int = 3,
ratio_d: float = 0.5,
ratio_s: float = 0.25,
):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels

self.m = nn.Sequential(
*(
PDSConv(
in_channels=c_,
out_channels=c_,
kernel_size=k,
stride=1,
ratio_d=ratio_d,
ratio_s=ratio_s
)
PDSConv(in_channels=c_, out_channels=c_, kernel_size=k, stride=1, ratio_d=ratio_d, ratio_s=ratio_s)
for _ in range(n)
)
)
Expand Down
Loading