/
model.py
138 lines (121 loc) · 5.81 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import torch
import torch.nn as nn
class ResNet(nn.Module):
r"""Resnet layers implementation summarized in the paper (layer 2~3)"""
def __init__(self, in_feature, hid_feature, out_feature, repeat=8, k_size=5, stride=1, pad=2):
super(ResNet, self).__init__()
# layer 1
self.layer_top = nn.Sequential(
nn.Conv2d(in_feature, hid_feature, kernel_size=k_size, stride=stride, padding=pad, bias=False),
nn.BatchNorm2d(hid_feature),
nn.ReLU(inplace=True), )
# layer 2~17
self.layer_middles = nn.ModuleList()
for i in range(repeat):
self.layer_middles.append(nn.Sequential(
# layer 2
nn.Conv2d(hid_feature, hid_feature, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(hid_feature),
nn.ReLU(inplace=True),
# layer 3
nn.Conv2d(hid_feature, hid_feature, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(hid_feature),
nn.ReLU(inplace=True), ))
# layer 18
self.layer_bottom = nn.Conv2d(hid_feature, out_feature, kernel_size=3, stride=1, padding=1)
def forward(self, x):
x = self.layer_top(x)
for layer in self.layer_middles:
x = layer(x) + x
x = self.layer_bottom(x)
return x
class MCDLNet(nn.Module):
def __init__(self, n=32, d=160):
super(MCDLNet, self).__init__()
self.n = n
self.d = d
# Weight volume generation
# layer 1~18
self.resnet1 = ResNet(1, n, n, stride=2)
# layer 19~20
self.attention = nn.Sequential(
nn.Conv3d(2 * n, n, kernel_size=1),
nn.Sigmoid(),
nn.Conv3d(n, 1, kernel_size=1),
nn.Sigmoid(), )
self.regulation = nn.ModuleList()
# layer 21~22
self.regulation.append(nn.Sequential(
nn.Conv3d(2 * n, n, kernel_size=3, padding=1, bias=False),
nn.BatchNorm3d(n),
nn.ReLU(inplace=True),
nn.Conv3d(n, n, kernel_size=3, padding=1, bias=False),
nn.BatchNorm3d(n),
nn.ReLU(inplace=True), ))
# layer 23~25, 26~28, 29~31, 32~34
for i in range(4):
self.regulation.append(nn.Sequential(
nn.Conv3d(n if i == 0 else 2 * n, 2 * n, kernel_size=3, stride=2, padding=1, bias=False),
nn.BatchNorm3d(2 * n),
nn.ReLU(inplace=True),
nn.Conv3d(2 * n, 2 * n, kernel_size=3, padding=1, bias=False),
nn.BatchNorm3d(2 * n),
nn.ReLU(inplace=True),
nn.Conv3d(2 * n, 2 * n, kernel_size=3, padding=1, bias=False),
nn.BatchNorm3d(2 * n),
nn.ReLU(inplace=True), ))
# layer 35, 36, 37, 38
for i in range(4):
self.regulation.append(nn.Sequential(
nn.ConvTranspose3d(2 * n, n if i == 3 else 2 * n, kernel_size=3, stride=2, padding=0, bias=False),
nn.BatchNorm3d(n if i == 3 else 2 * n),
nn.ReLU(inplace=True), ))
# layer 39
self.regulation.append(nn.ConvTranspose3d(n, 1, kernel_size=3, stride=2, padding=0, bias=False))
# Color residue joint learning
self.resnet2 = ResNet(1, n, n)
self.resnet3 = ResNet(1, n, n)
self.resnet4 = ResNet(2 * n, n, 1)
def forward(self, mono_img, color_img_y, color_img_c):
# Weight volume generation
F_Y = self.resnet1(mono_img)
F_YR = self.resnet1(color_img_y)
V_FR = torch.zeros(F_Y.shape + (self.d // 2,), device=F_Y.device) # NCHWD
for i in range(self.d // 2):
V_FR[:, :, :, i:, i] = F_YR if i == 0 else F_YR[:, :, :, :-i] # grad_fn=<CopySlices>
V_F = torch.cat([F_Y.unsqueeze(dim=-1).expand_as(V_FR), V_FR], dim=1)
A = self.attention(V_F)
V_A = torch.cat([V_F[:, :self.n], V_F[:, self.n:] * A], dim=1)
W_22 = self.regulation[0](V_A) # torch.Size([1, 32, 128, 256, 80])
W_25 = self.regulation[1](W_22) # torch.Size([1, 64, 64, 128, 40])
W_28 = self.regulation[2](W_25) # torch.Size([1, 64, 32, 64, 20])
W_31 = self.regulation[3](W_28) # torch.Size([1, 64, 16, 32, 10])
W_34 = self.regulation[4](W_31) # torch.Size([1, 64, 8, 16, 5])
W_35 = self.regulation[5](W_34)[:, :, :-1, :-1, :-1] + W_31 # torch.Size([1, 64, 16, 32, 10])
W_36 = self.regulation[6](W_35)[:, :, :-1, :-1, :-1] + W_28 # torch.Size([1, 64, 32, 64, 20])
W_37 = self.regulation[7](W_36)[:, :, :-1, :-1, :-1] + W_25 # torch.Size([1, 64, 64, 128, 40])
W_38 = self.regulation[8](W_37)[:, :, :-1, :-1, :-1] + W_22 # torch.Size([1, 32, 128, 256, 80])
W = self.regulation[9](W_38)[:, :, :-1, :-1, :-1] # torch.Size([1, 1, 256, 512, 160])
# Color residue joint learning
C_R = color_img_c
C_P = torch.zeros(C_R.shape + (self.d,), device=C_R.device) # NCHWD
for i in range(self.d):
C_P[:, :, :, i:, i] = C_R if i == 0 else C_R[:, :, :, :-i]
C_P = (C_P * W).sum(dim=-1)
G_CP = self.resnet2(C_P)
G_Y = self.resnet3(mono_img)
G = torch.cat([G_CP, G_Y], dim=1)
C = self.resnet4(G)
#return C + C_P
return C # non-residual shows more generalized results.
if __name__ == "__main__":
zeros1 = torch.zeros([4, 1, 256, 512])
resnet1 = ResNet(1, 32, 32, k_size=5, stride=2, pad=2)
print("resnet1", resnet1(zeros1).shape)
resnet2 = ResNet(1, 32, 32)
print("resnet2", resnet2(zeros1).shape)
mcdlnet = MCDLNet().cuda()
mono_img = torch.zeros([1, 1, 256 // 2, 512 // 2]).cuda()
color_img = torch.zeros([1, 3, 256 // 2, 512 // 2]).cuda()
mcdlnet1 = mcdlnet(mono_img, color_img[:, 0:1], color_img[:, 1:2])
print("mcdlnet", mcdlnet1.shape)