diff --git a/oneflow/python/nn/modules/conv.py b/oneflow/python/nn/modules/conv.py index 6e98d72a724..069e94b6a61 100644 --- a/oneflow/python/nn/modules/conv.py +++ b/oneflow/python/nn/modules/conv.py @@ -221,6 +221,7 @@ def __init__( self.weight = flow.nn.Parameter( flow.Tensor(out_channels, in_channels // groups, *kernel_size) ) + self.out_channel_groups = out_channels // groups self.bias = None self._bias_add_op = None if bias: @@ -280,7 +281,17 @@ def forward(self, x): out_list = [] for i in range(len(in_split_list)): out_list.append( - self._cpu_op(in_split_list[i], self.weight[i : i + 1, :, :, :])[0] + self._cpu_op( + in_split_list[i], + self.weight[ + i + * self.out_channel_groups : (i + 1) + * self.out_channel_groups, + :, + :, + :, + ], + )[0] ) res = flow.experimental.cat(out_list, dim=in_channel_axis) else: diff --git a/oneflow/python/test/modules/test_conv.py b/oneflow/python/test/modules/test_conv.py index 3bcff61f5d3..9057dc77d3c 100644 --- a/oneflow/python/test/modules/test_conv.py +++ b/oneflow/python/test/modules/test_conv.py @@ -14,8 +14,12 @@ limitations under the License. """ import unittest +from collections import OrderedDict + import numpy as np + import oneflow.experimental as flow +from test_util import GenArgList test_conv2d_weight = np.array( [ @@ -1202,6 +1206,278 @@ def _test_conv2d_backward( ) +def _test_conv2d_large_in_channel(test_case, device): + np_arr = np.array( + [ + [ + [ + [ + 0.6206631238581714, + -1.1225329393404626, + 0.8407155480700242, + -0.6845162855236345, + ], + [ + -0.5186484633906412, + 0.10420735184519186, + -0.1711568947473012, + 0.5168640476046483, + ], + [ + -0.12429464919764661, + 0.050277779246134253, + -1.0144501797426606, + -2.184600444658526, + ], + [ + 0.28918126931309923, + -0.822872663244595, + 0.44019150436683663, + -1.0247720130825562, + ], + ], + [ + [ + 0.7786504412818226, + -0.7501839068078657, + -0.8187283189941765, + -1.1116653569170698, + ], + [ + 0.18085524152316743, + -1.3461349607476678, + 1.142505437476448, + -0.000649619704040145, + ], + [ + 0.03160672782674317, + -0.006318157449953413, + 1.2218487782604377, + 0.15903027907930234, + ], + [ + 1.5857011815642381, + 0.6656477116332891, + -0.04036621813223574, + -0.3427168687988546, + ], + ], + [ + [ + -1.1774346070102524, + 1.6195241269303395, + -0.36185552303441965, + -1.1382193113192487, + ], + [ + 0.08061907334568702, + 1.5025447613238763, + -1.1591348706634745, + 1.6449050139676873, + ], + [ + 1.1539915649822392, + -2.414624939646017, + 0.3056063774849572, + 1.1920089257083162, + ], + [ + 0.7623012858982319, + -0.01685314742940813, + -1.096666898224702, + -0.4406476137098582, + ], + ], + [ + [ + 0.9383797282214235, + -1.1075876842796508, + -0.4420913825139058, + -1.0736097610655628, + ], + [ + -0.3101376466546291, + 1.6578227745160954, + -0.6225454278031398, + 0.6831188620748697, + ], + [ + 0.00743800968372913, + -0.8089158949698473, + 2.08084287836801, + 0.721204366332351, + ], + [ + 0.5694701823297723, + 0.031519314469744895, + -0.5041680957766629, + -0.4738588233094669, + ], + ], + ] + ] + ) + input = flow.Tensor( + np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True + ) + weight = np.array( + [ + [ + [ + [0.06456436216831207, -0.10852358490228653, -0.21638715267181396], + [-0.2279110550880432, 0.1476770043373108, 0.19457484781742096], + [0.05026858672499657, 0.10818571597337723, 0.02056501805782318], + ], + [ + [0.205095112323761, 0.1488947868347168, -0.2344113141298294], + [0.1684819906949997, -0.21986986696720123, 0.1082606166601181], + [-0.1528974026441574, 0.17120417952537537, 0.01954500749707222], + ], + ], + [ + [ + [-0.09441672265529633, -0.03644559532403946, -0.22235223650932312], + [-0.1771145612001419, 0.08043312281370163, 0.06938580423593521], + [0.054393064230680466, -0.05483492836356163, 0.23438701033592224], + ], + [ + [0.22666795551776886, 0.0874653309583664, 0.07092718034982681], + [0.08883464336395264, -0.052362944930791855, -0.1720171570777893], + [0.10441060364246368, 0.011952142231166363, -0.0894528403878212], + ], + ], + ] + ) + m = flow.nn.Conv2d(4, 2, 3, groups=2, bias=False) + m.weight = flow.nn.Parameter(flow.Tensor(weight), requires_grad=True) + m = m.to(device) + output = m(input) + np_out = [ + [ + [ + [0.7666134238243103, -0.3961866497993469], + [-0.656266987323761, -1.1613956689834595], + ], + [ + [0.3077264130115509, -0.42817503213882446], + [-0.5761325359344482, 0.1300736665725708], + ], + ] + ] + test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-6, 1e-6)) + output = output.sum() + output.backward() + np_grad = [ + [ + [ + [ + 0.06456436216831207, + -0.04395922273397446, + -0.3249107301235199, + -0.21638715267181396, + ], + [ + -0.16334669291973114, + -0.12419328093528748, + 0.017341122031211853, + -0.021812304854393005, + ], + [ + -0.17764246463775635, + 0.07822024822235107, + 0.47100257873535156, + 0.21513986587524414, + ], + [ + 0.05026858672499657, + 0.1584542989730835, + 0.128750741481781, + 0.02056501805782318, + ], + ], + [ + [ + 0.205095112323761, + 0.3539898991584778, + -0.08551652729511261, + -0.2344113141298294, + ], + [ + 0.3735771179199219, + 0.30260205268859863, + -0.19712577760219574, + -0.1261506974697113, + ], + [ + 0.015584588050842285, + -0.03308109939098358, + 0.07913993299007416, + 0.12780562043190002, + ], + [ + -0.1528974026441574, + 0.018306776881217957, + 0.1907491832971573, + 0.01954500749707222, + ], + ], + [ + [ + -0.09441672265529633, + -0.13086232542991638, + -0.258797824382782, + -0.22235223650932312, + ], + [ + -0.27153128385543823, + -0.22754377126693726, + -0.10897888988256454, + -0.1529664397239685, + ], + [ + -0.12272149324417114, + -0.09712330251932144, + 0.32937100529670715, + 0.30377280712127686, + ], + [ + 0.054393064230680466, + -0.00044186413288116455, + 0.1795520782470703, + 0.23438701033592224, + ], + ], + [ + [ + 0.22666795551776886, + 0.31413328647613525, + 0.1583925187587738, + 0.07092718034982681, + ], + [ + 0.3155025839805603, + 0.35060498118400574, + -0.06598758697509766, + -0.1010899767279625, + ], + [ + 0.19324524700641632, + 0.1528344452381134, + -0.301880806684494, + -0.2614699900150299, + ], + [ + 0.10441060364246368, + 0.11636274307966232, + -0.07750070095062256, + -0.0894528403878212, + ], + ], + ] + ] + test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-6, 1e-6)) + + @unittest.skipIf( not flow.unittest.env.eager_execution_enabled(), ".numpy() doesn't work in lazy mode", @@ -1401,6 +1677,15 @@ def test_conv2d_dilation_backward(test_case): device=device, ) + def test_large_channel_group_conv(test_case): + arg_dict = OrderedDict() + arg_dict["test_fun"] = [ + _test_conv2d_large_in_channel, + ] + arg_dict["device"] = ["cuda", "cpu"] + for arg in GenArgList(arg_dict): + arg[0](test_case, *arg[1:]) + if __name__ == "__main__": unittest.main()