-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Closed
Description
hi, I'm trying to test the released multiscaledeformableattention plugin, but all my output is 0.
here is my test code
import tensorrt as trt
import os
from cuda import cudart
import numpy as np
def getDeformAttnPlugin():
for c in trt.get_plugin_registry().plugin_creator_list:
if c.name == 'MultiscaleDeformableAttnPlugin_TRT':
return c.create_plugin(c.name, trt.PluginFieldCollection([]))
return None
def run():
logger = trt.Logger(trt.Logger.ERROR)
trt.init_libnvinfer_plugins(logger, '')
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
profile = builder.create_optimization_profile()
config = builder.create_builder_config()
config.max_workspace_size = 3 << 30
value = network.add_input('value', trt.float32, [1, 14960, 8, 32])
spatial_shapes = network.add_input('spatial_shapes', trt.int32, [4, 2])
level_start_index = network.add_input('level_start_index', trt.int32, [4, ])
sampling_locations = network.add_input('sampling_locations', trt.float32, [1, 6400, 8, 4, 4, 2])
attention_weights = network.add_input('attention_weights', trt.float32, [1, 6400, 8, 4, 4])
layer = network.add_plugin_v2(
[value, spatial_shapes, level_start_index, sampling_locations, attention_weights],
getDeformAttnPlugin()
)
network.mark_output(layer.get_output(0))
engineString = builder.build_serialized_network(network, config)
if engineString == None:
print("Failed building engine!")
return
print("Succeeded building engine!")
engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)
context = engine.create_execution_context()
nInput = np.sum([engine.binding_is_input(i) for i in range(engine.num_bindings)])
nOutput = engine.num_bindings - nInput
value = np.load('value.npy')
spatial_shapes = np.load('value_spatial_shapes.npy').astype(int)
level_start_index = np.load('value_level_start_index.npy').astype(int)
sampling_locations = np.load('sampling_locations.npy')
attention_weights = np.load('attention_weights.npy')
bufferH = []
bufferH.append(np.ascontiguousarray(value))
bufferH.append(np.ascontiguousarray(spatial_shapes))
bufferH.append(np.ascontiguousarray(level_start_index))
bufferH.append(np.ascontiguousarray(sampling_locations))
bufferH.append(np.ascontiguousarray(attention_weights))
for i in range(nOutput):
bufferH.append(np.ones(context.get_binding_shape(nInput + i), dtype=trt.nptype(engine.get_binding_dtype(nInput + i))))
bufferD = []
for i in range(engine.num_bindings):
bufferD.append(cudart.cudaMalloc(bufferH[i].nbytes)[1])
for i in range(nInput):
cudart.cudaMemcpy(bufferD[i], np.ascontiguousarray(bufferH[i].reshape(-1)).ctypes.data, bufferH[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
context.set_binding_shape(0, [1, 14960, 8, 32])
context.set_binding_shape(1, [4, 2])
context.set_binding_shape(2, [4, ])
context.set_binding_shape(3, [1, 6400, 8, 4, 4, 2])
context.set_binding_shape(4, [1, 6400, 8, 4, 4])
# context.execute(1, bufferD)
context.execute_v2([bufferD[0],bufferD[1], bufferD[2], bufferD[3], bufferD[4], bufferD[5]])
# context.execute_v2(bufferD)
for i in range(nOutput):
cudart.cudaMemcpy(bufferH[nInput+i].ctypes.data, bufferD[nInput+i], bufferH[nInput+i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
for i in range(nOutput):
print(bufferH[5])
np.save('trt_output.npy', bufferH[nInput])
if __name__ == "__main__":
run()
the output bufferH[5] is all 0.
can you help me find what is wrong here?
thanks a lot
Metadata
Metadata
Assignees
Labels
No labels