Skip to content

Commit 14802d6

Browse files
committed
update readme
1 parent 7c588d2 commit 14802d6

File tree

4 files changed

+28
-10
lines changed

4 files changed

+28
-10
lines changed

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,16 @@
8585

8686
## Super-Resolution model (in progress)
8787
- TensorRT 8.0.3.4 (Real-ESRGAN)
88-
- Real-ESRGAN model (real-esrgan.cpp)
88+
- Real-ESRGAN model (real-esrgan.cpp)
89+
- Scale up 4x (448x640x3 -> 1792x2560x3)
8990
- Comparison of execution time over 100 iterations and GPU memory usage for one 448x640x3 input
90-
- Pytorch F32
91-
- Pytorch F16
92-
- TensorRT F32
93-
- TensorRT F16
91+
- Pytorch F32 4109 ms ( 5.029 GB)
92+
- Pytorch F16 1936 ms ( 4.407 GB)
93+
- TensorRT F32 2139 ms ( 3.807 GB) (0.47 FPS)
94+
- TensorRT F16 737 ms ( 3.311 GB) (1.35 FPS)
9495
- TensorRT Int8
9596
***
96-
97+
9798
## Using C TensorRT model in Python using dll
9899
- TRT_DLL_EX : <https://github.com/yester31/TRT_DLL_EX>
99100
***
-702 Bytes
Loading

TensorRT/TensorRT.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
</ClCompile>
136136
<ClCompile Include="yolov5s.cpp">
137137
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
138+
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
138139
</ClCompile>
139140
</ItemGroup>
140141
<ItemGroup>

TensorRT/real-esrgan.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ ITensor* residualDenseBlock(INetworkDefinition *network, std::map<std::string, W
143143
return ew1->getOutput(0);
144144
}
145145

146-
147146
ITensor* RRDB(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor* x, std::string lname)
148147
{
149148
ITensor* out = residualDenseBlock(network, weightMap, x, lname + ".rdb1");
@@ -253,7 +252,24 @@ void createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig*
253252

254253
// Build engine
255254
builder->setMaxBatchSize(maxBatchSize);
256-
config->setMaxWorkspaceSize(1 << 20);
255+
//config->setMaxWorkspaceSize(1 << 22);
256+
config->setMaxWorkspaceSize(28 * (1 << 23)); // 28 * 8 MiB = 224 MiB
257+
258+
if (precision_mode == 16) {
259+
std::cout << "==== precision f16 ====" << std::endl << std::endl;
260+
config->setFlag(BuilderFlag::kFP16);
261+
}
262+
else if (precision_mode == 8) {
263+
//std::cout << "==== precision int8 ====" << std::endl << std::endl;
264+
//std::cout << "Your platform support int8: " << builder->platformHasFastInt8() << std::endl;
265+
//assert(builder->platformHasFastInt8());
266+
//config->setFlag(BuilderFlag::kINT8);
267+
//Int8EntropyCalibrator2 *calibrator = new Int8EntropyCalibrator2(maxBatchSize, INPUT_W, INPUT_H, 0, "../data_calib/", "../Int8_calib_table/detr_int8_calib.table", INPUT_BLOB_NAME);
268+
//config->setInt8Calibrator(calibrator);
269+
}
270+
else {
271+
std::cout << "==== precision f32 ====" << std::endl << std::endl;
272+
}
257273

258274
std::cout << "Building engine, please wait for a while..." << std::endl;
259275
IHostMemory* engine = builder->buildSerializedNetwork(*network, *config);
@@ -285,7 +301,7 @@ int main()
285301
char engineFileName[] = "real-esrgan";
286302

287303
char engine_file_path[256];
288-
sprintf(engine_file_path, "../Engine/%s.engine", engineFileName);
304+
sprintf(engine_file_path, "../Engine/%s_%d.engine", engineFileName, precision_mode);
289305

290306
// 1) engine file 만들기
291307
// 강제 만들기 true면 무조건 다시 만들기
@@ -359,7 +375,7 @@ int main()
359375
std::cout << "===== input load done =====" << std::endl << std::endl;
360376

361377
uint64_t dur_time = 0;
362-
uint64_t iter_count = 1;
378+
uint64_t iter_count = 10;
363379

364380
// CUDA 스트림 생성
365381
cudaStream_t stream;

0 commit comments

Comments
 (0)