Merge pull request #1 from LDOUBLEV/upload

Upload PaddleOCR code
PaddlePaddle · May 10, 2020 · 56c6c3a · 56c6c3a
2 parents e27cf9a + 338ba3e
commit 56c6c3a
Show file tree

Hide file tree

Showing 84 changed files with 15,543 additions and 0 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,35 @@
+-   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+    rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37  # pinned commit; `rev` replaces the deprecated `sha` key
+    hooks:
+    -   id: yapf  # restricted to Python sources by the pattern below
+        files: \.py$
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: a11d9314b22d8f8c7556443875b731ef05965464  # pinned commit
+    hooks:
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+        files: (?!.*paddle)^.*$  # negative lookahead: skips any path containing "paddle"
+    -   id: end-of-file-fixer
+        files: \.md$
+    -   id: trailing-whitespace
+        files: \.md$
+-   repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.0.1  # pinned tag
+    hooks:
+    -   id: forbid-crlf  # all four hooks from this repo are restricted to Markdown files
+        files: \.md$
+    -   id: remove-crlf
+        files: \.md$
+    -   id: forbid-tabs
+        files: \.md$
+    -   id: remove-tabs
+        files: \.md$
+-   repo: local
+    hooks:
+    -   id: clang-format
+        name: clang-format
+        description: Format files with ClangFormat
+        entry: bash .clang_format.hook -i  # NOTE(review): expects a .clang_format.hook script at the repo root; not visible in this chunk
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
diff --git a/.style.yapf b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
diff --git a/configs/det/det_db_icdar15_reader.yml b/configs/det/det_db_icdar15_reader.yml
@@ -0,0 +1,22 @@
+TrainReader:  # reader settings used for training; paths point at ./train_data/icdar2015/text_localization/
+  reader_function: ppocr.data.det.dataset_traversal,TrainReader  # presumably "<python module>,<class name>"; confirm against the config loader
+  process_function: ppocr.data.det.db_process,DBProcessTrain
+  num_workers: 8
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+
+EvalReader:  # shares EvalTestReader / DBProcessTest with TestReader below
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.db_process,DBProcessTest
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  test_image_shape: [736, 1280]
+
+TestReader:  # same image dir, label file and shape as EvalReader, plus the two keys noted below
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.db_process,DBProcessTest
+  single_img_path: null  # unset; written as an explicit null instead of a bare empty value
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  test_image_shape: [736, 1280]
+  do_eval: true  # canonical lowercase boolean
diff --git a/configs/det/det_db_mv3.yml b/configs/det/det_db_mv3.yml
@@ -0,0 +1,51 @@
+Global:  # run-wide settings for the DB algorithm with the MobileNetV3 backbone configured below
+  algorithm: DB
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 16
+  test_batch_size_per_card: 16
+  image_shape: [3, 640, 640]
+  reader_yml: ./configs/det/det_db_icdar15_reader.yml  # reader sections live in this separate file
+  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/  # path names the large / x0_5 variant, in line with Backbone below
+  save_res_path: ./output/predicts_db.txt
+
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.det_db_head,DBHead
+  model_name: large
+  k: 50
+  inner_channels: 96  # the ResNet50_vd variant of this config (det_db_r50_vd.yml) uses 256 here
+  out_channels: 2
+
+Loss:
+  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.7
+  max_candidates: 1000
+  unclip_ratio: 1.5
diff --git a/configs/det/det_db_r50_vd.yml b/configs/det/det_db_r50_vd.yml
@@ -0,0 +1,51 @@
+Global:  # run-wide settings for the DB algorithm with the ResNet (layers: 50) backbone configured below
+  algorithm: DB
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 8  # the MobileNetV3 variant of this config (det_db_mv3.yml) uses 16 here
+  test_batch_size_per_card: 16
+  image_shape: [3, 640, 640]
+  reader_yml: ./configs/det/det_db_icdar15_reader.yml  # reader sections live in this separate file
+  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
+  save_res_path: ./output/predicts_db.txt
+
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
+  layers: 50
+
+Head:
+  function: ppocr.modeling.heads.det_db_head,DBHead
+  model_name: large
+  k: 50
+  inner_channels: 256  # the MobileNetV3 variant of this config (det_db_mv3.yml) uses 96 here
+  out_channels: 2
+
+Loss:
+  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.7
+  max_candidates: 1000
+  unclip_ratio: 1.5
+
diff --git a/configs/det/det_east_icdar15_reader.yml b/configs/det/det_east_icdar15_reader.yml
@@ -0,0 +1,23 @@
+TrainReader:  # reader settings used for training; paths point at ./train_data/icdar2015/text_localization/
+  reader_function: ppocr.data.det.dataset_traversal,TrainReader  # presumably "<python module>,<class name>"; confirm against the config loader
+  process_function: ppocr.data.det.east_process,EASTProcessTrain
+  num_workers: 8
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+  background_ratio: 0.125
+  min_crop_side_ratio: 0.1
+  min_text_size: 10
+
+EvalReader:  # shares EvalTestReader / EASTProcessTest with TestReader below
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.east_process,EASTProcessTest
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+
+TestReader:  # same image dir and label file as EvalReader, plus the two keys noted below
+  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
+  process_function: ppocr.data.det.east_process,EASTProcessTest
+  single_img_path: null  # unset; written as an explicit null instead of a bare empty value
+  img_set_dir: ./train_data/icdar2015/text_localization/
+  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+  do_eval: true  # canonical lowercase boolean
diff --git a/configs/det/det_east_mv3.yml b/configs/det/det_east_mv3.yml
@@ -0,0 +1,43 @@
+Global:  # run-wide settings for the EAST algorithm with the MobileNetV3 backbone configured below
+  algorithm: EAST
+  use_gpu: true
+  epoch_num: 100000
+  log_smooth_window: 20
+  print_batch_step: 5
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 16
+  test_batch_size_per_card: 16
+  image_shape: [3, 512, 512]
+  reader_yml: ./configs/det/det_east_icdar15_reader.yml  # reader sections live in this separate file
+  pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/  # path names the large / x0_5 variant, in line with Backbone below
+  save_res_path: ./output/predicts_east.txt
+
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.det_east_head,EASTHead
+  model_name: small  # the ResNet50_vd variant of this config (det_east_r50_vd.yml) uses "large" here
+
+Loss:
+  function: ppocr.modeling.losses.det_east_loss,EASTLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.east_postprocess,EASTPostPocess  # NOTE(review): spelled "PostPocess"; must match the class name in that module, so verify before renaming
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
diff --git a/configs/det/det_east_r50_vd.yml b/configs/det/det_east_r50_vd.yml
@@ -0,0 +1,42 @@
+Global:  # run-wide settings for the EAST algorithm with the ResNet (layers: 50) backbone configured below
+  algorithm: EAST
+  use_gpu: true
+  epoch_num: 100000
+  log_smooth_window: 20
+  print_batch_step: 5
+  save_model_dir: output
+  save_epoch_step: 200
+  eval_batch_step: 5000
+  train_batch_size_per_card: 8  # the MobileNetV3 variant of this config (det_east_mv3.yml) uses 16 here
+  test_batch_size_per_card: 16
+  image_shape: [3, 512, 512]
+  reader_yml: ./configs/det/det_east_icdar15_reader.yml  # reader sections live in this separate file
+  pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/
+  save_res_path: ./output/predicts_east.txt
+
+Architecture:
+  function: ppocr.modeling.architectures.det_model,DetModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
+  layers: 50
+
+Head:
+  function: ppocr.modeling.heads.det_east_head,EASTHead
+  model_name: large  # the MobileNetV3 variant of this config (det_east_mv3.yml) uses "small" here
+
+Loss:
+  function: ppocr.modeling.losses.det_east_loss,EASTLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
+
+PostProcess:
+  function: ppocr.postprocess.east_postprocess,EASTPostPocess  # NOTE(review): spelled "PostPocess"; must match the class name in that module, so verify before renaming
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
diff --git a/configs/rec/rec_benchmark_reader.yml b/configs/rec/rec_benchmark_reader.yml
@@ -0,0 +1,12 @@
+TrainReader:  # all three sections use the same LMDBReader; each points at a different subdirectory of data_lmdb_release
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader  # presumably "<python module>,<class name>"; confirm against the config loader
+  num_workers: 8  # only TrainReader sets num_workers in this file
+  lmdb_sets_dir: ./train_data/data_lmdb_release/training/
+
+EvalReader:
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
+  lmdb_sets_dir: ./train_data/data_lmdb_release/validation/
+
+TestReader:
+  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
+  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml
@@ -0,0 +1,42 @@
+Global:  # run-wide settings for the CRNN algorithm with character_type "ch"
+  algorithm: CRNN
+  dataset: common
+  use_gpu: true
+  epoch_num: 300
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: ch
+  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_chinese_reader.yml  # reader sections live in this separate file
+  pretrain_weights: null  # unset; written as an explicit null instead of a bare empty value
+
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: small
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 48
+
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999
diff --git a/configs/rec/rec_chinese_reader.yml b/configs/rec/rec_chinese_reader.yml
@@ -0,0 +1,14 @@
+TrainReader:  # all three sections use the same SimpleReader
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader  # presumably "<python module>,<class name>"; confirm against the config loader
+  num_workers: 8  # only TrainReader sets num_workers in this file
+  img_set_dir: .
+  label_file_path: ./train_data/hard_label.txt
+
+EvalReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  img_set_dir: .
+  label_file_path: ./train_data/label_val_all.txt
+
+TestReader:  # unlike the two sections above, has no img_set_dir / label_file_path and sets infer_img instead
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  infer_img: ./infer_img
diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -0,0 +1,40 @@
+Global:  # run-wide settings for the CRNN algorithm with character_type "en"
+  algorithm: CRNN
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: output
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: en
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml  # reader sections live in this separate file
+  pretrain_weights: null  # unset; written as an explicit null instead of a bare empty value
+
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel  # presumably "<python module>,<class name>"; confirm against the config loader
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: large
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 96
+
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.001
+  beta1: 0.9
+  beta2: 0.999