In [None]:
  # Attach Google Drive to the Colab VM so files under the mount point are
  # reachable through the regular filesystem API.
  from google.colab import drive
  DRIVE_MOUNT_POINT = '/content/drive'
  drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# ===============================
# Colab: PaddleOCR Text Detection (Demo)
# ===============================

# 1. Setup Environment
#!pip install paddlepaddle-gpu==2.5.2
!pip install paddlepaddle-gpu
!git clone https://github.com/PaddlePaddle/PaddleOCR.git
%cd PaddleOCR

# Install dependencies
!pip install -r requirements.txt

# 2. Imports
import os
from pathlib import Path

# Ensure data directory
os.makedirs("./train_data/demo_det", exist_ok=True)

# 3. Download demo images (instead of copying from repo)
!wget -nc https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs_en/img_10.jpg -O train_data/demo_det/img_10.jpg
!wget -nc https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs_en/img_12.jpg -O train_data/demo_det/img_12.jpg

# 4. Create toy annotation file
with open("train_data/demo_det/train_det_gt.txt", "w", encoding="utf-8") as f:
    # Format: img_path \t [ { "transcription": str, "points": [[x,y],...] }, ...]
    f.write("train_data/demo_det/img_10.jpg\t[{\"transcription\":\"text\",\"points\":[[50,50],[150,50],[150,100],[50,100]]}]\n")
    f.write("train_data/demo_det/img_12.jpg\t[{\"transcription\":\"ocr\",\"points\":[[30,30],[130,30],[130,80],[30,80]]}]\n")

# 5. Create a lightweight config
os.makedirs("configs/det", exist_ok=True)

with open("configs/det/det_db_demo.yml", "w") as f:
    f.write("""
%%writefile configs/det/det_db_demo.yml
Global:
  use_gpu: true
  epoch_num: 2
  log_smooth_window: 20
  save_model_dir: ./output/det_db_demo
  save_epoch_step: 1
  eval_batch_step: [1, 1]
  print_batch_step: 1
  checkpoints: null
  pretrained_model: null
  save_inference_dir: null
  use_visualdl: False
  infer_img: null
  character_dict_path: ppocr/utils/ic15_dict.txt
  max_text_length: 25
  max_ctc_length: 30
  image_shape: [3, 640, 640]
  infer_mode: False
  use_space_char: True
  save_res_path: ./output/det_db_demo/predicts_db.txt

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    learning_rate: 0.001
  regularizer:
    name: 'L2'
    factor: 3.0e-05

Architecture:
  model_type: det
  algorithm: DB
  Transform: null
  Backbone:
    name: ResNet
    layers: 18
  Neck:
    name: DBFPN
    out_channels: 256
  Head:
    name: DBHead
    k: 50
    adaptive: True

Loss:
  name: DBLoss
  balance_loss: True
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

PostProcess:
  name: DBPostProcess
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/demo_det
    label_file_list:
      - ./train_data/demo_det/train_icdar2015_label.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: {}
      - IaaAugment:
          augmenter_args:
            - type: Fliplr
              args: [0.5]
            - type: Affine
              args:
                rotate: [-10, 10]
            - type: Resize
              args:
                size: [0.5, 3.0]
      - EastRandomCropData:
          size: [640, 640]
          max_tries: 50
          keep_ratio: True
      - MakeBorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - MakeShrinkMap:
          shrink_ratio: 0.4
          min_text_size: 8
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: {}
      - KeepKeys:
          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask']
  loader:
    shuffle: True
    drop_last: False
    batch_size: 2
    num_workers: 2

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/demo_det
    label_file_list:
      - ./train_data/demo_det/train_icdar2015_label.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: {}
      - DetResizeForTest:
          resize_long: 736
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: {}
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size: 1
    num_workers: 2
""")

# 6. Train DBNet (toy demo)
!python tools/train.py -c configs/det/det_db_demo.yml -o Global.pretrained_model=None

# 7. Evaluate
!python tools/eval.py -c configs/det/det_db_demo.yml -o Global.checkpoints=output/det_db_demo/best_accuracy

# 8. Inference Example
!python tools/infer_det.py \
    -c configs/det/det_db_demo.yml \
    -o Global.checkpoints=output/det_db_demo/latest \
    Global.infer_img=train_data/demo_det/img_10.jpg


Cloning into 'PaddleOCR'...
remote: Enumerating objects: 284199, done.[K
remote: Counting objects: 100% (2613/2613), done.[K
remote: Compressing objects: 100% (512/512), done.[K
remote: Total 284199 (delta 2371), reused 2101 (delta 2101), pack-reused 281586 (from 4)[K
Receiving objects: 100% (284199/284199), 1.50 GiB | 29.24 MiB/s, done.
Resolving deltas: 100% (224562/224562), done.
/content/PaddleOCR/PaddleOCR/PaddleOCR/PaddleOCR
--2025-09-19 07:02:23--  https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs_en/img_10.jpg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 179855 (176K) [image/jpeg]
Saving to: ‘train_data/demo_det/img_10.jpg’


2025-09-19 07:02:23 (12.0 MB/s) - ‘train_data/demo_det/img_10.jpg’ saved [179855/179855]

-

In [None]:
# List the contents of output/det_db_demo/ (the `save_model_dir` set in
# det_db_demo.yml). The path is relative, so the result depends on the kernel's
# current working directory; presumably the directory only exists after a
# training run has written checkpoints there.
!ls output/det_db_demo/

ls: cannot access 'output/det_db_demo/': No such file or directory
