In [None]:
# Mount Google Drive at /content/drive so later cells can reach files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 기본 설치
!sudo apt-get install -y libsndfile1
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git
!pip install wandb

# DTRB 레포 설치
!git clone https://github.com/clovaai/deep-text-recognition-benchmark
%cd deep-text-recognition-benchmark
!pip install -r requirements.txt

!pip install lmdb pillow torchvision torchaudio torchaudio albumentations
!pip install wandb

%cd /content/deep-text-recognition-benchmark
!sed -i 's/from torch._utils import _accumulate/import itertools\n_accumulate = itertools.accumulate/' dataset.py

%cd /content/drive/MyDrive/DTRB_project
!pip install tqdm

!pip install lmdb pillow torchvision numpy tqdm matplotlib opencv-python fire

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libsndfile1 is already the newest version (1.0.31-2ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-y3i9vqio
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-y3i9vqio
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->clip==1

In [None]:
# Run train.py from the Drive project directory with a TPS / ResNet / BiLSTM / Attn model.
# The --character string below is the same one the evaluation cell assigns to Opt.character.
# NOTE(review): --train_data and --valid_data point at the same directory; confirm that
# train.py does not validate on the training samples, otherwise validation accuracy is inflated.
# NOTE(review): --saved_model looks like a pretrained checkpoint used for initialization;
# confirm how train.py loads it given the custom --character set.
%cd /content/drive/MyDrive/DTRB_project

!python3 train.py \
  --train_data data/korean_plate_lmdb_split \
  --valid_data data/korean_plate_lmdb_split \
  --select_data MJ-ST \
  --batch_ratio 0.5-0.5 \
  --Transformation TPS \
  --FeatureExtraction ResNet \
  --SequenceModeling BiLSTM \
  --Prediction Attn \
  --character '0123456789가나다라마바사아자차카타파하거너더러머버서어저처커터퍼허고노도로모보소오조초코토포호구누두루무부수우주추쿠투푸후기니디리미비시이지치키티피히육-' \
  --input_channel 1 \
  --output_channel 512 \
  --hidden_size 256 \
  --batch_size 64 \
  --valInterval 200 \
  --num_iter 15000 \
  --saved_model /content/drive/MyDrive/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth \
  --data_filtering_off


/content/drive/MyDrive/DTRB_project
--------------------------------------------------------------------------------
dataset_root: data/korean_plate_lmdb_split
opt.select_data: ['MJ', 'ST']
opt.batch_ratio: ['0.5', '0.5']
--------------------------------------------------------------------------------
dataset_root:    data/korean_plate_lmdb_split	 dataset: MJ
sub-directory:	/MJ	 num samples: 69674
num total samples of MJ: 69674 x 1.0 (total_data_usage_ratio) = 69674
num samples of MJ per batch: 64 x 0.5 (batch_ratio) = 32
--------------------------------------------------------------------------------
dataset_root:    data/korean_plate_lmdb_split	 dataset: ST
sub-directory:	/ST	 num samples: 69674
num total samples of ST: 69674 x 1.0 (total_data_usage_ratio) = 69674
num samples of ST per batch: 64 x 0.5 (batch_ratio) = 32
--------------------------------------------------------------------------------
Total_batch_size: 32+32 = 64
--------------------------------------------------------

In [None]:
import os
import cv2
import torch
import unicodedata
import numpy as np
from PIL import Image
from tqdm import tqdm
from jiwer import cer
from model import Model
from utils import AttnLabelConverter
from dataset import ResizeNormalize
import re
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

# 📌 Model settings
class Opt:
    """Plain option holder passed to AttnLabelConverter and Model.

    All values are fixed in the constructor and stored as instance attributes;
    `device` resolves to 'cuda' when torch reports a GPU and to 'cpu' otherwise.
    """

    def __init__(self):
        settings = dict(
            # Input image geometry and channel widths.
            imgH=32,
            imgW=100,
            input_channel=1,
            output_channel=512,
            hidden_size=256,
            # Character set handed to the label converter.
            character='0123456789가나다라마바사아자차카타파하거너더러머버서어저처커터퍼허고노도로모보소오조초코토포호구누두루무부수우주추쿠투푸후기니디리미비시이지치키티피히육-',
            # Stage selection for the recognizer.
            Transformation='TPS',
            FeatureExtraction='ResNet',
            SequenceModeling='BiLSTM',
            Prediction='Attn',
            num_fiducial=20,
            PAD=False,
            sensitive=True,
            batch_max_length=25,
            device='cuda' if torch.cuda.is_available() else 'cpu',
            rgb=False,
        )
        # Assign in declaration order so the instance dict matches the listing above.
        for key, value in settings.items():
            setattr(self, key, value)

# Build the label converter first: the model's class count comes from its character table.
opt = Opt()
converter = AttnLabelConverter(opt.character)
opt.num_class = len(converter.character)
model = Model(opt).to(opt.device)

# 📌 Load the trained weights
state_dict = torch.load(
    "/content/drive/MyDrive/DTRB_project/saved_models/TPS-ResNet-BiLSTM-Attn-Seed1111/best_accuracy.pth",
    map_location=opt.device,
)
# When the first key carries a "module." prefix, drop that 7-character prefix from
# every key so the names line up with the bare Model built above.
# (Presumably the checkpoint was saved from a DataParallel-wrapped model; verify.)
first_key = list(state_dict.keys())[0]
if first_key.startswith("module."):
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for key, weights in state_dict.items():
        new_state_dict[key[7:]] = weights
    state_dict = new_state_dict
model.load_state_dict(state_dict, strict=True)
model.eval()

# 📁 Collect the test images (.jpg / .png) from the Drive folder.
test_dir = "/content/drive/MyDrive/test_ocr"
# os.listdir() returns entries in arbitrary order; sort so the table and CSV rows
# come out in the same order on every run.
image_paths = sorted(
    os.path.join(test_dir, fname)
    for fname in os.listdir(test_dir)
    if fname.lower().endswith(('.jpg', '.png'))
)

# 📈 Per-image results. All three lists stay index-aligned with image_paths,
# because later cells zip them together and build a DataFrame from them.
gt_texts, pred_texts, match_flags = [], [], []
# Plate pattern: 2-3 digits, one Hangul syllable, 4 digits.
plate_pattern = re.compile(r'\d{2,3}[가-힣]\d{4}')

# Loop-invariant helpers, built once instead of once per image.
transformer = ResizeNormalize((opt.imgW, opt.imgH))
length_for_pred = torch.IntTensor([opt.batch_max_length]).to(opt.device)

# 📦 Evaluation loop
for path in tqdm(image_paths):
    # The ground truth is taken from the file name (extension removed). NFC
    # normalization composes decomposed Hangul so the [가-힣] range can match it.
    filename = os.path.basename(path)
    gt_raw = os.path.splitext(filename)[0]
    gt_text = unicodedata.normalize("NFC", gt_raw)
    gt_match = plate_pattern.search(gt_text)
    gt_plate = gt_match.group(0) if gt_match else None
    gt_texts.append(gt_plate)

    # cv2.imread returns None (it does not raise) when a file cannot be decoded.
    # Record the image as a miss so the result lists keep their alignment.
    img = cv2.imread(path)
    if img is None:
        tqdm.write(f"[warn] could not read image, counted as a miss: {path}")
        pred_texts.append(None)
        match_flags.append(False)
        continue

    img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).convert('L')
    image_tensor = transformer(img_pil).unsqueeze(0).to(opt.device)
    # Placeholder text input for inference; rebuilt per image as in the original loop.
    text_for_pred = torch.LongTensor(1, opt.batch_max_length + 1).fill_(0).to(opt.device)

    with torch.no_grad():
        preds = model(image_tensor, text_for_pred, is_train=False)
        _, preds_index = preds.max(2)

    decoded = converter.decode(preds_index, length_for_pred)[0]
    # Keep only the text before the first '[s]' end-of-sequence token. Deleting the
    # token instead would splice whatever was decoded after it into the prediction.
    pred_str = decoded.split('[s]', 1)[0].strip()
    pred_str = unicodedata.normalize("NFC", pred_str)
    pred_match = plate_pattern.search(pred_str)
    pred_plate = pred_match.group(0) if pred_match else None
    pred_texts.append(pred_plate)

    # A match needs a real ground-truth plate: None == None (nothing parsed on
    # either side) must not be scored as a correct recognition.
    match_flags.append(gt_plate is not None and pred_plate == gt_plate)

# 🧮 Metrics
# `total` counts every evaluated image. An image whose file name yields no plate
# stays in the denominator but can never count as correct.
total = len(gt_texts)
pairs = list(zip(gt_texts, pred_texts))
correct = sum(1 for gt, pred in pairs if gt is not None and pred == gt)
exact_accuracy = correct / total if total > 0 else 0.0

# Character-level metrics are scored only on samples that have a ground-truth
# plate. A missing prediction is scored as an empty string, so it costs every
# character instead of being dropped from the statistics.
scored = [(gt, pred or "") for gt, pred in pairs if gt]

# CER: skipped when there are no labelled samples, since jiwer needs non-empty references.
if scored:
    char_error_rate = cer([gt for gt, _ in scored], [pred for _, pred in scored])
else:
    char_error_rate = 0.0

# Position-wise character accuracy over all labelled samples.
char_correct = 0
char_total = 0
for gt, pred in scored:
    char_total += len(gt)
    char_correct += sum(1 for g, p in zip(gt, pred) if g == p)
char_accuracy = char_correct / char_total if char_total > 0 else 0.0

# Precision / Recall / F1 at plate level:
#   precision = correct plates / plates the model produced
#   recall    = correct plates / plates present in the ground truth
# The previous version compared the match flags with themselves, so all three
# scores were 100% whenever at least one image matched.
num_predicted = sum(1 for pred in pred_texts if pred)
num_labelled = len(scored)
precision = correct / num_predicted if num_predicted > 0 else 0.0
recall = correct / num_labelled if num_labelled > 0 else 0.0
f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0

# Summary report: one line per metric, emitted in a single print call.
summary_lines = [
    "\n📊 DTRB OCR 모델 성능 평가 결과:",
    f"총 테스트 수: {total}",
    f"✔ 번호판 패턴 기준 정확도 (Exact Accuracy) : {exact_accuracy:.2%}",
    f"✔ 문자 단위 정확도 (Character Accuracy)   : {char_accuracy:.2%}",
    f"✔ 문자 오류율 (CER)                      : {char_error_rate:.2%}",
    f"✔ Precision                              : {precision:.2%}",
    f"✔ Recall                                 : {recall:.2%}",
    f"✔ F1 Score                               : {f1:.2%}",
]
print("\n".join(summary_lines))

# Per-image comparison table: file name, ground truth, prediction, match mark.
print("\n🔍 예측 결과 상세 비교:")
print(f"{'파일명':<20} {'정답(GT)':<15} {'예측(Pred)':<15} {'일치 여부'}")
print("-" * 60)
match_marks = {True: "✅", False: "❌"}
table_rows = zip(map(os.path.basename, image_paths), gt_texts, pred_texts, match_flags)
for name, gt, pred, flag in table_rows:
    match = match_marks[bool(flag)]
    # A missing ground truth or prediction is shown as '-'.
    print(f"{name:<20} {gt or '-':<15} {pred or '-':<15} {match}")

# CSV export: one row per evaluated image, written without the index column.
save_csv = True
if save_csv:
    result_columns = {
        'filename': list(map(os.path.basename, image_paths)),
        'ground_truth': gt_texts,
        'prediction': pred_texts,
        'match': match_flags,
    }
    df = pd.DataFrame(result_columns)
    df.to_csv("/content/dtrb_ocr_eval_result.csv", index=False)
    print("\n📁 결과가 /content/dtrb_ocr_eval_result.csv 에 저장되었습니다.")


100%|██████████| 189/189 [00:07<00:00, 25.86it/s]


📊 DTRB OCR 모델 성능 평가 결과:
총 테스트 수: 189
✔ 번호판 패턴 기준 정확도 (Exact Accuracy) : 100.00%
✔ 문자 단위 정확도 (Character Accuracy)   : 100.00%
✔ 문자 오류율 (CER)                      : 0.00%
✔ Precision                              : 100.00%
✔ Recall                                 : 100.00%
✔ F1 Score                               : 100.00%

🔍 예측 결과 상세 비교:
파일명                  정답(GT)          예측(Pred)        일치 여부
------------------------------------------------------------
116저7840.jpg        116저7840        116저7840        ✅
130버4755.jpg        130버4755        130버4755        ✅
116하1218.jpg        116하1218        116하1218        ✅
133하7652.jpg        133하7652        133하7652        ✅
24너6591.jpg         24너6591         24너6591         ✅
137머5098.jpg        137머5098        137머5098        ✅
11마1926.jpg         11마1926         11마1926         ✅
138도7306.jpg        138도7306        138도7306        ✅
116하4561.jpg        116하4561        116하4561        ✅
141버3724.jpg        141버3724        141버3724 


