In [4]:
from easyocr.easyocr import *
import cv2

# GPU 설정
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

def get_files(path):
    file_list = []

    files = [f for f in os.listdir(path) if not f.startswith('.')]
    files.sort()
    abspath = os.path.abspath(path)
    print(abspath)
    for file in files:
        file_path = os.path.join(abspath, file)
        file_list.append(file_path)

    return file_list, len(file_list)

reader = Reader(['ko'], gpu=True,
                model_storage_directory='model',
                user_network_directory='EasyOCR\\trainer\\config_files',
                # py 파일과 pth 파일, yaml 파일은 아래 이름과 똑같아야 합니다.
                recog_network='custom')

files, count = get_files('demo_image')

for idx, file in enumerate(files):
    filename = os.path.basename(file)
    # 경로에 한글이 들어간다면 에러 발생 가능성이 높습니다.
    img = cv2.imread(r'./demo_image/objectid_21659344_1761203704061.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    result = reader.readtext(gray)

    # ./easyocr/utils.py 733 lines
    # result[0]: bbox
    # result[1]: string
    # result[2]: confidence
    for (bbox, string, confidence) in result:
        print("filename: '%s', confidence: %.4f, string: '%s'" % (filename, confidence, string))
        # print('bbox: ', bbox)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


c:\Users\다인\EASYOCR-custom\demo_image
filename: 'objectid_21659344_1761203704061.jpg', confidence: 0.0000, string: '름귀실귀파주파주파'




In [34]:
import os
import cv2
import numpy as np
from easyocr import Reader

# GPU 설정
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

def preprocess_image(img):
    """이미지 전처리"""
    # 크기 확대 (2배)
    h, w = img.shape[:2]
    img = cv2.resize(img, (100, 32), interpolation=cv2.INTER_CUBIC)
    
    # Grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    return gray

# Reader 초기화
# reader = Reader(['ko'], gpu=True,
#                 model_storage_directory='model',
#                 user_network_directory='EasyOCR/trainer/config_files',
#                 recog_network='custom')
reader = Reader(['ko'])

# 이미지 로드
img_path = './demo_image/plate.jpg'
img = cv2.imread(img_path)

# 전처리
gray = preprocess_image(img)

# OCR 실행
result = reader.readtext(
    gray,
    detail=1,
    contrast_ths=0.1,
    adjust_contrast=0.5
)

# 결과 출력
print(f"원본 크기: {img.shape[1]}x{img.shape[0]}")
print(f"처리 후: {processed.shape[1]}x{processed.shape[0]}\n")

if result:
    for bbox, text, conf in result:
        print(f"텍스트: '{text}' | 신뢰도: {conf:.4f}")
else:
    print("인식된 텍스트 없음")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


원본 크기: 640x320
처리 후: 100x32

텍스트: '206모1213' | 신뢰도: 0.3223




In [12]:
import cv2
import numpy as np
from easyocr import Reader
from PIL import Image

# Reader 초기화
reader = Reader(['ko'], gpu=True,
                model_storage_directory='model',
                user_network_directory='EasyOCR/trainer/config_files',
                recog_network='custom')

def preprocess_image(img):
    """모델 학습 크기(100x32)로 전처리"""
    # Grayscale
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    
    # 가벼운 대비 향상
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    enhanced = clahe.apply(gray)
    
    # 100x32로 고정 resize
    img_pil = Image.fromarray(enhanced).resize((100, 32), Image.BICUBIC)
    
    return np.array(img_pil)

# 단일 이미지 처리
img = cv2.imread('./demo_image/plate.jpg')
processed = preprocess_image(img)

result = reader.recognize(
    processed,
    horizontal_list=None,  # 전체 영역
    free_list=None,
    detail=1,
    adjust_contrast=0,     # 추가 대비 조정 끄기
    contrast_ths=0
)

if result:
    text = result[0][1]
    conf = result[0][2]
    print(f"인식 결과: {text} (신뢰도: {conf:.4f})")
else:
    print("인식 실패")

# ========================================
# 여러 이미지 배치 처리
# ========================================
import os

image_dir = './demo_image'
files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]

for filename in files:
    img_path = os.path.join(image_dir, filename)
    img = cv2.imread(img_path)
    
    if img is None:
        continue
    
    processed = preprocess_image(img)
    result = reader.recognize(processed, horizontal_list=None, 
                             free_list=None, adjust_contrast=0)
    
    if result:
        text = result[0][1]
        conf = result[0][2]
        print(f"{filename}: '{text}' (신뢰도: {conf:.4f})")
    else:
        print(f"{filename}: 인식 실패")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


인식 결과: 구중제영8다8은8다 (신뢰도: 0.0000)
plate.jpg: '구중제영8다8은8다' (신뢰도: 0.0000)


In [28]:
import os
import cv2
import numpy as np
from easyocr import Reader
from PIL import Image

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# Reader 초기화
reader = Reader(['ko'], gpu=True,
                model_storage_directory='model',
                user_network_directory='EasyOCR/trainer/config_files',
                recog_network='custom')

def preprocess(img):
    """CustomOCR과 동일한 전처리"""
    # Grayscale
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    
    # PIL로 100x32 resize
    img_pil = Image.fromarray(gray).resize((100, 32), Image.BICUBIC)
    
    return np.array(img_pil)

# 이미지 로드
img = cv2.imread('./demo_image/plate.jpg')

# 전처리
processed = preprocess(img)

# OCR 실행
result = reader.recognize(
    processed,
    horizontal_list=None,
    free_list=None,
    detail=1,
    adjust_contrast=0
)

# 결과 출력
print(f"원본: {img.shape[1]}x{img.shape[0]}")
print(f"처리: {processed.shape[1]}x{processed.shape[0]}\n")

if result:
    text = result[0][1]
    conf = result[0][2]
    print(f"텍스트: '{text}' | 신뢰도: {conf:.4f}")
else:
    print("인식 실패")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


원본: 640x320
처리: 100x32

텍스트: '누하모음명음모9' | 신뢰도: 0.0000


In [27]:
import os
import cv2
import numpy as np
import torch
from easyocr import Reader
from PIL import Image

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# 재현성 보장
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)

# Reader 초기화
reader = Reader(['ko'], gpu=True,
                model_storage_directory='model',
                user_network_directory='EasyOCR/trainer/config_files',
                recog_network='custom')

# eval 모드 강제 설정
reader.recognizer.eval()

def preprocess(img):
    """CustomOCR과 동일한 전처리"""
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    
    img_pil = Image.fromarray(gray).resize((100, 32), Image.BICUBIC)
    return np.array(img_pil)

# 이미지 로드
img = cv2.imread('./demo_image/plate.jpg')
processed = preprocess(img)

# 여러 번 실행해서 결과 확인
print("=== 같은 이미지 5번 실행 ===\n")
for i in range(5):
    result = reader.recognize(
        processed,
        horizontal_list=None,
        free_list=None,
        detail=1,
        adjust_contrast=0
    )
    
    if result:
        text = result[0][1]
        conf = result[0][2]
        print(f"{i+1}회: '{text}' (신뢰도: {conf:.6f})")
    else:
        print(f"{i+1}회: 인식 실패")

print("\n→ 5번 모두 동일해야 정상입니다!")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


=== 같은 이미지 5번 실행 ===

1회: '오외초외등계안시안' (신뢰도: 0.000000)
2회: '오외초외등계안시안' (신뢰도: 0.000000)
3회: '오외초외등계안시안' (신뢰도: 0.000000)
4회: '오외초외등계안시안' (신뢰도: 0.000000)
5회: '오외초외등계안시안' (신뢰도: 0.000000)

→ 5번 모두 동일해야 정상입니다!




In [24]:
# best.pth가 어떤 구조로 학습되었는지 확인
import torch

checkpoint = torch.load('./model/custom.pth', map_location='cpu')

# 모델 키 확인
print("모델 레이어:")
for key in checkpoint.keys():
    print(key)
    
# Transformation 레이어가 있는지 확인
has_tps = any('Transformation' in key for key in checkpoint.keys())
has_resnet = any('FeatureExtraction.ConvNet.resnet' in key for key in checkpoint.keys())
has_vgg = any('FeatureExtraction.ConvNet.VGG' in key for key in checkpoint.keys())

print(f"\nTPS 있음: {has_tps}")
print(f"ResNet 있음: {has_resnet}")
print(f"VGG 있음: {has_vgg}")

모델 레이어:
module.Transformation.LocalizationNetwork.conv.0.weight
module.Transformation.LocalizationNetwork.conv.1.weight
module.Transformation.LocalizationNetwork.conv.1.bias
module.Transformation.LocalizationNetwork.conv.1.running_mean
module.Transformation.LocalizationNetwork.conv.1.running_var
module.Transformation.LocalizationNetwork.conv.1.num_batches_tracked
module.Transformation.LocalizationNetwork.conv.4.weight
module.Transformation.LocalizationNetwork.conv.5.weight
module.Transformation.LocalizationNetwork.conv.5.bias
module.Transformation.LocalizationNetwork.conv.5.running_mean
module.Transformation.LocalizationNetwork.conv.5.running_var
module.Transformation.LocalizationNetwork.conv.5.num_batches_tracked
module.Transformation.LocalizationNetwork.conv.8.weight
module.Transformation.LocalizationNetwork.conv.9.weight
module.Transformation.LocalizationNetwork.conv.9.bias
module.Transformation.LocalizationNetwork.conv.9.running_mean
module.Transformation.LocalizationNetwork.conv.9

In [26]:
import torch

checkpoint = torch.load('./model/custom.pth', map_location='cpu')

print("=== 모델 구조 분석 ===\n")

# 1. Transformation 확인
has_tps = any('Transformation' in key for key in checkpoint.keys())
print(f"Transformation: {'TPS' if has_tps else 'None'}")

# 2. FeatureExtraction 확인
feature_keys = [key for key in checkpoint.keys() if 'FeatureExtraction' in key]
if feature_keys:
    print("\nFeatureExtraction 레이어:")
    for key in feature_keys[:5]:  # 처음 5개만
        print(f"  {key}")
    
    # 어떤 타입인지 추측
    if any('resnet' in key.lower() for key in feature_keys):
        feature_type = "ResNet"
    elif any('vgg' in key.lower() for key in feature_keys):
        feature_type = "VGG"
    elif any('rcnn' in key.lower() for key in feature_keys):
        feature_type = "RCNN"
    else:
        feature_type = "Unknown (첫 레이어 확인 필요)"
    
    print(f"\n  → FeatureExtraction: {feature_type}")

# 3. SequenceModeling 확인
has_lstm = any('SequenceModeling' in key and 'LSTM' in key for key in checkpoint.keys())
has_bilstm = any('SequenceModeling' in key for key in checkpoint.keys())
print(f"\nSequenceModeling: {'BiLSTM' if has_bilstm else ('LSTM' if has_lstm else 'None')}")

# 4. Prediction 확인
has_attention = any('Attention' in key for key in checkpoint.keys())
has_ctc = any('CTCLoss' in key or 'generator' in key for key in checkpoint.keys())

if has_attention:
    prediction_type = "Attn"
elif has_ctc:
    prediction_type = "CTC"
else:
    prediction_type = "Unknown"

print(f"Prediction: {prediction_type}")

# 5. 전체 모듈 확인
print("\n=== 전체 모듈 구조 ===")
modules = set()
for key in checkpoint.keys():
    parts = key.split('.')
    if len(parts) > 1:
        modules.add(parts[1])  # module. 다음 부분

for module in sorted(modules):
    print(f"  - {module}")

print("\n=== 추천 config.yaml 설정 ===")
print(f"""
ocr:
  path: "models/ocr/best.pth"
  config:
    img_height: 32
    img_width: 100
    input_channel: 1
    output_channel: 256
    hidden_size: 256
    num_fiducial: 20
    transformation: "TPS"
    feature_extraction: "{feature_type}"  # ← 확인 필요!
    sequence_modeling: "BiLSTM"
    prediction: "{prediction_type}"
    batch_max_length: 25
""")

=== 모델 구조 분석 ===

Transformation: TPS

FeatureExtraction 레이어:
  module.FeatureExtraction.ConvNet.conv0_1.weight
  module.FeatureExtraction.ConvNet.bn0_1.weight
  module.FeatureExtraction.ConvNet.bn0_1.bias
  module.FeatureExtraction.ConvNet.bn0_1.running_mean
  module.FeatureExtraction.ConvNet.bn0_1.running_var

  → FeatureExtraction: Unknown (첫 레이어 확인 필요)

SequenceModeling: BiLSTM
Prediction: CTC

=== 전체 모듈 구조 ===
  - FeatureExtraction
  - Prediction
  - SequenceModeling
  - Transformation

=== 추천 config.yaml 설정 ===

ocr:
  path: "models/ocr/best.pth"
  config:
    img_height: 32
    img_width: 100
    input_channel: 1
    output_channel: 256
    hidden_size: 256
    num_fiducial: 20
    transformation: "TPS"
    feature_extraction: "Unknown (첫 레이어 확인 필요)"  # ← 확인 필요!
    sequence_modeling: "BiLSTM"
    prediction: "CTC"
    batch_max_length: 25

