### Tesseract-OCR

In [None]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression

# EAST text detection on one image: draw an axis-aligned box around every
# sufficiently large detection and show the annotated image in a window.
model_path = "models/frozen_east_text_detection.pb"
image_path = "images/car.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
model = cv2.dnn.readNet(model_path)
outputLayers = [
    "feature_fusion/Conv_7/Sigmoid",  # per-cell score, thresholded below
    "feature_fusion/concat_3",        # geometry: top/right/bottom/left/rotation angle
]
height, width, colorch = img.shape
# Network input size: each side rounded up to a multiple of 32. Ceiling
# division leaves a side that is already a multiple of 32 unchanged.
new_height = -(-height // 32) * 32
new_width = -(-width // 32) * 32
# Factors that map network-input coordinates back onto the original image.
h_ratio = height/new_height
w_ratio = width/new_width
blob = cv2.dnn.blobFromImage(img, 1, (new_width, new_height),
                             (123.68, 116.78, 103.94), True)
model.setInput(blob)
(scores, geometry) = model.forward(outputLayers)
rectangles = []
confidence_score = []
rows = geometry.shape[2]
cols = geometry.shape[3]
for y in range(rows):
    for x in range(cols):
        score = scores[0][0][y][x]
        if score < 0.5:
            continue
        # Each output cell is treated as a 4-pixel step in the network input.
        offset_x = x*4
        offset_y = y*4
        # geometry map: top/right/bottom/left/rotation angle
        # (the rotation channel is ignored here, so boxes stay axis-aligned)
        bottom_x = int(offset_x + geometry[0][1][y][x])
        bottom_y = int(offset_y + geometry[0][2][y][x])
        top_x = int(offset_x - geometry[0][3][y][x])
        top_y = int(offset_y - geometry[0][0][y][x])
        rectangles.append((top_x, top_y, bottom_x, bottom_y))
        confidence_score.append(float(score))
# Collapse overlapping candidates into one box per text region.
final_boxes = non_max_suppression(np.array(rectangles),
                                  probs=confidence_score,
                                  overlapThresh=0.5)
for (x1, y1, x2, y2) in final_boxes:
    # Area is measured in network-input pixels, before rescaling.
    area = abs(x2-x1) * abs(y2-y1)
    if area > 4000:
        x1 = int(x1*w_ratio)
        y1 = int(y1*h_ratio)
        x2 = int(x2*w_ratio)
        y2 = int(y2*h_ratio)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imshow("EAST", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [11]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression
import copy # 引入 copy 模組用於深度複製

# EAST text detection with rotated boxes: decode each confident cell into a
# rotated quadrilateral, run NMS on the axis-aligned envelopes, then draw the
# surviving quadrilaterals on the original image.
model_path = "models/frozen_east_text_detection.pb"
image_path = "images/car.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
model = cv2.dnn.readNet(model_path)
outputLayers = [
    "feature_fusion/Conv_7/Sigmoid",  # per-cell score
    "feature_fusion/concat_3",        # geometry: top/right/bottom/left/angle
]
height, width, colorch = img.shape
# Network input size: each side rounded up to a multiple of 32. Ceiling
# division leaves a side that is already a multiple of 32 unchanged.
new_height = -(-height // 32) * 32
new_width = -(-width // 32) * 32
# Factors that map network-input coordinates back onto the original image.
h_ratio = height/new_height
w_ratio = width/new_width
blob = cv2.dnn.blobFromImage(img, 1, (new_width, new_height),
                             (123.68, 116.78, 103.94), True)
model.setInput(blob)
(scores, geometry) = model.forward(outputLayers)
rectangles = []            # axis-aligned envelopes (ints) fed to NMS
confidence_score = []
rotated_boxes_coords = []  # one (4, 2) float array of corners per candidate
rows = geometry.shape[2]
cols = geometry.shape[3]
min_confidence = 0.5  # minimum score for a cell to produce a candidate

for y in range(rows):
    for x in range(cols):
        score = scores[0][0][y][x]
        if score < min_confidence:
            continue

        # Distances from this cell to the box edges, plus the rotation angle.
        d_top, d_right, d_bottom, d_left = (
            float(v) for v in geometry[0][:4, y, x])
        angle = float(geometry[0][4][y][x])
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)

        # The output map is 4x smaller than the network input.
        offset_x = x*4.0
        offset_y = y*4.0

        box_h = d_top + d_bottom
        box_w = d_right + d_left

        # Image coordinates have y pointing down. This follows the decode in
        # OpenCV's dnn text_detection sample: start at the bottom-right
        # corner, then step along the rotated edges to reach the others.
        br_x = offset_x + cos_a * d_right + sin_a * d_bottom
        br_y = offset_y - sin_a * d_right + cos_a * d_bottom
        tr_x = br_x - sin_a * box_h
        tr_y = br_y - cos_a * box_h
        bl_x = br_x - cos_a * box_w
        bl_y = br_y + sin_a * box_w
        tl_x = tr_x + bl_x - br_x
        tl_y = tr_y + bl_y - br_y

        corners = np.array([[tl_x, tl_y], [tr_x, tr_y],
                            [br_x, br_y], [bl_x, bl_y]], dtype=np.float64)

        # NMS works on axis-aligned boxes, so use the envelope of the corners.
        min_x, min_y = corners.min(axis=0)
        max_x, max_y = corners.max(axis=0)
        rectangles.append((int(min_x), int(min_y), int(max_x), int(max_y)))
        confidence_score.append(float(score))
        rotated_boxes_coords.append(corners)

# 1. Suppress overlapping candidates using their axis-aligned envelopes.
final_boxes_nms = non_max_suppression(np.array(rectangles),
                                      probs=confidence_score,
                                      overlapThresh=0.5)

# 2. non_max_suppression returns box coordinates rather than indices, so map
#    each surviving envelope back to the candidate(s) that produced it.
indices_by_rect = {}
for idx, rect in enumerate(rectangles):
    indices_by_rect.setdefault(rect, []).append(idx)

final_rotated_boxes = []
for kept in final_boxes_nms:
    candidates = indices_by_rect.get(tuple(int(v) for v in kept))
    if not candidates:
        continue
    # Several candidates can share one envelope; take the most confident one
    # and remove it so it cannot be matched a second time.
    best = max(candidates, key=lambda i: confidence_score[i])
    candidates.remove(best)
    final_rotated_boxes.append(rotated_boxes_coords[best])

# 3. Rescale the corners to the original image and draw them (green).
scale = np.array([w_ratio, h_ratio])
for corners in final_rotated_boxes:
    box_points = np.rint(corners * scale).astype(np.int32)
    cv2.polylines(img, [box_points], isClosed=True, color=(0, 255, 0), thickness=2)

# No area filter is applied here. If one is needed, compute the area of the
# rotated quadrilateral itself (e.g. with cv2.contourArea), not its envelope.

cv2.imshow("EAST Rotated License Plate Detection", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

# (image file, Tesseract language code) pairs, recognised in this order.
ocr_jobs = (
    ("images/number.jpg", "eng"),
    ("images/traditional.jpg", "chi_tra"),
    ("images/simple.jpg", "chi_sim"),
)
for image_file, lang_code in ocr_jobs:
    # Images are loaded as BGR and converted to RGB before recognition.
    img = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(img, lang=lang_code)
    print(text.strip())


K4P1K
更改

輻

片尺寸和製作縮
清明 时 节 雨 纷纷 ， 路 上 行人 欲 断 魂 。


In [None]:
# 中英文參雜辨識
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
# Load as BGR, convert to RGB, then recognise with the English and
# Traditional Chinese language data combined ("eng+chi_tra").
bgr_image = cv2.imread("images/sample.jpg")
img = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(img, lang="eng+chi_tra")
print(text.strip())

OpenCV
Python 程式 設計
DAT-4567


In [3]:
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
# Load as BGR, convert to RGB, then recognise with the "chi_tra_vert"
# language data (presumably vertical Traditional Chinese text).
bgr_image = cv2.imread("images/traditional2.jpg")
img = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(img, lang="chi_tra_vert")
print(text.strip())


測試 垂直 文 字


In [4]:
import cv2
import pytesseract

# Per-character boxes from Tesseract, drawn and labelled on the image.
pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
image_path = "images/number.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
# Recognise on an RGB copy but keep `img` in BGR: cv2.imshow expects BGR, so
# drawing on the RGB copy would display with red and blue swapped.
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_h = img.shape[0]
boxes = pytesseract.image_to_boxes(rgb)
print(boxes)
for line in boxes.splitlines():
    fields = line.split(" ")
    if len(fields) < 5:
        # Skip lines that do not carry a character plus four coordinates.
        continue
    character = fields[0]
    x, y, x2, y2 = (int(v) for v in fields[1:5])
    # y values are subtracted from the image height to flip them into
    # OpenCV's top-left-origin coordinates.
    cv2.rectangle(img, (x, img_h - y),
                  (x2, img_h - y2), (0, 255, 0), 1)
    cv2.putText(img, character, (x, img_h - y2 - 10),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
cv2.imshow("Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


K 16 52 42 80 0
4 49 52 70 80 0
P 87 53 107 79 0
1 125 52 140 80 0
K 159 52 187 80 0



In [5]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression
import pytesseract

# Locate text with EAST, crop each large detection, and read it with Tesseract.
pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
image_path = "images/car.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
model = cv2.dnn.readNet("models/frozen_east_text_detection.pb")
outputLayers = [
    "feature_fusion/Conv_7/Sigmoid",  # per-cell score
    "feature_fusion/concat_3",        # geometry: top/right/bottom/left/angle
]
height, width, colorch = img.shape
# Network input size: each side rounded up to a multiple of 32. Ceiling
# division leaves a side that is already a multiple of 32 unchanged.
new_height = -(-height // 32) * 32
new_width = -(-width // 32) * 32
# Factors that map network-input coordinates back onto the original image.
h_ratio = height/new_height
w_ratio = width/new_width
blob = cv2.dnn.blobFromImage(img, 1, (new_width, new_height),
                             (123.68, 116.78, 103.94), True)
model.setInput(blob)
(scores, geometry) = model.forward(outputLayers)
rectangles = []
confidence_score = []
rows = geometry.shape[2]
cols = geometry.shape[3]
for y in range(rows):
    for x in range(cols):
        score = scores[0][0][y][x]
        if score < 0.5:
            continue
        # Each output cell is treated as a 4-pixel step in the network input.
        offset_x = x*4
        offset_y = y*4
        bottom_x = int(offset_x + geometry[0][1][y][x])
        bottom_y = int(offset_y + geometry[0][2][y][x])
        top_x = int(offset_x - geometry[0][3][y][x])
        top_y = int(offset_y - geometry[0][0][y][x])
        rectangles.append((top_x, top_y, bottom_x, bottom_y))
        confidence_score.append(float(score))

final_boxes = non_max_suppression(np.array(rectangles),
                                  probs=confidence_score,
                                  overlapThresh=0.5)
for (x1, y1, x2, y2) in final_boxes:
    # Size filter uses network-input pixels, before rescaling.
    w = abs(x2-x1)
    h = abs(y2-y1)
    area = w * h
    if area > 4000:
        x1 = int(x1*w_ratio)
        y1 = int(y1*h_ratio)
        x2 = int(x2*w_ratio)
        y2 = int(y2*h_ratio)
        # Pad the box, using the rescaled corners on both sides, and clamp to
        # the image: a negative slice start would wrap around to the far edge.
        crop_top = max(y1 - 10, 0)
        crop_bottom = min(y2 + 13, height)
        crop_left = max(x1 - 10, 0)
        crop_right = min(x2 + 1, width)
        if crop_bottom <= crop_top or crop_right <= crop_left:
            # Nothing of the box lies inside the image; imshow would fail.
            continue
        print("偵測和辨識出車牌文字!")
        result = img[crop_top:crop_bottom, crop_left:crop_right]
        cv2.imshow("Plate", result)
        # Tesseract gets RGB, like the other pytesseract cells; imshow keeps BGR.
        text = pytesseract.image_to_string(
            cv2.cvtColor(result, cv2.COLOR_BGR2RGB), lang="eng")
        print(text.strip())
        cv2.waitKey(0)

cv2.destroyAllWindows()


偵測和辨識出車牌文字!
BBT:6566


In [6]:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression
import pytesseract

# Locate text with EAST, crop each large detection, and read it with Tesseract.
pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
image_path = "images/car1.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
model = cv2.dnn.readNet("models/frozen_east_text_detection.pb")
outputLayers = [
    "feature_fusion/Conv_7/Sigmoid",  # per-cell score
    "feature_fusion/concat_3",        # geometry: top/right/bottom/left/angle
]
height, width, colorch = img.shape
# Network input size: each side rounded up to a multiple of 32. Ceiling
# division leaves a side that is already a multiple of 32 unchanged.
new_height = -(-height // 32) * 32
new_width = -(-width // 32) * 32
# Factors that map network-input coordinates back onto the original image.
h_ratio = height/new_height
w_ratio = width/new_width
blob = cv2.dnn.blobFromImage(img, 1, (new_width, new_height),
                             (123.68, 116.78, 103.94), True)
model.setInput(blob)
(scores, geometry) = model.forward(outputLayers)
rectangles = []
confidence_score = []
rows = geometry.shape[2]
cols = geometry.shape[3]
for y in range(rows):
    for x in range(cols):
        score = scores[0][0][y][x]
        if score < 0.5:
            continue
        # Each output cell is treated as a 4-pixel step in the network input.
        offset_x = x*4
        offset_y = y*4
        bottom_x = int(offset_x + geometry[0][1][y][x])
        bottom_y = int(offset_y + geometry[0][2][y][x])
        top_x = int(offset_x - geometry[0][3][y][x])
        top_y = int(offset_y - geometry[0][0][y][x])
        rectangles.append((top_x, top_y, bottom_x, bottom_y))
        confidence_score.append(float(score))

final_boxes = non_max_suppression(np.array(rectangles),
                                  probs=confidence_score,
                                  overlapThresh=0.5)
for (x1, y1, x2, y2) in final_boxes:
    # Size filter uses network-input pixels, before rescaling.
    w = abs(x2-x1)
    h = abs(y2-y1)
    area = w * h
    if area > 4000:
        x1 = int(x1*w_ratio)
        y1 = int(y1*h_ratio)
        x2 = int(x2*w_ratio)
        y2 = int(y2*h_ratio)
        # Pad the box, using the rescaled corners on both sides, and clamp to
        # the image: a negative slice start would wrap around to the far edge.
        crop_top = max(y1 - 5, 0)
        crop_bottom = min(y2 + 5, height)
        crop_left = max(x1 - 1, 0)
        crop_right = min(x2 + 1, width)
        if crop_bottom <= crop_top or crop_right <= crop_left:
            # Nothing of the box lies inside the image; imshow would fail.
            continue
        print("偵測和辨識出車牌文字!")
        result = img[crop_top:crop_bottom, crop_left:crop_right]
        cv2.imshow("Plate", result)
        # Tesseract gets RGB, like the other pytesseract cells; imshow keeps BGR.
        text = pytesseract.image_to_string(
            cv2.cvtColor(result, cv2.COLOR_BGR2RGB), lang="eng")
        print(text.strip())
        cv2.waitKey(0)

cv2.destroyAllWindows()


偵測和辨識出車牌文字!
ABC-8888


In [7]:
'''
回傳結果:
1. bounding box:文字區域的邊界框四個點的座標
2. text:辨識出來的文字內容
3. confidence:信心指數
'''

import easyocr
import cv2

# Three readtext() calls, each given the image in a different form.

# 1) English reader, image passed as a file path.
reader = easyocr.Reader(["en"])
result = reader.readtext("images/number.jpg")
print(result)

# 2) Simplified Chinese + English reader, image passed as an array from cv2.imread.
reader = easyocr.Reader(["ch_sim", "en"])
img = cv2.imread("images/simple.jpg")
result = reader.readtext(img)
print(result)

# 3) Traditional Chinese + English reader, image passed as the file's raw bytes.
reader = easyocr.Reader(["ch_tra", "en"])
with open("images/traditional.jpg", "rb") as image_file:
    img = image_file.read()
result = reader.readtext(img)
print(result)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[([[np.int32(7), np.int32(47)], [np.int32(193), np.int32(47)], [np.int32(193), np.int32(89)], [np.int32(7), np.int32(89)]], 'K4 P 1 K', np.float64(0.39847019310503506))]


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[([[np.int32(5), np.int32(7)], [np.int32(239), np.int32(7)], [np.int32(239), np.int32(51)], [np.int32(5), np.int32(51)]], '清明时节雨纷纷', np.float64(0.9471719233904865)), ([[np.int32(259), np.int32(7)], [np.int32(507), np.int32(7)], [np.int32(507), np.int32(51)], [np.int32(259), np.int32(51)]], '路上行人欲断魂。', np.float64(0.9462622045096404))]
[([[np.int32(33), np.int32(35)], [np.int32(449), np.int32(35)], [np.int32(449), np.int32(71)], [np.int32(33), np.int32(71)]], '更改圖片尺寸和製作縮圖', np.float64(0.5309929695577912))]


In [2]:
import easyocr
import cv2

# Detection only (no recognition): outline every horizontal text box in red.
img = cv2.imread("images/sample.jpg")
reader = easyocr.Reader(["ch_tra", "en"])
horizontal_list, free_list = reader.detect(img)
for box in horizontal_list[0]:
    print(box)
    # The first four entries are used as x_min, x_max, y_min, y_max.
    x_min, x_max, y_min, y_max = box[:4]
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max),
                  (0, 0, 255), 3)
cv2.imshow("Detection", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[np.int32(32), np.int32(159), np.int32(8), np.int32(47)]
[np.int32(6), np.int32(191), np.int32(43), np.int32(81)]
[np.int32(30), np.int32(178), np.int32(86), np.int32(118)]


In [3]:
import easyocr
import cv2

# Run readtext() on the image and outline every recognised text box in red.
image_path = "images/sample.jpg"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Cannot read image: {image_path}")
reader = easyocr.Reader(["ch_tra", "en"])
results = reader.readtext(image_path)
for result in results:
    box = result[0]
    # Use the first and third corners as opposite rectangle corners.
    # cv2.rectangle needs integer points, and box coordinates are not
    # guaranteed to be integers, so convert explicitly.
    top_left = tuple(int(round(float(v))) for v in box[0])
    bottom_right = tuple(int(round(float(v))) for v in box[2])
    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 3)
cv2.imshow("Detection", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [3]:
import easyocr
import cv2

# Recognition only: reuse boxes found earlier instead of running detection.
# Values copied from horizontal_list[0] of a previous detect() call.
boxes = [
    [32, 159, 8, 47],
    [6, 191, 43, 81],
    [30, 178, 86, 118],
]

img = cv2.imread("images/sample.jpg")
reader = easyocr.Reader(["ch_tra", "en"])
results = reader.recognize(img, horizontal_list=boxes, free_list=[])
for result in results:
    # Print the first three fields of each result, one per line.
    for field_index in range(3):
        print(result[field_index])


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[[32, 8], [159, 8], [159, 47], [32, 47]]
OpenCV
0.9790065595931878
[[6, 43], [191, 43], [191, 81], [6, 81]]
Python程式設計
0.8910885101124186
[[30, 86], [178, 86], [178, 118], [30, 118]]
DAT-4567
0.6394383716097745


In [5]:
import easyocr
import numpy as np
import cv2

# Read text with EasyOCR, outline each detection in red, and list the
# recognised strings in green down the left edge of the image.
img = cv2.imread("images/car.jpg")
reader = easyocr.Reader(["en"])
results = reader.readtext(img)
for row, detection in enumerate(results, start=1):
    quad = np.array(detection[0], np.int32)
    label = detection[1]
    print(quad)
    print(label)
    cv2.polylines(img, pts=[quad], isClosed=True,
                  color=(0, 0, 255), thickness=3)
    # Each recognised string gets its own line, 30 px below the previous one.
    cv2.putText(img, label, (10, 30 * row),
                cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)

cv2.imshow("License Plate Recognition", img)
cv2.waitKey(0)
cv2.destroyAllWindows()


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[[266 427]
 [403 413]
 [405 459]
 [268 474]]
BBT-6566


In [5]:
import torch

# CUDA version this PyTorch build was compiled with.
print(torch.version.cuda)

# Whether CUDA is usable; if so, also report the model of GPU 0.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)
if cuda_ready:
    print(torch.cuda.get_device_name(0))

None
False
