In [None]:
import os
import evaluate
import cv2
from google.colab.patches import cv2_imshow
import joblib
from skimage.feature import hog
import numpy as np

In [None]:
# Process Dataset
# Build two parallel lists: `images` holds the path of every image file found
# directly inside PATH, and `labels` holds the matching ground-truth text,
# which is the file name with its extension removed.
PATH = '../../dataset/test'
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')
images = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
for filename in sorted(os.listdir(PATH)):
  file_path = os.path.join(PATH, filename)
  # splitext handles extensions of any length, unlike slicing a fixed number
  # of characters off the end of the name.
  stem, extension = os.path.splitext(filename)

  # Skip sub-directories and non-image entries (e.g. `.ipynb_checkpoints`,
  # `.DS_Store`) so they become neither bogus labels nor unreadable images.
  if extension.lower() not in IMAGE_EXTENSIONS or not os.path.isfile(file_path):
    continue

  labels.append(stem)
  images.append(file_path)

In [None]:
# Metric objects from the `evaluate` library, loaded by name: character-level
# ('cer') first, then word-level ('wer').
cer, wer = (evaluate.load(metric_name) for metric_name in ('cer', 'wer'))

# Model object deserialized with joblib; ocr() calls `clf.predict` on it.
# NOTE(review): this is an absolute path under /content/drive, so the cell
# only works where that location exists — confirm before running elsewhere.
MODEL_PATH = '/content/drive/MyDrive/Projects/CompVis/FinalProject/OCR/HOG/hog_lreg_model_3.pkl'
clf = joblib.load(MODEL_PATH)

In [None]:
def ocr(PATH, show=False):
    """Read the characters in the image at ``PATH`` and return them as a string.

    The image is binarised with Otsu's threshold and its contours are
    extracted. Every contour whose bounding box is taller than 30% of the
    image height and narrower than 30% of the image width is treated as one
    character: the grayscale crop is resized, described with HOG features and
    classified by the module-level ``clf``. Predictions are concatenated in
    ascending x order.

    Args:
        PATH: Path of the image file to read. The name shadows the
            module-level dataset ``PATH``; it is kept so existing keyword
            callers continue to work.
        show: When True, draw the accepted boxes and predicted characters on
            the image and display it with ``cv2_imshow``. Defaults to False.

    Returns:
        str: The predicted characters joined together; an empty string when
        no contour passes the size filter.

    Raises:
        OSError: If OpenCV cannot read an image from ``PATH``.
    """
    im = cv2.imread(PATH)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise OSError(f'cv2.imread could not read an image from {PATH!r}')

    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold is computed from the image and the
    # `thresh` argument is ignored, so 0 is passed as a placeholder.
    _, im_th = cv2.threshold(im_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; [-2] is the contour list in both.
    # NOTE(review): RETR_TREE also yields nested contours (e.g. holes inside
    # characters); a hole that passes the size filter below is classified as
    # an extra character — confirm whether that is acceptable.
    ctrs = cv2.findContours(im_th, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Bounding boxes are (x, y, w, h) tuples, so plain tuple ordering sorts
    # left-to-right by x, with y, w, h acting as tie-breakers.
    sorted_bboxes = sorted(cv2.boundingRect(c) for c in ctrs)

    image_height, image_width = im.shape[:2]
    height_threshold = image_height * 0.3
    width_threshold = image_width * 0.3

    plate_char = []
    for x, y, w, h in sorted_bboxes:
        # Keep only tall, narrow boxes; everything else is skipped.
        if not (h > height_threshold and w < width_threshold):
            continue

        roi = im_gray[y:y + h, x:x + w]
        # cv2.resize takes (width, height): the crop becomes 128 rows x 64 cols.
        # NOTE(review): presumably this matches the features `clf` was
        # trained on — verify against the training notebook.
        roi = cv2.resize(roi, (64, 128), interpolation=cv2.INTER_AREA)
        roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(1, 1))
        nbr = clf.predict(np.array([roi_hog_fd]))
        character = str(nbr[0])
        plate_char.append(character)

        if show:
            # Annotations are only drawn when they will actually be displayed.
            cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 1)
            cv2.putText(im, character, (x, y + h), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 200, 250), 3)

    if show:
        cv2_imshow(im)

    return ''.join(plate_char)

In [None]:
# Run OCR on every dataset image; `pred` stays index-aligned with `images`.
pred = [ocr(image_path) for image_path in images]

In [None]:
# Score the predictions against the labels with both metrics.
# Despite the variable names, 'wer' and 'cer' are error rates (lower is better).
scoring_inputs = {'references': labels, 'predictions': pred}
word_accuracy = wer.compute(**scoring_inputs)
character_accuracy = cer.compute(**scoring_inputs)

print(f'wer: {word_accuracy}, cer: {character_accuracy}')

wer: 0.9, cer: 0.28169014084507044


In [None]:
# List each (prediction, label) pair with its index for side-by-side inspection.
for index, (predicted, expected) in enumerate(zip(pred, labels)):
  print(index, (predicted, expected))

0 ('E5944TK', 'E5944TK')
1 ('4594', 'E4594Q')
2 ('B97363FK', 'B9736JFK')
3 ('H624AC', 'H6240AKC')
4 ('E3TO5OO', 'E5105OD')
5 ('E453GOK', 'E4538QK')
6 ('H314ONB', 'H3141NB')
7 ('E2TT8ST', 'E2118ST')
8 ('DB888TNRZ', 'B8837NR')
9 ('E5O53RC', 'E5053RG')
