In [1]:
import os
import cv2
from jiwer import cer
import pytesseract
import numpy as np
import imutils

import re

from PIL import Image

pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'

# Implement this function that will return a string representing what in seen in the given image
def get_plate_text(img) -> str:

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #convert to grey scale
    gray = cv2.bilateralFilter(gray, 5, 17, 17) #Blur to reduce noise
#     gray = cv2.medianBlur(gray,5)
    
    med_val = np.median(gray) 
    lower = int(max(0 ,0.7*med_val))
    upper = int(min(255,1.3*med_val))

    edged = cv2.Canny(gray, lower, upper)
    
    try:
        
        cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
        screenCnt = None

        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.018 * peri, True)
            if len(approx) == 4:
                screenCnt = approx
                break

        mask = np.zeros(gray.shape,np.uint8)
        new_image = cv2.drawContours(mask,[screenCnt],0,255,-1,)
        new_image = cv2.bitwise_and(image,image,mask=mask)

        (x, y) = np.where(mask == 255)
        (topx, topy) = (np.min(x), np.min(y))
        (bottomx, bottomy) = (np.max(x), np.max(y))
        Cropped = gray[topx:bottomx+1, topy:bottomy+1]

        text = pytesseract.image_to_string(Cropped, config='-l eng --psm 6 --oem 1')
    
    except:
        
        text = pytesseract.image_to_string(gray, config='-l eng --psm 6 --oem 1')
        
    
    return text

In [2]:
img_dir = "plate_images/"
number_of_plates = 0
error = 0.0
# Loops through the given directory reading images in
for filename in os.listdir(img_dir):
    # Loads the image feel free to do this using whatever library you like opencv as an example
    img = cv2.imread(os.path.join(img_dir, filename))
    # Gets the actual from the file name
    actual = filename.split("_")[0]
    guess = get_plate_text(img)
    
    
    pattern = r'[A-Z0-9]+'

    matches = re.findall(pattern, guess)
    merged_string = ''.join(matches)
    
    print(merged_string)

    
    # This is one way to get the error of the guess compared to actual feel free to evaluate your algorithm differently
    error += cer(actual, merged_string)
    number_of_plates += 1

accuracy = (1 - error/number_of_plates) * 100
print("Overall accuracy: ", accuracy)

001YDUB
TOONS
06TROY
121
L0SCLEN
11094



11C2NB

INT4M
EE
12C750A

11C73V0

TUET3P

IVBEX
IVIT3K
A
217816

N20L07

482TT
55TEW
5959HO
836217

90990
909GRN
9310

QUEERS

C1
F
EAJUN703

PAK18

102377
LAOGDOL


WERE
508721
AR971
P


AU08ZN
AUC84B
AUD007
AY02T
LCCAYAD18R
SRSAYAD54G
B3SSIE

BLOT

BC64Z

BF67CF
86355U


SBJB14D
BJB61K
TEEBJM1L
EELBKC15
BLJ34D
RTT
EBNU40W
BP235
A

ERO5CYFR

JES05U0

BT03AN

BTT795


ETE13N
BEX9TH
BX65D1
BZ061
BZR37LL
CA00PX
CA1501NREL
CAD83H


CE27A0
ECEA35T
C
CER80T


CF12KX
CF16FY
F557H
WGHO0CY
ECHP64K
C1Z17W
ICI137A
C2
CKOBXX
CL1ZH
3CL53B
T537
SS

85P
ECNV01Z
C0636

E
CPZILE
CP42EP

LCPE44L

J
CPT32A
C391BE

CREDXR


FAT

H
C138R
ICT85CYL
CT92FH

ULICK

CL5400

T
C23A
CW49UA24
CH980
CWNT5E
CX09LM


C30TE
SCYA0RN

CY6611
77870
C17

A71GH
DB35KAJ
IDB59BO
DB96RL
DBA76QR

DC14XJ

C60FC
DC63MP
DC66EJ
C9501
OCN51W
5


DDM18H
T
DE500I
SE
LDJ24F
EDD357
DKP30D
ET20KH

EDLC25
PPTRTET
L270


DNT08N


EDPW03A
DPY68R

300293



DTS20
DVR65M
EDXD29K
LDXSE
DXN02RB
DXZ80