# Number Plate Recognition

**Importing necessary modules**

In [1]:
import cv2
import pytesseract
import numpy as np
import imutils
import re
import os
from pathlib import Path
import glob 
import Levenshtein
import pathlib

**Specifying tesseract path**

In [2]:
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

### Preprocessing functions
First image is converted to greyscale, then is blurred using mean blur. Finally image is inverted.

In [3]:
def preProcess(img):
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)#greyscale
    blur = cv2.blur(gray_image,(2,2))#blurring
    inverted = 256- 1 - blur#inverting
    return inverted

Getting image edges, to find contours in order to determine plate location in  the `getplate(img,location)`.

In [4]:
def getContours(img):
    edged = cv2.Canny(img, 30, 200) #Edge detection
    keypoints = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) #Find contours 
    contours = imutils.grab_contours(keypoints) #Grab contours 
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10] #Sort contours
    
    #Looping over our contours to find the best possible approximate contour of 10 contours
    location = None
    for contour in contours:
        approx = cv2.approxPolyDP(contour, 10, True)
        if len(approx) == 4:
            location = approx
            break
    return location

Determining plate location for it to be sent to pytessseract ocr in order to read the plate numbers.

In [5]:
def getplate(img,location):
    mask = np.zeros(img.shape, np.uint8) #create blank image with same dimensions as the original image
    new_image = cv2.drawContours(mask, [location], 0,255, -1) #Draw contours on the mask image
    new_image = cv2.bitwise_and(img, img, mask=mask) #Take bitwise AND between the original image and mask image
    (x,y) = np.where(mask==255) #Find the co-ordinates of the four corners of the document
    (x1, y1) = (np.min(x), np.min(y)) #Find the top left corner
    (x2, y2) = (np.max(x), np.max(y)) #Find the bottom right corner
    cropped_image = img[x1:x2+1, y1:y2+1] #Crop the image using the co-ordinates
    return cropped_image

The following function preprocesses the image using the previously defined functions, then sends the extracted plate to pytesseract, to get the plate number.

In [6]:
def readplate(img):
    preProcessed_img=preProcess(img)
    contours=getContours(preProcessed_img)
    try:
        plate=getplate(preProcessed_img,contours)
        data = pytesseract.image_to_string(plate, lang='eng', config='--oem 3 --psm 7 tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        #specifying pystesseract configure
        #--oem 3 => Using default engine
        #--psm 7 => Treating the image as a single text line.
        #tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ => Specifying characters to recognize
        data=re.sub(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$', '', data)
        #cleaning text using regular expressions to increase accuracy
        return data
    except:
        return ""        

a function that shows an image as in a pop-up window , to be used when necessary

In [7]:
def showim(img):
    cv2.imshow('', img)
    cv2.waitKey(0)
    # Window shown waits for any key pressing event
    cv2.destroyAllWindows()

## First try: 
Extracting plate numbers without any preprocessing

In [8]:
def raw_readplate(img): #This functions uses tesseract to get plate number , but without any preprocessing
        data = pytesseract.image_to_string(img, lang='eng', config='--oem 3 --psm 7')
        data=re.sub(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$', '', data)
        return data       

In [10]:
print(f"{'Actual License Plate':<25}{'Predicted License Plate':<29}{'Accuracy':<8}")
print("-" * 70)
i=0
sum=0
folder_dir = 'dataset'
images = Path(folder_dir).glob('*.png') 
#looping over images in a specified directory, accuracy is calculated using levenshtein ratio.
for image in images:
    i+=1
    actual_plate=pathlib.Path(image).stem
    predicted_plate=raw_readplate(cv2.imread(image))
    accuracy=Levenshtein.ratio(actual_plate, predicted_plate)
    sum+=accuracy
    print(f"{actual_plate:<25}{predicted_plate:<30}{accuracy:.2f}")
Overall_accuracy=(sum/i)
print(("-"*70),f"{'':<25}{'Overall Accuracy:':<30}{Overall_accuracy:<10.2f}",sep="\n")

Actual License Plate     Predicted License Plate      Accuracy
----------------------------------------------------------------------
172 TMJ                  e                             0.00
42-UK-32                                               0.00
ALR 486                  a                             0.00
BKTP 665                                               0.00
Cars84                                                 0.00
CH01AN0001                                             0.00
CZ17 KOD                                               0.00
DAN 54P                                                0.00
DL7C N 5617                                            0.00
DZI7 YXR                                               0.00
G526 JHD                                               0.00
GJ03JL0126               s ‘2 == 5                     0.11
GJW-1-15-A-1138                                        0.00
HR 26 BR 9044                                          0.00
HR 26 CT 6702             

> As we can conclude, no preprocessing or edge detection yields terrible accuracy, which means that preprocessing and edge detection are crucial steps before using pytesseract.

## Second try: 
Extracting plate numbers with some preprocessing (only converting to grey scale, blurring and inverting image, without any contouring or edge detection)

In [11]:
def nocontouring_readplate(img):
    preProcessed_img=preProcess(img)
    try:
        data = pytesseract.image_to_string(img, lang='eng', config='--oem 3 --psm 7 tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        #specifying pystesseract configure
        #--oem 3 => Using default engine
        #--psm 7 => Treating the image as a single text line.
        #tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ => Specifying characters to recognize
        data=re.sub(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$', '', data)
        #cleaning text using regular expressions to increase accuracy
        return data
    except:
        return ""        

In [12]:
print(f"{'Actual License Plate':<25}{'Predicted License Plate':<29}{'Accuracy':<8}")
print("-" * 70)
i=0
sum=0
folder_dir = 'dataset'
images = Path(folder_dir).glob('*.png')
for image in images:
    i+=1
    actual_plate=pathlib.Path(image).stem
    predicted_plate=nocontouring_readplate(cv2.imread(image))
    accuracy=Levenshtein.ratio(actual_plate, predicted_plate)
    sum+=accuracy
    print(f"{actual_plate:<25}{predicted_plate:<30}{accuracy:.2f}")
Overall_accuracy=(sum/i)
print(("-"*70),f"{'':<25}{'Overall Accuracy:':<30}{Overall_accuracy:<10.2f}",sep="\n")

Actual License Plate     Predicted License Plate      Accuracy
----------------------------------------------------------------------
172 TMJ                  e                             0.00
42-UK-32                                               0.00
ALR 486                  a                             0.00
BKTP 665                                               0.00
Cars84                                                 0.00
CH01AN0001                                             0.00
CZ17 KOD                                               0.00
DAN 54P                                                0.00
DL7C N 5617                                            0.00
DZI7 YXR                                               0.00
G526 JHD                                               0.00
GJ03JL0126               s ‘2 == 5                     0.11
GJW-1-15-A-1138                                        0.00
HR 26 BR 9044                                          0.00
HR 26 CT 6702             

> As we can conclude, preprocessing without edge detection yields terrible accuracy, which means that preprocessing didn't enhace our results for this case and edge detection is a crucial step before using pytesseract, too.

## Third try: 
Extracting plate numbers without preprocessing but using edge detection

In [13]:
def onlyContouring_readplate(img):
    contours=getContours(img)
    try:
        plate=getplate(img,contours)
        data = pytesseract.image_to_string(plate, lang='eng', config='--oem 3 --psm 7 tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        #specifying pystesseract configure
        #--oem 3 => Using default engine
        #--psm 7 => Treating the image as a single text line.
        #tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ => Specifying characters to recognize
        data=re.sub(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$', '', data)
        #cleaning text using regular expressions to increase accuracy
        return data
    except:
        return ""      

In [14]:
print(f"{'Actual License Plate':<25}{'Predicted License Plate':<29}{'Accuracy':<8}")
print("-" * 70)
i=0
sum=0
folder_dir = 'dataset'
images = Path(folder_dir).glob('*.png')
for image in images:
    i+=1
    actual_plate=pathlib.Path(image).stem
    predicted_plate=onlyContouring_readplate(cv2.imread(image))
    accuracy=Levenshtein.ratio(actual_plate, predicted_plate)
    sum+=accuracy
    print(f"{actual_plate:<25}{predicted_plate:<30}{accuracy:.2f}")
Overall_accuracy=(sum/i)
print(("-"*70),f"{'':<25}{'Overall Accuracy:':<30}{Overall_accuracy:<10.2f}",sep="\n")

Actual License Plate     Predicted License Plate      Accuracy
----------------------------------------------------------------------
172 TMJ                                                0.00
42-UK-32                                               0.00
ALR 486                                                0.00
BKTP 665                                               0.00
Cars84                                                 0.00
CH01AN0001                                             0.00
CZ17 KOD                                               0.00
DAN 54P                                                0.00
DL7C N 5617                                            0.00
DZI7 YXR                                               0.00
G526 JHD                                               0.00
GJ03JL0126                                             0.00
GJW-1-15-A-1138                                        0.00
HR 26 BR 9044                                          0.00
HR 26 CT 6702             

> As we can conclude, edge detection without preprocessing gives an even worse result, which means that preprocessing is a crucial step before using pytesseract.

## Fourth Try:
having some preprocessing alongside contouring , and actually having results

In [15]:
def minimal_preProcess1(img):
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)#greyscale
    blur = cv2.GaussianBlur(gray_image, (3, 3), 0)#blurring
    inverted = 256- 1 - gray_image#inverting
    return inverted

In [16]:
def minimal_preprocessing_readplate(img):
    preProcessed_img=minimal_preProcess1(img)
    contours=getContours(preProcessed_img)
    try:
        plate=getplate(preProcessed_img,contours)
        data = pytesseract.image_to_string(plate, lang='eng', config='--oem 3 --psm 7 tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        #specifying pystesseract configure
        #--oem 3 => Using default engine
        #--psm 7 => Treating the image as a single text line.
        #tessedit_char_whitelist=-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ => Specifying characters to recognize
        data=re.sub(r'^[^a-zA-Z0-9]+|[^a-zA-Z0-9]+$', '', data)
        #cleaning text using regular expressions to increase accuracy
        return data
    except:
        return ""        

In [17]:
print(f"{'Actual License Plate':<25}{'Predicted License Plate':<29}{'Accuracy':<8}")
print("-" * 70)
i=0
sum=0
folder_dir = 'dataset'
images = Path(folder_dir).glob('*.png')
for image in images:
    i+=1
    actual_plate=pathlib.Path(image).stem
    predicted_plate=minimal_preprocessing_readplate(cv2.imread(image))
    accuracy=Levenshtein.ratio(actual_plate, predicted_plate)
    sum+=accuracy
    print(f"{actual_plate:<25}{predicted_plate:<30}{accuracy:.2f}")
Overall_accuracy=(sum/i)
print(("-"*70),f"{'':<25}{'Overall Accuracy:':<30}{Overall_accuracy:<10.2f}",sep="\n")

Actual License Plate     Predicted License Plate      Accuracy
----------------------------------------------------------------------
172 TMJ                  172 TMJ                       1.00
42-UK-32                 w= —— a                       0.00
ALR 486                  ALR 466                       0.86
BKTP 665                                               0.00
Cars84                                                 0.00
CH01AN0001               CHO1ANO001                    0.80
CZ17 KOD                 CZI7 KOD                      0.88
DAN 54P                  DAN 54P                       1.00
DL7C N 5617              DL7C N 5617                   1.00
DZI7 YXR                 DZI7 YXR                      1.00
G526 JHD                                               0.00
GJ03JL0126               G6J03JL0126                   0.95
GJW-1-15-A-1138          GJW-1-15-A-1138               1.00
HR 26 BR 9044            HR 26 BR 9044                 1.00
HR 26 CT 6702            H

#### As we can see, preprocessing and using gaussian blur with kernel of 3*3 increased our acuuracy by about 60%
we tried other kernel values for gaussian blur that gave us the following accuracies:

kernel size &nbsp;&nbsp;&nbsp; accuracy

- 5*5 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.60
- 7*7 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.49
- 9*9 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.36


## Fifth Try:
trying another preprocessing approach (using mean/average blur instead of gaussian) alongside contouring, to see whether accuracy increases.

In [18]:
print(f"{'Actual License Plate':<25}{'Predicted License Plate':<29}{'Accuracy':<8}")
print("-" * 70)
i=0
sum=0
folder_dir = 'dataset'
images = Path(folder_dir).glob('*.png')
for image in images:
    i+=1
    actual_plate=pathlib.Path(image).stem
    predicted_plate=readplate(cv2.imread(image))
    accuracy=Levenshtein.ratio(actual_plate, predicted_plate)
    sum+=accuracy
    print(f"{actual_plate:<25}{predicted_plate:<30}{accuracy:.2f}")
Overall_accuracy=(sum/i)
print(("-"*70),f"{'':<25}{'Overall Accuracy:':<30}{Overall_accuracy:<10.2f}",sep="\n")

Actual License Plate     Predicted License Plate      Accuracy
----------------------------------------------------------------------
172 TMJ                  172 TMJ                       1.00
42-UK-32                 4 oe                          0.17
ALR 486                  ALR 466                       0.86
BKTP 665                 j BKTP- 665                   0.84
Cars84                   ff i                          0.00
CH01AN0001               CHO1AN0001                    0.90
CZ17 KOD                 C217 KOD                      0.88
DAN 54P                  DAN 54P                       1.00
DL7C N 5617              DL7C N 5617                   1.00
DZI7 YXR                 DZI7 YXR                      1.00
G526 JHD                 G526 JHD                      1.00
GJ03JL0126               6J03JL0126                    0.90
GJW-1-15-A-1138          GIW-1-15-A-1138               0.93
HR 26 BR 9044            HR 26 BR 9044                 1.00
HR 26 CT 6702            H

#### As we can see, preprocessing and using normalized box blur with kernel of 2*2 increased our acuuracy to 0.87
we tried other kernel values for box blur that gave us the following accuracies:

kernel size &nbsp;&nbsp;&nbsp; accuracy

- 3*3 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.66
- 4*4 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.37
- 5*5 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 0.18


### so we can conclude that this approach:
***greysclaing->box blurring of kernel(2,2)->invertion->contouring*** yeilds the best possible result with accuracy of 0.87