# Extracting Serial Number from Meter Images

## Install & Import Dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import os

import easyocr
from glob import glob
import xml.etree.ElementTree as xet

import warnings
warnings.filterwarnings("ignore")

## Extracting only .xml file from the sample_fullimgs folder

In [2]:
path = glob('./sample_fullimgs/*.xml')
path

['./sample_fullimgs\\1622603162_0.xml',
 './sample_fullimgs\\1623370619_0.xml',
 './sample_fullimgs\\1623372505_0.xml',
 './sample_fullimgs\\1623373578_0.xml',
 './sample_fullimgs\\1623375167_0.xml',
 './sample_fullimgs\\1623377917_0.xml',
 './sample_fullimgs\\1623378080_0.xml',
 './sample_fullimgs\\1623378516_0.xml',
 './sample_fullimgs\\1623378846_0.xml',
 './sample_fullimgs\\1623379074_0.xml',
 './sample_fullimgs\\1623380464_0.xml',
 './sample_fullimgs\\1623381606_0.xml',
 './sample_fullimgs\\1623385560_0.xml',
 './sample_fullimgs\\1623387508_0.xml',
 './sample_fullimgs\\1623388359_0.xml',
 './sample_fullimgs\\1623389185_0.xml',
 './sample_fullimgs\\1623391183_0.xml',
 './sample_fullimgs\\1623392964_0.xml',
 './sample_fullimgs\\1623394746_0.xml',
 './sample_fullimgs\\1623395973_0.xml',
 './sample_fullimgs\\1623396672_0.xml',
 './sample_fullimgs\\1623397626_0.xml',
 './sample_fullimgs\\1623397782_0.xml',
 './sample_fullimgs\\1623398111_0.xml',
 './sample_fullimgs\\1623399315_0.xml',


## Extracting coordinates of the bounding box

In [3]:
labels_dict = dict(filepath=[],xmin=[],xmax=[],ymin=[],ymax=[])

for filename in path:
    info = xet.parse(filename)
    root = info.getroot()
    member_object = root.find('object')
    labels_info = member_object.find('bndbox')
    
    xmin = int(labels_info.find('xmin').text)
    xmax = int(labels_info.find('xmax').text)    
    ymin = int(labels_info.find('ymin').text)    
    ymax = int(labels_info.find('ymax').text)
    
    labels_dict['filepath'].append(filename)
    labels_dict['xmin'].append(xmin)    
    labels_dict['xmax'].append(xmax)    
    labels_dict['ymin'].append(ymin)    
    labels_dict['ymax'].append(ymax)    

## Making DataFrame of coordinates with their respective images

In [4]:
df = pd.DataFrame(labels_dict)
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,./sample_fullimgs\1622603162_0.xml,194,298,475,506
1,./sample_fullimgs\1623370619_0.xml,175,394,304,350
2,./sample_fullimgs\1623372505_0.xml,214,415,788,844
3,./sample_fullimgs\1623373578_0.xml,41,283,475,537
4,./sample_fullimgs\1623375167_0.xml,177,399,558,604


## Saving above new data frame on local machine 

In [5]:
#df.to_csv('labels.csv',index=False)

## Reading label dataset

In [6]:
df = pd.read_csv('labels.csv')
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,./sample_fullimgs\1622603162_0.xml,194,298,475,506
1,./sample_fullimgs\1623370619_0.xml,175,394,304,350
2,./sample_fullimgs\1623372505_0.xml,214,415,788,844
3,./sample_fullimgs\1623373578_0.xml,41,283,475,537
4,./sample_fullimgs\1623375167_0.xml,177,399,558,604


In [7]:
df.shape

(47, 5)

## Function to extract File Path from .xml file

In [8]:
def getFilename(filename):
    filename_image = xet.parse(filename).getroot().find('filename').text
    filepath_image = os.path.join('./sample_fullimgs',filename_image)
    return filepath_image

In [9]:
image_path = list(df['filepath'].apply(getFilename))
image_path

['./sample_fullimgs\\1622603162_0.jpg',
 './sample_fullimgs\\1623370619_0.jpg',
 './sample_fullimgs\\1623372505_0.jpg',
 './sample_fullimgs\\1623373578_0.jpg',
 './sample_fullimgs\\1623375167_0.jpg',
 './sample_fullimgs\\1623377917_0.jpg',
 './sample_fullimgs\\1623378080_0.jpg',
 './sample_fullimgs\\1623378516_0.jpg',
 './sample_fullimgs\\1623378846_0.jpg',
 './sample_fullimgs\\1623379074_0.jpg',
 './sample_fullimgs\\1623380464_0.jpg',
 './sample_fullimgs\\1623381606_0.jpg',
 './sample_fullimgs\\1623385560_0.jpg',
 './sample_fullimgs\\1623387508_0.jpg',
 './sample_fullimgs\\1623388359_0.jpg',
 './sample_fullimgs\\1623389185_0.jpg',
 './sample_fullimgs\\1623391183_0.jpg',
 './sample_fullimgs\\1623392964_0.jpg',
 './sample_fullimgs\\1623394746_0.jpg',
 './sample_fullimgs\\1623395973_0.jpg',
 './sample_fullimgs\\1623396672_0.jpg',
 './sample_fullimgs\\1623397626_0.jpg',
 './sample_fullimgs\\1623397782_0.jpg',
 './sample_fullimgs\\1623398111_0.jpg',
 './sample_fullimgs\\1623399315_0.jpg',


In [10]:
labels = df.iloc[:,1:].values
len(labels)

47

## Extracting Serial number from meter Image

In [11]:
serial_no = []
for i in range(len(labels)):
    file_path = image_path[i]
    img = cv2.imread(file_path)

    x1,x2,y1,y2 = labels[i]
    cv2.rectangle(img,(x1,y1),(x2,y2),(0,255,0),2)
    #plt.imshow(img)
    
    cropped_img = img[y1:y2, x1:x2]
    #plt.imshow(cropped_img)

    reader = easyocr.Reader(['en'])
    result = reader.readtext(cropped_img)
    #result[0][-2]
    try:
        serial_no.append(result[0][-2])
    except:
        serial_no.append('Unable to read serial no')
    

CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.
CUDA not available - defaulting to CPU. Note: This module is much

## Function of extract File Name of Images

In [12]:
def getFilename(filename):
    filename_image = xet.parse(filename).getroot().find('filename').text
    return filename_image

In [13]:
image_name = list(df['filepath'].apply(getFilename))

## Dataset of image name with their respective serial number number

In [14]:
sn_df = pd.DataFrame(list(zip(image_name , serial_no )), columns = ['Image_Name',"Serial_No"])
sn_df.head()

Unnamed: 0,Image_Name,Serial_No
0,1622603162_0.jpg,Ezz58411
1,1623370619_0.jpg,63113879
2,1623372505_0.jpg,75382
3,1623373578_0.jpg,1284`
4,1623375167_0.jpg,31040176


## Save sn_df dataset

In [15]:
#df.to_csv('Meter_SerialNo.csv',index=False)