In [None]:
import glob
import os

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import PyPDF4 as pdf
from tensorflow.keras.models import load_model

import extract_digit_images_v1a as edi1
import extract_digit_images_v2 as edi2
from AutoEvidence import get_img_from_page
# Load the saved Keras model once, from 'model.h5' relative to the current
# working directory. It is passed to ExtractID() as `model` for every file.
model = load_model('model.h5')

In [None]:
def ExtractID(img_segs, model, n_digits=9):
    """Classify digit crops with `model` and join the results into an ID string.

    Parameters
    ----------
    img_segs : sequence of array-likes
        Digit crops. Together they must contain a multiple of 28*28 values,
        because they are reshaped to ``(n, 28, 28, 1)``. Values are divided by
        255 and inverted (``1 - x/255``) before prediction.
        NOTE(review): this presumes 0-255 greyscale input -- confirm.
    model : object
        Anything exposing ``predict(batch)`` that returns a 2-D array with one
        row of class scores per crop.
    n_digits : int, optional
        Maximum number of characters in the returned ID (default 9). When more
        crops than this are supplied, the rows whose best class score is lowest
        are discarded one at a time until `n_digits` remain.

    Returns
    -------
    str
        One character per remaining crop, in the original order: the class
        index for classes 0-9, and 'M' for any class index of 10 or above.

    Raises
    ------
    ValueError
        If `n_digits` is negative, or (from NumPy) if the crops cannot be
        reshaped to ``(n, 28, 28, 1)``.
    """
    if n_digits < 0:
        raise ValueError(f'n_digits must be non-negative, got {n_digits}')

    batch = np.array([np.array(seg) for seg in img_segs])
    n = batch.size // (28 * 28)
    # Invert and rescale so the maximum possible value is 1.0.
    batch = 1 - batch.reshape(n, 28, 28, 1).astype('float32') / 255.0

    prediction = model.predict(batch)

    # Too many crops: repeatedly remove the row with the lowest top score.
    while prediction.shape[0] > n_digits:
        confidence = np.amax(prediction, axis=1)
        prediction = np.delete(prediction, confidence.argmin(), axis=0)

    chars = [str(cls) if cls < 10 else 'M' for cls in prediction.argmax(axis=1)]
    return ''.join(chars)

In [None]:
# %%time
# Fractions of the page image that bound the ID region; the loop multiplies
# entries 0 and 2 by the image width and entries 1 and 3 by the image height
# to build the crop box.
ID_box = [0.5, 0, 1, 0.15]
# Size used for the diagnostic figures.
figsz = (12, 6)
# PDFs to evaluate; the loop reads the expected ID from each file name.
files = glob.glob('./test/*.pdf')

# Pass / fail tallies for the edi2 pipeline (ID) ...
n_p = n_f = 0
# ... and for the edi1 pipeline (ID2).
n_p2 = n_f2 = 0

# 1-based file numbers (in glob order) that the notebook author marked as
# "number scriber issues"; diagnostics are suppressed for them.
KNOWN_SCRIBE_ISSUES = [7, 11, 14, 28, 43]


def _show_id_box(img, b_xy, b_w, b_h, boxes):
    """Show the cropped ID region with the detected box (lime) and digit boxes (red)."""
    plt.figure(figsize=figsz)
    ax = plt.subplot(1, 1, 1)
    ax.axis('off')
    ax.imshow(img)
    ax.add_patch(patches.Rectangle(b_xy, b_w, b_h, linewidth=1, edgecolor='lime', facecolor='none'))
    for b in boxes:
        # Each entry is used as (xy, width, height).
        ax.add_patch(patches.Rectangle(b[0], b[1], b[2], linewidth=1, edgecolor='red', facecolor='none'))
    plt.show()


def _show_segments(segments, predicted, seg_label, id_label):
    """Print `seg_label` and plot the crops in one row, then print `id_label` and render `predicted` as text."""
    print(seg_label)
    plt.figure(figsize=figsz)
    for i, seg in enumerate(segments):
        ax = plt.subplot(1, len(segments), i + 1)
        ax.axis('off')
        plt.imshow(seg)
    plt.show()
    print(id_label)
    plt.figure(figsize=(figsz[0], 1))
    ax = plt.subplot(1, 1, 1)
    ax.axis('off')
    plt.text(0, 0, ''.join([f'  {x}  ' for x in predicted]), fontsize=39)
    plt.show()


for f in files:
    # Render the first page and crop it to the ID region given by ID_box.
    pdf_reader = pdf.PdfFileReader(f)
    img = get_img_from_page(pdf_reader.getPage(0), pdf_reader)
    w, h = img.size
    img = img.crop((ID_box[0]*w, ID_box[1]*h, ID_box[2]*w, ID_box[3]*h))

    # Locate the ID box and the 9 digit boxes (used for the crop below and for plotting).
    im = edi2.convert_im(img, conversion='grey')
    b_xy, b_w, b_h = edi2.find_box(im)
    boxes = edi2.find_digit_boxes(b_xy, b_w, b_h, 9)

    # Pipeline 1: edi2 segments the whole ID region.
    im_segments = edi2.GetDigitImages(img)
    ID = ExtractID(im_segments, model)

    # Pipeline 2: edi1 segments the detected box plus a 5 px / 10 px margin.
    im_segments2 = edi1.GetDigitImages(img.crop((b_xy[0]-5, b_xy[1]-10, b_xy[0]+b_w+5, b_xy[1]+b_h+10)))
    ID2 = ExtractID(im_segments2, model)

    # The expected ID is the first 9 characters of the file name. Taking it from
    # the basename avoids depending on the length of the directory prefix.
    correct = os.path.basename(f)[:9]
    file_no = n_p + n_f + 1  # 1-based index of the current file
    show_plot = (ID != correct) and (file_no not in KNOWN_SCRIBE_ISSUES)
    # show_plot = False  # uncomment to silence all diagnostics

    if show_plot:
        print('*'*20)
        print('*'*8 + f' {file_no: 2d} ' + '*'*8)
        print('*'*20)
        _show_id_box(img, b_xy, b_w, b_h, boxes)

    # Compare for exact equality: a substring test against the path would count
    # a truncated (or empty) prediction as a pass.
    if ID == correct:
        n_p += 1
        if show_plot: print(f'correct -> {ID} = {correct}')
    else:
        n_f += 1
        if show_plot: print(f'wrong -> {ID} != {correct}')

    if show_plot:
        _show_segments(im_segments, ID, 'segs', 'ID')

    if ID2 == correct:
        n_p2 += 1
        if show_plot: print(f'correct -> {ID2} = {correct}')
    else:
        n_f2 += 1
        if show_plot: print(f'wrong -> {ID2} != {correct}')

    if show_plot:
        _show_segments(im_segments2, ID2, 'segs2', 'ID2')

# Report the pass rate of each pipeline. Guard the division so that a run in
# which the glob matched no files prints a message instead of raising
# ZeroDivisionError.
n_total = n_p + n_f
n_total2 = n_p2 + n_f2
if n_total:
    print(f'Pass percent: {n_p/n_total:.2%}')
else:
    print('Pass percent: n/a (no files processed)')
if n_total2:
    print(f'Pass percent2:{n_p2/n_total2:.2%}')
else:
    print('Pass percent2:n/a (no files processed)')