# Import Libraries

In [31]:
import os
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Lambda, Activation, BatchNormalization, Dropout

import sys
from glob import glob
sys.path.append('D:/gits/Newa-OCR/')   

from utils.preprocess import *
from utils.dict import *
from utils.load_model import *
from utils.inference import *
from utils.translate import *

# List Models

In [26]:
root_dir = '../models'
models = []
for root, dir, files in os.walk(root_dir):
    dirs = root.split(os.sep)
    if len(dirs) > 2 and dirs[-2] in ['pracalit','ranjana','pracalit_ranjana']:
        models.append([dirs[-2],dirs[-1]])

In [27]:
print(models)

[['pracalit', 'gray'], ['pracalit', 'rgb'], ['pracalit_ranjana', 'gray'], ['pracalit_ranjana', 'rgb'], ['ranjana', 'gray'], ['ranjana', 'rgb']]


# Parse Through Models and get Metrics

In [62]:
checkpoint= 'best'


for model in models:
    
    # model dirs
    model_dir = '../models/' + "/".join(model)
    dataset_dir = f"../dataset/{model[-2]}/{model[-1]}"

    # color channels
    color_channel = model[-1]
    
    # get num_of_characters and char_dict
    num_of_characters, char_dict = get_dict(model_dir)

    # get model
    ocr_model = ocr_model = load_model_weights(model_dir, checkpoint, color_channel, num_of_characters)

    # get image paths and preprocess
    with open(f"{dataset_dir}/label.txt","r",encoding='utf=8') as f:
        infos = f.readlines()
    image_paths = [info.split("\t")[0].replace("/kaggle/input/dataset/","../dataset/").replace("test/","") for info in infos]

    images = [cv2.imread(image_path) for image_path in image_paths]
    if color_channel =='gray':
        images = [preprocess_for_gray_channel(image) for image in images]
    else:
        images = [preprocess_for_rgb_channel(image) for image in images]
    
    # predicted labels
    prediction = [predict(ocr_model, image, char_dict) for image in images]
    
    # get true labels
    true_labels = [label.split("\t")[-1].strip() for label in infos] 

    # evaluate
    correct_char = 0
    total_char = 0
    correct = 0
    for i in range(len(true_labels)):
        
        pr = prediction[i]
        tr = true_labels[i]
        total_char += len(tr)
        
        for j in range(min(len(tr), len(pr))):
            if tr[j] == pr[j]:
                correct_char += 1
                
        if pr == tr :
            correct += 1 

    print("**********************************")
    print(f"For {checkpoint}  Model {model[-1]} {model[-2]}")
    print('Correct characters predicted : %.2f%%' %(correct_char*100/total_char))
    print('Correct words predicted      : %.2f%%' %(correct*100/len(true_labels)))
    print("**********************************")

**********************************
For best  Model gray pracalit
Correct characters predicted : 46.01%
Correct words predicted      : 47.50%
**********************************
**********************************
For best  Model rgb pracalit
Correct characters predicted : 0.21%
Correct words predicted      : 0.00%
**********************************


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [None]:
checkpoint= 'latest'


for model in models:
    
    # model dirs
    model_dir = '../models/' + "/".join(model)
    dataset_dir = f"../dataset/{model[-2]}/{model[-1]}"

    # color channels
    color_channel = model[-1]
    
    # get num_of_characters and char_dict
    num_of_characters, char_dict = get_dict(model_dir)

    # get model
    ocr_model = ocr_model = load_model_weights(model_dir, checkpoint, color_channel, num_of_characters)

    # get image paths and preprocess
    with open(f"{dataset_dir}/label.txt","r",encoding='utf=8') as f:
        infos = f.readlines()
    image_paths = [info.split("\t")[0].replace("/kaggle/input/dataset/","../dataset/").replace("test/","") for info in infos]

    images = [cv2.imread(image_path) for image_path in image_paths]
    if color_channel =='gray':
        images = [preprocess_for_gray_channel(image) for image in images]
    else:
        images = [preprocess_for_rgb_channel(image) for image in images]
    
    # predicted labels
    prediction = [predict(ocr_model, image, char_dict) for image in images]
    
    # get true labels
    true_labels = [label.split("\t")[-1].strip() for label in infos] 

    # evaluate
    correct_char = 0
    total_char = 0
    correct = 0
    for i in range(len(true_labels)):
        
        pr = prediction[i]
        tr = true_labels[i]
        total_char += len(tr)
        
        for j in range(min(len(tr), len(pr))):
            if tr[j] == pr[j]:
                correct_char += 1
                
        if pr == tr :
            correct += 1 

    print("**********************************")
    print(f"For {checkpoint}  Model {model[-1]} {model[-2]}")
    print('Correct characters predicted : %.2f%%' %(correct_char*100/total_char))
    print('Correct words predicted      : %.2f%%' %(correct*100/len(true_labels)))
    print("**********************************")