# Recognizer
This notebook will be used to validate your model and grade your work.

# Collect features and execute model (YOUR IMPLEMENTATION HERE)
You need to implement the following function, which predicts the digits that appear in an image. The function should return an array of 4 elements containing these 4 digits, ordered from left to right.

In [14]:
import glob
import os
import sklearn
import joblib
import skimage, skimage.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform.pyramids import pyramid_laplacian
import skimage
from skimage import io,morphology
import pickle

In [15]:
def image_to_binary(image,threshold_corretion=0):
    """Turn an image into an inverted, slightly dilated binary mask.

    Parameters
    ----------
    image : array-like
        Either a 2-D grayscale image or an image with a trailing colour
        channel axis accepted by ``skimage.color.rgb2gray``.
    threshold_corretion : number, optional
        Offset added to the Otsu threshold before comparing (the spelling of
        the parameter name is kept for backward compatibility).

    Returns
    -------
    ndarray of bool
        ``True`` where the grayscale value is less than or equal to
        ``otsu_threshold + threshold_corretion``, after dilating the mask
        with a disk of radius 1.
    """
    # Import the submodules explicitly: ``skimage.color`` / ``skimage.filters``
    # are only reachable as attributes if something else already imported them.
    from skimage import color, filters

    image = np.asarray(image)
    # Older scikit-image returned 2-D input from rgb2gray unchanged; newer
    # releases no longer do, so grayscale images skip the conversion here.
    if image.ndim != 2:
        image = color.rgb2gray(image)

    thresh = filters.threshold_otsu(image)
    binary = image > (thresh + threshold_corretion)
    # Invert so that pixels at or below the threshold become the foreground.
    binary = np.invert(binary)
    # Thicken the foreground by one pixel in every direction.
    binary = morphology.dilation(binary, morphology.disk(1))
    return binary

In [16]:
def split_digits_into_image_array(image):
    """Cut an image into four digit tiles, ordered left to right.

    Tile ``k`` covers rows 0..30 and columns ``32*k`` .. ``32*k + 30``: a
    31-pixel window anchored at the start of each 32-pixel-wide band.

    Returns a list with the four slices of ``image``.
    """
    window = 31   # height and width of each extracted tile
    stride = 32   # horizontal distance between the left edges of two tiles
    return [
        image[0:window, left:left + window]
        for left in range(0, 4 * stride, stride)
    ]

In [17]:
def laplacian_pyramid_from_image(image):
    """Build the Laplacian pyramid of a single-channel image.

    Parameters
    ----------
    image : array-like
        Image passed to ``pyramid_laplacian`` and treated as having no
        channel axis.

    Returns
    -------
    tuple
        All layers yielded by ``pyramid_laplacian``, in the order produced.
    """
    import inspect

    # Newer scikit-image releases take ``channel_axis`` in place of the
    # deprecated ``multichannel`` flag; choose whichever keyword the installed
    # version exposes so the call works on both old and new releases.
    if 'channel_axis' in inspect.signature(pyramid_laplacian).parameters:
        layers = pyramid_laplacian(image, channel_axis=None)
    else:
        layers = pyramid_laplacian(image, multichannel=False)
    return tuple(layers)

In [18]:
def transform_pyramid_into_1d_array(pyramid,from_layer):
    """Flatten pyramid layers into one flat list of values.

    Layers before index ``from_layer`` are skipped. The remaining layers are
    walked in order, each one row by row, and every element is appended to
    the result.

    Returns a plain Python list of the collected elements.
    """
    selected_layers = pyramid[from_layer:]
    return [
        value
        for layer in selected_layers
        for row in layer
        for value in row
    ]

In [19]:
def prepare_laplacian_pyramid(pyramid):
    """Quantise every layer of a pyramid without modifying the input.

    For each layer, values greater than 0.001 are set to 1 and all remaining
    values are rounded with ``np.around``.

    Parameters
    ----------
    pyramid : iterable of array-like
        The pyramid layers.

    Returns
    -------
    list of ndarray
        One quantised array per input layer, in the same order.
    """
    new_pyramid = list()
    for layer in pyramid:
        # Work on a copy: assigning through the boolean mask directly on the
        # input would overwrite the caller's arrays as a side effect.
        quantised = np.array(layer)
        quantised[quantised > 0.001] = 1
        new_pyramid.append(np.around(quantised))
    return new_pyramid

In [20]:
# Preprocessor (template placeholder): no separate preprocessor is loaded,
# the example line below is intentionally left commented out.
# preproc = joblib.load('../classifiers/best_preprocessor.pkl')

# Load the final model from disk. `clf` is the module-level classifier that
# classify_image() calls for every digit.
# NOTE(review): joblib files are pickle-based — only load model files from a trusted source.
clf = joblib.load('../classifiers/laplacian_ensemble_classifier.joblib') 

In [21]:
def classify_image(filename):
    """Load an image from file and predict the four digits in the image.

    The image is binarised, cut into four digit tiles, and each tile is
    turned into a feature row (its prepared Laplacian pyramid, flattened
    from layer 1 onwards) that is passed to ``clf.predict``.

    Returns a list with the four predicted digits as ``int``, left to right.
    """
    # Load the image from file and cut its binary mask into digit tiles
    binary_mask = image_to_binary(skimage.io.imread(filename))
    digit_tiles = split_digits_into_image_array(binary_mask)

    def predict_digit(tile):
        # Collect features, preprocess and predict for a single digit tile
        prepared = prepare_laplacian_pyramid(laplacian_pyramid_from_image(tile))
        feature_row = np.asarray(transform_pyramid_into_1d_array(prepared, 1))
        # The classifier expects a 2-D input: one sample, all features
        return int(clf.predict(feature_row.reshape(1, -1))[0])

    return [predict_digit(tile) for tile in digit_tiles]

# Silence deprecation warnings

In [22]:
# Ignore every DeprecationWarning raised from this point on in the session.
# NOTE(review): which calls emit these warnings is not visible here —
# presumably the library calls in the cells above; confirm before relying on it.
import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)


# Score your model (DON'T CHANGE THIS CODE!!!)

In [23]:
# Grading harness (frozen by the assignment): runs classify_image() on every
# PNG in DATASET_FOLDER and reports per-digit and per-zipcode accuracy.
DATASET_FOLDER = '../dataset-images/'

# Outcomes: running counters updated while scoring
correct_classified_digits = 0
incorrect_classified_digits = 0
correct_classified_zipcodes = 0
incorrect_classified_zipcodes = 0

# Score the classifier
files = glob.glob(os.path.join(DATASET_FOLDER, '*.png'))
for f in files:
    # Get the correct label from the filename: the four characters that
    # precede the 4-character '.png' suffix
    correct_label = f[-8:-4]
    # Predict using the classifier
    predicted_label = classify_image(f)

    # Score digits: both sides are compared as strings, so int and str
    # predictions are treated alike
    zipcode_correct = True
    for i in range(len(correct_label)):
        if str(correct_label[i]) == str(predicted_label[i]):
            correct_classified_digits += 1
        else:
            incorrect_classified_digits += 1
            zipcode_correct = False

    # Score correct zipcodes: a zipcode counts as correct only if all of its
    # digits matched
    if zipcode_correct:
        correct_classified_zipcodes += 1
    else:
        incorrect_classified_zipcodes += 1

# Report accuracy followed by "( correct / incorrect )" counts.
# NOTE: if no files matched the pattern both counters are 0 and the divisions
# below raise ZeroDivisionError.
print("Digit accuracy: ", (correct_classified_digits / (correct_classified_digits + incorrect_classified_digits)), "(", correct_classified_digits, "/", incorrect_classified_digits, ")")
print("Zipcode accuracy: ", (correct_classified_zipcodes / (correct_classified_zipcodes + incorrect_classified_zipcodes)), "(", correct_classified_zipcodes, "/", incorrect_classified_zipcodes, ")")        

Digit accuracy:  0.9609375 ( 1845 / 75 )
Zipcode accuracy:  0.8541666666666666 ( 410 / 70 )
