## OpenCV Project


In [None]:
import functools
import re

import country_list
import cv2
import pycountry
import pytesseract

# Frontal-face Haar cascade loaded from OpenCV's bundled data directory.
# NOTE(review): nothing in the visible code references this classifier.
haar_cascade = cv2.CascadeClassifier(
    f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml'
)

In [None]:
#############################################
### READING AND MODIFYING ID:################
#############################################

def get_image(filepath):
    """Load the image stored at ``filepath`` with ``cv2.imread``.

    ``cv2.imread`` does not raise when a file is missing or cannot be
    decoded; it returns ``None``. Handing that ``None`` to the next OpenCV
    call fails with an error that does not mention the path, so the problem
    is reported here instead.

    Args:
        filepath: Path of the image file to read.

    Returns:
        The image array produced by ``cv2.imread``.

    Raises:
        OSError: If ``cv2.imread`` could not read an image from ``filepath``.
    """
    image = cv2.imread(filepath)
    if image is None:
        raise OSError(f'Could not read image file: {filepath!r}')
    return image

def get_gray_image(filepath):
    """Return the image at ``filepath`` converted with ``cv2.COLOR_BGR2GRAY``."""
    colour_image = get_image(filepath)
    gray = cv2.cvtColor(colour_image, cv2.COLOR_BGR2GRAY)
    return gray

def get_threshold(filepath):
    """Threshold the grayscale image at ``filepath`` (Otsu, inverted binary).

    Returns:
        The result of ``cv2.threshold``, which ``get_dilation`` unpacks as
        ``ret, thresh``.
    """
    gray = get_gray_image(filepath)
    mode = cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
    return cv2.threshold(gray, 0, 255, mode)

def get_rect_kernel():
    """Build the rectangular structuring element used by ``get_dilation``."""
    kernel_size = (10, 15)
    return cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

def get_dilation(filepath):
    """Dilate the thresholded image at ``filepath`` once with the rect kernel."""
    # Only the thresholded image is needed; the first element is discarded.
    _, binary = get_threshold(filepath)
    return cv2.dilate(binary, get_rect_kernel(), iterations=1)

def get_contours(filepath):
    """Find the external contours of the dilated image at ``filepath``.

    Returns:
        The result of ``cv2.findContours``, which ``get_text_recognition``
        unpacks as ``contours, hierarchy``.
    """
    dilated = get_dilation(filepath)
    return cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

In [None]:
#############################################
### ID TEXT RECOGNITION :####################
############################################# 

def get_image_data(contour, image):
    """Run OCR on the part of ``image`` inside ``contour``'s bounding box.

    Args:
        contour: A contour accepted by ``cv2.boundingRect``.
        image: The image array the contour was found in. It is not modified.

    Returns:
        The string ``pytesseract.image_to_string`` produces for the cropped
        region.
    """
    x, y, w, h = cv2.boundingRect(contour)
    # OCR only the contour's region. Passing the whole image would repeat the
    # same full-page recognition for every contour.
    # No rectangle is drawn: on the shared image it would leave box lines
    # that end up inside later crops and disturb their recognition.
    cropped = image[y:y + h, x:x + w]
    return pytesseract.image_to_string(cropped)


def get_text_recognition(filepath):
    """Collect the OCR output for every contour of the image at ``filepath``.

    Returns:
        A list with one ``get_image_data`` result per contour, in the order
        the contours were returned.
    """
    contours, _ = get_contours(filepath)
    gray = get_gray_image(filepath)
    return [get_image_data(contour, gray) for contour in contours]

In [None]:
#############################################
### CHECKING COUNTRY OF ORIGIN:##############
############################################# 
def try_extract_country(func):
    """Decorate ``func`` so that a failing call returns ``False``.

    The country checks are best-effort: a lookup that raises simply counts
    as "no match". Only ``Exception`` subclasses are converted, so
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate and a long run
    can be interrupted.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name and docstring as ``func`` that returns
        ``func``'s result, or ``False`` if ``func`` raised an ``Exception``.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            return False

    return wrapper


def get_splited_list(filepath):
    """Tokenise the text recognised in the image at ``filepath``.

    Every recognised string is lower-cased and split on runs of separators.

    Returns:
        One flat list holding the pieces of all recognised strings, in order.
    """
    # Separators: newline, space, slash, comma, dot, semicolon, exclamation mark.
    separators = re.compile(r'[\n /,.;!]+')
    tokens = []
    for chunk in get_text_recognition(filepath):
        tokens.extend(separators.split(chunk.lower()))
    return tokens


@try_extract_country
def country_name_in_list(country_name, splited_list):
    """Report whether ``country_name`` is an item of ``splited_list``.

    The ``try_extract_country`` wrapper returns ``False`` if the check raises.
    """
    found = country_name in splited_list
    return found


@try_extract_country
def country_code_in_list(country_code, splited_list):
    """Report whether ``country_code`` is an item of ``splited_list``.

    The ``try_extract_country`` wrapper returns ``False`` if the check raises.
    """
    is_present = country_code in splited_list
    return is_present


@try_extract_country
def country_off_name_in_text(country_code, text):
    """Report whether the country's official name occurs in ``text``.

    ``country_code`` is upper-cased and looked up in pycountry by
    ``alpha_3``. ``text`` is joined with spaces and lower-cased before the
    substring check. The ``try_extract_country`` wrapper returns ``False``
    if any step raises.
    """
    country = pycountry.countries.get(alpha_3=country_code.upper())
    official_name = country.official_name.lower()
    haystack = ' '.join(text).lower()
    return official_name in haystack


@try_extract_country
def country_code_in_serial_number(country_code, text):
    """Report whether ``p<`` followed by the country code occurs in ``text``.

    ``text`` is joined with spaces and lower-cased before the substring
    check. The ``try_extract_country`` wrapper returns ``False`` if any step
    raises.
    """
    prefix = 'P<'.lower()
    marker = prefix + country_code.lower()
    haystack = ' '.join(text).lower()
    return marker in haystack


def get_list_of_countries():
    """List every pycountry country as a lower-cased ``(alpha_3, name)`` pair."""
    pairs = []
    for country in pycountry.countries:
        pairs.append((country.alpha_3.lower(), country.name.lower()))
    return pairs


def get_country_of_issue(filepath):
    """Guess the issuing country from the text recognised in an ID image.

    A country matches when any of these holds:

    * its lower-cased name is one of the recognised tokens;
    * its official name occurs in the recognised text;
    * its alpha-3 code is a token and ``p<`` plus that code also occurs
      in the recognised text.

    Args:
        filepath: Path of the ID image.

    Returns:
        The first matching country name, passed through ``str.capitalize``,
        or ``None`` when no country matches.
    """
    # Run recognition once and derive the tokens from the same result.
    # Calling get_splited_list here would repeat the whole OCR pass.
    text = get_text_recognition(filepath)

    # Same tokenisation as get_splited_list. A set keeps the membership test
    # cheap across the loop over every country.
    splited_list = set()
    for string in text:
        splited_list.update(re.split(r'[\n /,.;!]+', string.lower()))

    for country_code, country_name in get_list_of_countries():
        if (country_name_in_list(country_name, splited_list)
                or country_off_name_in_text(country_code, text)
                or (country_code_in_list(country_code, splited_list)
                    and country_code_in_serial_number(country_code, text))):
            return country_name.capitalize()
    return None

In [None]:
#############################################
### ID VERIFICATION PRINTOUT:################
############################################# 

def get_user_declaration(overwrite=None):
    """Return the declared country of issue.

    Args:
        overwrite: Value to return as-is. When ``None``, the user is
            prompted with ``input`` instead.

    Returns:
        ``overwrite``, or the string typed at the prompt.
    """
    if overwrite is not None:
        return overwrite
    return input('ID Country of Issue ->')

def country_of_issue_check(filepath, overwrite=None):
    """Compare the country read from an ID image with the declared one.

    Args:
        filepath: Path of the ID image.
        overwrite: Declared country; when ``None`` the user is prompted.

    Returns:
        ``'ID OK'`` if the extracted country equals the capitalised
        declaration, otherwise ``'WRONG ID'``.
    """
    detected = get_country_of_issue(filepath)
    declared = get_user_declaration(overwrite).capitalize()
    return 'ID OK' if detected == declared else 'WRONG ID'

In [None]:
#############################################
### CHECKING MODEL ACCURACY:#################
############################################# 

In [None]:
# Countries whose ID images form the training split.
train_set = [
    'Belgium',
    'Cyprus',
    'Czechia',
    'Hungary',
    'Poland',
]
# Countries whose ID images form the test split.
test_set = [
    'Netherlands',
    'Lithuania',
    'Ireland',
    'Luxembourg',
    'Spain',
    'Italy',
    'Croatia',
    'France',
    'Switzerland',
    'Norway',
    'Slovenia',
    'Malta',
    'Austria',
    'Finland',
    'Romania',
    'Portugal',
    'Denmark',
    'Iceland',
]

In [None]:
# Share of all listed countries that belongs to the training split.
print(
    'The percentage of the population used as training data in this project is '
    + format(len(train_set) / (len(train_set) + len(test_set)), '.0%')
)

In [None]:
def get_accuracy(list_of_countries, train_data=True, image_dir='Downloads/id_project'):
    """Measure how often the extracted country matches the expected one.

    For every country, the image ``<image_dir>/<country>.jpeg`` is checked
    with ``country_of_issue_check``, using the country name itself as the
    declaration.

    Args:
        list_of_countries: Sized collection of country names; each name is
            both the image file stem and the expected answer.
        train_data: Selects the word ``train`` or ``test`` in the message.
        image_dir: Directory holding the ``.jpeg`` images.

    Returns:
        A message giving the accuracy as a whole-number percentage.

    Raises:
        ValueError: If ``list_of_countries`` is empty, since accuracy is
            undefined without samples.
    """
    population = len(list_of_countries)
    if population == 0:
        raise ValueError('list_of_countries must contain at least one country')

    dataset = 'train' if train_data else 'test'
    true_responses = sum(
        country_of_issue_check(f'{image_dir}/{country}.jpeg', country) == 'ID OK'
        for country in list_of_countries
    )
    return f'Model accuracy on {dataset} dataset is {true_responses/population:.0%}'

In [None]:
# Accuracy message for the training split.
train_accuracy = get_accuracy(train_set, train_data=True)

In [None]:
# Accuracy message for the test split.
test_accuracy = get_accuracy(list_of_countries=test_set, train_data=False)

In [None]:
# Bare expression: as the last line of a notebook cell it displays the
# training-split accuracy message.
train_accuracy

In [None]:
# Bare expression: as the last line of a notebook cell it displays the
# test-split accuracy message.
test_accuracy