In [53]:
## Predict Script 

# INPUT CONVENTION
# filenames: a list of strings containing filenames of images

# OUTPUT CONVENTION
# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings.
# Make sure that the length of the array and the list is the same as the number of filenames that
# were given. The evaluation code may give unexpected results if this convention is not followed.

## Read a single image from train dir
import numpy as np
import os
import cv2
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from skimage.color import rgb2hsv
from sklearn.svm import SVC
%matplotlib inline
from sklearn.externals import joblib

## Identify Background pixels
def find_mode_color(colors):
    """Return the first color in ``colors`` that appears at least twice.

    Colors are compared pairwise in order, so the earliest color that has a
    later duplicate wins. When every color is distinct (or ``colors`` is
    empty) white, ``[255, 255, 255]``, is returned instead.
    """
    for position, candidate in enumerate(colors):
        # Only look at later entries; earlier pairs were already compared.
        for other in colors[position + 1:]:
            if np.all(candidate == other):
                return candidate
    return [255, 255, 255]

# Find the background color as the first color shared by at least two of the 4 image corners (white if all differ)
def mode_back_color(image):
    """Estimate the background color of ``image`` from its four corner pixels.

    The corner pixels are collected into one array and handed to
    ``find_mode_color``, whose result is returned unchanged.
    """
    last_row = image.shape[0] - 1
    last_col = image.shape[1] - 1
    # Order is top-left, bottom-left, top-right, bottom-right; it decides
    # which color wins when two different pairs of corners match.
    corners = np.array([
        image[0, 0, :],
        image[last_row, 0, :],
        image[0, last_col, :],
        image[last_row, last_col, :],
    ])
    return find_mode_color(corners)

# Remove the background color from the image
def remove_back_color(image, back_color):
    """Return a copy of ``image`` with every ``back_color`` pixel set to white.

    A pixel counts as background only when all of its channels equal
    ``back_color``. The input image is not modified.
    """
    result = np.copy(image)
    # True where the whole pixel (last axis) matches the background color.
    is_background = np.all(result == back_color, axis=-1)
    result[is_background] = [255, 255, 255]
    return result

## Erode the obfuscating lines
def remove_back_lines(fore_image, kernel_size=4, iterations=4):
    """Return ``fore_image`` dilated with a square kernel of ones.

    Parameters:
        fore_image: image array accepted by ``cv2.dilate``.
        kernel_size: side length of the square structuring element
            (default 4, the value that was previously hard-coded).
        iterations: number of dilation passes (default 4, as before).

    ``cv2.dilate`` returns a new array, so no defensive copy of the input is
    needed. NOTE(review): presumably the image has a white background at this
    point, so dilation lets the bright background swallow thin dark lines --
    confirm against the training pipeline.
    """
    kernel = np.ones((kernel_size, kernel_size))
    return cv2.dilate(fore_image, kernel, iterations=iterations)

## Convert an image to grayscale
def convert_to_gray(image):
    """Convert a BGR image to grayscale and invert its intensities.

    The inversion is done with ``cv2.bitwise_not`` on the single-channel
    result of ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.bitwise_not(gray)

## bfs visit at non black pixels
def bfs_visit(x, y, m, n, k, visit, image, threshold=1):
    """Flood-fill label ``k`` over the 4-connected region containing (x, y).

    Parameters:
        x, y: row and column of the seed pixel; it is labelled unconditionally.
        m, n: number of rows and columns of ``image`` / ``visit``.
        k: label written into ``visit`` for every pixel reached.
        visit: 2-D indexable grid of labels, where 0 means "not visited yet".
            It is modified in place.
        image: 2-D indexable grid of pixel values.
        threshold: a neighbour is entered only when its value is strictly
            greater than this.

    Returns:
        The same ``visit`` object, after labelling.
    """
    # Local import keeps the block self-contained; deque gives O(1) pops from
    # the front, unlike list.pop(0).
    from collections import deque

    visit[x][y] = k
    pending = deque([(x, y)])
    while pending:
        i, j = pending.popleft()
        # Neighbours in the order up, left, down, right.
        for ni, nj in ((i - 1, j), (i, j - 1), (i + 1, j), (i, j + 1)):
            # Bounds include row 0 and column 0; the previous `i>1` / `j>1`
            # checks never stepped into them and split components there.
            if not (0 <= ni < m and 0 <= nj < n):
                continue
            if image[ni][nj] > threshold and visit[ni][nj] == 0:
                visit[ni][nj] = k
                pending.append((ni, nj))
    return visit

## find non_black and unvisited pixels and bfs_visit
def bfs(m, n, image, threshold):
    """Label every connected region of pixels brighter than ``threshold``.

    Scans the ``m`` x ``n`` grid row by row; each pixel above ``threshold``
    that has no label yet seeds a ``bfs_visit`` fill with the next label.

    Returns:
        (count, labels): the number of regions found and an ``m`` x ``n``
        list of lists holding the label per pixel (0 where nothing was
        labelled).
    """
    labels = [[0] * n for _ in range(m)]
    count = 0
    for row in range(m):
        for col in range(n):
            if image[row][col] > threshold and labels[row][col] == 0:
                count += 1
                labels = bfs_visit(row, col, m, n, count, labels, image, threshold)
    return count, labels

## Remove boxes whose smaller side (min of xmax-xmin and ymax-ymin) is less than or equal to threshold
def valid_boxes(boxes, threshold):
    """Keep only the boxes whose smaller side exceeds ``threshold``.

    Each box is indexed as ``[x_min, y_min, x_max, y_max]``; a box is dropped
    when ``min(x_max - x_min, y_max - y_min) <= threshold``.

    Returns:
        (count, kept): the number of surviving boxes and those boxes as a
        numpy array, in their original order.
    """
    kept = [
        box for box in boxes
        if min(box[2] - box[0], box[3] - box[1]) > threshold
    ]
    return len(kept), np.array(kept)

## Find the bounding boxes of the connected regions in the image
def find_boxes(image, image_threshold=1, box_threshold=10):
    """Find bounding boxes of the connected bright regions of ``image``.

    Regions are labelled with ``bfs``; for each one the extreme column and row
    indices of its pixels are recorded as ``[left, top, right, bottom]``
    (``right`` and ``bottom`` are the largest indices seen, i.e. inclusive).
    The boxes are sorted, so they are ordered by ``left`` first, and then
    filtered through ``valid_boxes`` with ``box_threshold``.

    Returns:
        Whatever ``valid_boxes`` returns: (number of boxes, array of boxes).
    """
    rows, cols = image.shape[0], image.shape[1]
    count, labels = bfs(rows, cols, image, image_threshold)
    # Start each box "inside out" so the first pixel seen sets all four sides.
    extents = [[cols, rows, -cols, -rows] for _ in range(count)]
    for r in range(rows):
        for c in range(cols):
            if image[r][c] > image_threshold:
                box = extents[labels[r][c] - 1]  # labels start at 1
                box[0] = min(c, box[0])  # left
                box[1] = min(r, box[1])  # top
                box[2] = max(c, box[2])  # right
                box[3] = max(r, box[3])  # bottom
    extents.sort()
    return valid_boxes(extents, box_threshold)

## Given a test image, predict the characters it contains
def predict_on_test_image(test_image, classifier, image_threshold=1, box_threshold=10,
                    x_dimension=110, y_dimension=110):
    """Predict the characters contained in one image.

    Pipeline: estimate and whiten the background color, dilate to suppress
    the obfuscating lines, convert to inverted grayscale, find the bounding
    boxes of the connected regions, then classify at most the first four
    boxes.

    Parameters:
        test_image: image array as passed to ``mode_back_color``.
        classifier: object with a ``predict`` method taking a
            ``(1, x_dimension * y_dimension)`` array and returning one label.
            Labels are mapped to letters as ``chr(ord('A') + label)``.
        image_threshold, box_threshold: forwarded to ``find_boxes``.
        x_dimension, y_dimension: shape of the zero-padded canvas each crop
            is placed on before flattening.

    Returns:
        The predicted string, one character per classified box.
    """
    # find background color
    back_color = mode_back_color(test_image)
    # remove background color
    fore_image = remove_back_color(test_image, back_color)
    # remove obfuscating lines
    dilate_image = remove_back_lines(fore_image)
    # inverted grayscale: background becomes dark, characters bright
    gray_image = convert_to_gray(dilate_image)
    # find connected components and boxes (the box count itself is not needed)
    _, boxes = find_boxes(gray_image, image_threshold, box_threshold)
    chars = []
    for box in boxes[:4]:
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        # NOTE(review): find_boxes records inclusive maxima, so this slice
        # drops the last row/column of each box. Left unchanged because the
        # classifier was presumably trained on crops made the same way.
        crop_image = gray_image[top:bottom, left:right]
        # Clip oversized crops so the assignment below cannot fail with a
        # shape mismatch when a region is larger than the canvas.
        crop_image = crop_image[:x_dimension, :y_dimension]
        pad_image = np.zeros((x_dimension, y_dimension))
        pad_image[:crop_image.shape[0], :crop_image.shape[1]] = crop_image
        # column-major flatten, shaped as a single-sample batch
        features = pad_image.flatten('F').reshape(1, -1)
        # predict() returns an array; take its single element as a plain int
        # because chr() does not accept a length-1 ndarray.
        integer_label = int(np.ravel(classifier.predict(features))[0])
        chars.append(chr(ord('A') + integer_label))
    return "".join(chars)

## Given a list of filepaths, return the length of strings and the list of strings
def decaptcha(test_paths):
    """Predict the character string for every image path in ``test_paths``.

    Parameters:
        test_paths: list of image file paths.

    Returns:
        (lengths, strings): a numpy array with the length of each predicted
        string and the list of predicted strings, both with one entry per
        input path. Each prediction is also printed.
    """
    image_threshold = 1
    box_threshold = 10
    x_dimension = 110
    y_dimension = 110
    strings = []
    # Load the svm classifier.
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load a model file you trust.
    classifier = joblib.load('./svm_classifier.pkl')
    for test_path in test_paths:
        test_image = cv2.imread(test_path)
        if test_image is None:
            # cv2.imread returns None for a missing/unreadable file. Emit an
            # empty prediction so the outputs stay aligned with the inputs
            # instead of crashing the whole batch.
            string = ""
        else:
            string = predict_on_test_image(test_image, classifier, image_threshold,
                                           box_threshold, x_dimension, y_dimension)
        strings.append(string)
        print(string)
    length_of_strings = [len(string) for string in strings]
    return np.array(length_of_strings), strings

In [54]:
## Evaluation Script

import time as tm
import numpy as np

# Perform longest common subsequence search on the (truncated) code and gold code
def lcs( str1, str2, p, q ):
    """Return the length of the longest common subsequence of two prefixes.

    Compares the first ``p`` characters of ``str1`` with the first ``q``
    characters of ``str2``. Uses row-by-row dynamic programming, O(p*q) time
    and O(q) memory, instead of the exponential plain recursion.
    """
    if p <= 0 or q <= 0:
        return 0
    # prev[j] holds the LCS length of the processed prefix of str1 versus
    # the first j characters of str2.
    prev = [0] * ( q + 1 )
    for i in range( 1, p + 1 ):
        ch = str1[i - 1]
        cur = [0]
        for j in range( 1, q + 1 ):
            if ch == str2[j - 1]:
                cur.append( prev[j - 1] + 1 )
            else:
                cur.append( max( prev[j], cur[j - 1] ) )
        prev = cur
    return prev[q]

def getLCS( code, goldCode ):
    """Return the LCS length between ``code`` and ``goldCode``.

    Only the first four characters of ``code`` are considered; ``goldCode``
    is used in full.
    """
    usableLen = min( len( code ), 4 )
    goldLen = len( goldCode )
    return lcs( code, goldCode, usableLen, goldLen )

# If there are 100 test points and the prediction code returns 110 predictions
# then we only consider the first 100 and discard the last 10 predictions. On
# the other hand, if the code returns only 90 predictions, then we assume that
# these were predictions on the first 90 test points and evaluate accordingly

def getCodeLengthMatch( numChars, goldNumChars ):
    """Count positions where the predicted and gold code lengths agree.

    Only the first ``min(len(numChars), len(goldNumChars))`` entries are
    compared, so surplus predictions or surplus gold entries are ignored.
    The length is taken from the arguments themselves rather than from the
    module-level ``codes`` / ``goldCodes`` variables.
    """
    minLen = min( len( numChars ), len( goldNumChars ) )
    # asarray makes the comparison element-wise even when plain lists are given
    predicted = np.asarray( numChars[0:minLen] )
    gold = np.asarray( goldNumChars[0:minLen] )
    return np.count_nonzero( predicted == gold )

def getCodeMatchScore( codes, goldCodes ):
    """Sum, over paired codes, the LCS length divided by the gold code length.

    Codes are paired position by position; pairing stops at the shorter of
    the two lists.
    """
    return sum(
        getLCS( code, goldCode ) / len( goldCode )
        for code, goldCode in zip( codes, goldCodes )
    )

# Number of test images to evaluate; they are expected at ./test/image<i>.png
numTest = 2
filepaths = [ "./test/image%d.png" % i for i in range( numTest ) ]
# Gold codes, one per line. The context manager closes the file even if
# reading fails.
with open( "./test/codes.txt", "r" ) as file:
    goldCodes = file.read().splitlines()
goldNumChars = np.array( [ len( goldCode ) for goldCode in goldCodes ] )

# Run the predictor on all test images and time the whole call
tic = tm.perf_counter()
print(filepaths)
(numChars, codes) = decaptcha( filepaths )
toc = tm.perf_counter()

# Both scores are averaged over numTest
print( "Total time taken is %.6f seconds " % (toc - tic) )
print( "Fraction of code lengths that match is %.6f" % (getCodeLengthMatch( numChars, goldNumChars ) / numTest)  )
print( "Code match score is %.6f" % (getCodeMatchScore( codes, goldCodes ) / numTest) )

['./test/image0.png', './test/image1.png']
BGZ
BHAE
Total time taken is 1.345123 seconds 
Fraction of code lengths that match is 1.000000
Code match score is 1.000000
