In [1]:
input_address = input('Input image address:') #input must be in "" ("test.jpg" if file is in project folder),
                                              #acceptable formats: .jpg, .jpeg, .png, .gif, .bmp

Input image address:"test.jpg"


In [2]:
#INTERFACE
#Modes
hash_mode   = 1 #compare hashes
ahash_mode  = 1 #compare ahashes
phash_mode  = 1 #compare phashes
dhash_mode  = 1 #compare dhashes
ncc_mode    = 1 #find normalized cross correlation

pix_mode    = 1 #compare per-pixel
                #Quite effectively to a binary image. 
                #To grayscale need to use something to check the value of pixels rather than quantity. 
                #Perhaps: Levenshtein distance, MSE, RMSE, etc.

#Options
black_mode  = 0     #compare only black pixels
bin_ratio   = 200   #binarization threshold (0-255*3)
resize_mode = 1     #none = 0, antialias = 1, bicubic = 2 (0 IS NOT RECOMMENDED)
basewidth   = 8     #image width after compression (px)
hashing_mode = 'md5'#md5, sha1, sha224, sha256, sha384, sha512

rotate_mode = 0     #comrape with rotated examples (NOT RECOMMENDED)
rotate_degr = 87    #the degree to which the image is rotated by one step, 
                    #it will be approximated to integer number of steps for the passage of a circle.
    
from PIL import Image, ImageDraw
import os, hashlib, functools, scipy.fftpack, numpy, glob, operator, ImageChops, math
#Creating folders
#The folder where the user places the samples to be compared to the incoming image.
#Acceptable formats: .jpg, .jpeg, .png, .gif, .bmp
if not os.path.exists(os.getcwd()+'/examples/'):
    os.mkdir('examples')
examples_path = os.getcwd()+'/examples/'
#The folder in which the binary images will be generated.
if not os.path.exists(os.getcwd()+'/binary/'):
    os.mkdir('binary')
binary_path = os.getcwd()+'/binary/'
#The folder in which the resized images will be generated.
if not os.path.exists(os.getcwd()+'/resized/'):
    os.mkdir('resized')
resized_path = os.getcwd()+'/resized/'

#Global variables.
names   = []
values  = []
results = []
ahashes = []
phashes = []
dhashes = []
distances = []
nccors =[]

RESULT = ''

In [3]:
def clean_folders():
    #Cleaning folders
    #The folder in which the binary images will be generated.
    if os.path.exists(os.getcwd()+'/binary/'):   
        filelist = glob.glob(os.getcwd()+'/binary/*.*')
        for f in filelist:
            os.remove(f)
    #The folder in which the resized images will be generated.
    if os.path.exists(os.getcwd()+'/resized/'):
        filelist = glob.glob(os.getcwd()+'/resized/*.*')
        for f in filelist:
            os.remove(f)

In [4]:
def address_helper(input_img_addr, mode):
    if mode == 'png': #after preprocessing the program only works with .png format in order to avoid loss of quality
        input_img_addr = input_img_addr.replace('.jpg','.png')
        input_img_addr = input_img_addr.replace('.jpeg','.png')
        input_img_addr = input_img_addr.replace('.gif','.png')
        input_img_addr = input_img_addr.replace('.bmp','.bmp')
    if mode == 'bin': #change the address of the folder to /binary
        input_img_addr = input_img_addr.replace('/examples/','/binary/')
        input_img_addr = input_img_addr.replace('/resized/','/binary/')
    if mode == 'res': #change the address of the folder to /resized
        input_img_addr = input_img_addr.replace('/examples/','/resized/')
        input_img_addr = input_img_addr.replace('/binary/','/resized/')
    if mode == 'cln': #leave only the name of file (letter)
        for i in range(1, abs(360 / rotate_degr)):
            input_img_addr = input_img_addr.replace(str(360 / abs(360 / rotate_degr)*(i))+'.','.')
        input_img_addr = input_img_addr.replace('.jpg','')
        input_img_addr = input_img_addr.replace('.jpeg','')
        input_img_addr = input_img_addr.replace('.gif','')
        input_img_addr = input_img_addr.replace('.png','')
        input_img_addr = input_img_addr.replace('.bmp','')
        input_img_addr = input_img_addr.replace(os.getcwd()+'/binary/','')
        input_img_addr = input_img_addr.replace(os.getcwd()+'/resized/','')
    
    return input_img_addr

In [5]:
def binarization(input_img_addr):
    input_image  = Image.open(input_img_addr).convert('RGB') #convert for .gif opening fix
    input_width  = input_image.size[0]
    input_height = input_image.size[1]
    draw = ImageDraw.Draw(input_image)
     
    for i in range(input_width):
        for j in range(input_height):
            r, g, b = input_image.getpixel((i,j))
            s = r + g + b
            if (s > bin_ratio): 
                r, g, b = 255, 255, 255 #draw pixel black
            else:
                r, g, b = 0, 0, 0       #draw pixel white
            draw.point((i, j), (r, g, b))
    del draw
    #save in /binary folder like name.png
    input_img_addr = address_helper(input_img_addr,'png')
    input_image.save(address_helper(input_img_addr,'bin'), "PNG")

In [6]:
def save_rotate(input_img_addr, rotate_degr):  
    input_img_addr = address_helper(input_img_addr,'png')
    #first step out of cycle, becouse of don't want disposable checking in cycle
    (Image.open(input_img_addr).rotate(rotate_degr) #rotate first step
     .save(input_img_addr.replace('.',str(rotate_degr)+'.'), "PNG"))  #save first step
    input_img_addr = input_img_addr.replace('.',str(rotate_degr)+'.') #first step, str(...*(i-1)) returns '0'
    
    for i in range(2, 360 / rotate_degr):
        input_image = Image.open(input_img_addr).rotate(rotate_degr) 
        input_img_addr = input_img_addr.replace(str((rotate_degr)*(i-1))+'.',
                                                str((rotate_degr)*i)+'.')
        input_image.save(input_img_addr, "PNG")

In [7]:
def resize(input_img_addr, basewidth):
    input_image = Image.open(input_img_addr)
    height = int((float(input_image.size[1]) * 
                 float(basewidth / float(input_image.size[0])))) #calculate the proportional height
    if resize_mode == 1:
        input_image = input_image.resize((basewidth,height), Image.ANTIALIAS)
    if resize_mode == 2:
        input_image = input_image.resize((basewidth,height), Image.BICUBIC)
    else:
        input_image = input_image.resize((basewidth,height))
    input_image.save(address_helper(input_img_addr,'res'), "PNG")

In [8]:
def ham_dist(input_img_hash, example_hash):
    distance = 0
    #compare character by character
    for input_char, example_char in zip(input_img_hash, example_hash):
        if input_char != example_char:
            distance += 1
    return distance

In [9]:
def pix_compare(input_img_addr, example_addr):
    input_image = Image.open(input_img_addr)
    input_pixels = input_image.load()
    example = Image.open(example_addr)
    true = 0 #number of equal pixels
    
    for i in range(0, input_image.size[0]):
        for j in range(0, input_image.size[1]):
            input_pixel = input_image.getpixel((i, j))
            example_pixel = example.getpixel((i, j))
            if input_pixel == example_pixel:                        
                if not (black_mode == 1 and input_pixels[i, j][0] == 0):
                    true+=1  
    return true

In [10]:
def ncc(input_img_addr, example_addr):
    #based on syntacticbayleaves.com/2008/12/03/determining-image-similarity/  
    input_image = Image.open(input_img_addr)
    input_image = input_image.resize((input_image.size[0],input_image.size[0]), Image.ANTIALIAS)
    example = Image.open(example_addr).resize((input_image.size[0],input_image.size[0]), Image.ANTIALIAS)
    
    images = [input_image, example]
    vectors = []
    norms = []
    for image in images:
        vector = []
        for pixel in image.getdata():
            vector.append(numpy.mean(pixel))
        vectors.append(vector)
        norms.append(numpy.linalg.norm(vector, 2))
    a, b = vectors
    a_norm, b_norm = norms
    res = numpy.dot(a / a_norm, b / b_norm)
    return res

In [11]:
def get_hash(input_img_addr): #WEAK RESULTS
    input_image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth)) #convert 'tuple' to 'float', image is already binarized
    avg = functools.reduce(lambda x, y: x + y, input_image.getdata())/ 64.
    return functools.reduce(lambda x, yz: x | (yz[1] << yz[0]), 
                            enumerate(map(lambda i: 0 if i < avg else 1, input_image.getdata())),0)

In [12]:
def ahash(input_img_addr):
    #from ImageHash lib
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata()).reshape((basewidth, basewidth))
    avg = pixels.mean()
    diff = ''
    tmp = pixels > avg
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [13]:
def phash(input_img_addr):
    #from ImageHash lib
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata(), dtype = numpy.float).reshape((basewidth, basewidth))
    dct = scipy.fftpack.dct(scipy.fftpack.dct(pixels, axis=0), axis=1)
    dct_low_freq = dct[:basewidth, :basewidth]
    med = numpy.median(dct_low_freq)
    diff = ''
    tmp = dct_low_freq > med
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [14]:
def dhash(input_img_addr):
    #from ImageHash lib
    # resize(w, h), but numpy.array((h, w))
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata(), dtype=numpy.float).reshape((basewidth, basewidth))
    # compute differences between rows
    diff = ''
    tmp = pixels[:, 1:] > pixels[:, :-1]
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [15]:
def compare_1to1(input_img_addr, example_addr):
    example_addr = address_helper(example_addr,'png')
    input_img_addr = os.getcwd()+'/binary/'+address_helper(input_img_addr,'png')
    #compare per-pixel
    if pix_mode == 1:      
        values.append(pix_compare(input_img_addr, example_addr))
        
    resize(input_img_addr, basewidth)
    resize(example_addr, basewidth)
    example_addr = address_helper(example_addr,'res')
    input_img_addr = address_helper(input_img_addr,'res')
    
    #WARNING:now pictures are resized
    #compare ahashes
    if ahash_mode == 1:
        ahashes.append(ham_dist(ahash(input_img_addr), ahash(example_addr)))
    #compare phashes
    if phash_mode == 1:
        phashes.append(ham_dist(phash(input_img_addr), phash(example_addr)))
    #compare dhashes
    if dhash_mode == 1:
        dhashes.append(ham_dist(dhash(input_img_addr), dhash(example_addr)))
    #normalized cross correlation
    if ncc_mode == 1:
        nccors.append(ncc(input_img_addr, example_addr))
    
    names.append(address_helper(example_addr, 'cln'))   

In [16]:
def compare_1toN(input_img_addr):
    #delete previous values
    global names, values, distances, ahashes, phashes, dhashes, nccors
    names, values, distances, ahashes, phashes, dhashes, nccors = [],[],[],[],[],[],[]
        
    examples = os.listdir(examples_path) #get list of all files in /examples
    for i in range(len(examples)):
        try: #check if file is image
            binarization(examples_path+examples[i])
            if rotate_mode == 1:
                save_rotate(binary_path+examples[i],360 / abs(360 / rotate_degr)) #rotate to the approximate degree
        except IOError:
            pass
    examples = os.listdir(binary_path) #get list of all files in /binary + rotated images
    for i in range(len(examples)):
        try: #check if file is image
            compare_1to1(input_img_addr,binary_path+examples[i])
        except IOError:
            pass

In [17]:
def result_1toN():
    global results #local results
    global RESULT  #global result
    results = [''] #clean prev result
    
    #collecting all the local results
    if pix_mode == 1 and values != []:
        results.append(names[values.index(max(values))])
    if hash_mode == 1 and distances != []:
        results.append(names[distances.index(min(distances))])
    if ahash_mode == 1 and ahashes != []:
        results.append(names[ahashes.index(min(ahashes))])
    if phash_mode == 1 and phashes != []:
        results.append(names[phashes.index(min(phashes))])
    if dhash_mode == 1 and dhashes != []:
        results.append(names[dhashes.index(min(dhashes))])
    if ncc_mode == 1 and nccors != []:
        results.append(names[nccors.index(min(nccors))])
        
    RESULT += max(set(results), key=results.count) #add the most likely local result to the global

In [18]:
def print_1toN(): #developer function, NEWER EVER USE TO THE FULL DATASET
    if pix_mode == 1:
        print('Pixel mode:')
        for i in range(len(names)):
            print(names[i],values[i])
    if ahash_mode == 1:
        print('AHash mode:')
        for i in range(len(names)):
            print(names[i],ahashes[i])
    if phash_mode == 1:
        print('PHash mode:')
        for i in range(len(names)):
            print(names[i],phashes[i])
    if dhash_mode == 1:
        print('DHash mode:')
        for i in range(len(names)):
            print(names[i],dhashes[i])
    if dhash_mode == 1:
        print('NCC mode:')
        for i in range(len(names)):
            print(names[i],nccors[i])
    print('Results:')
    if pix_mode == 1 and values != []:
        print('Pixel mode:',names[values.index(max(values))])
    if ahash_mode == 1 and ahashes != []:
        print('AHash mode:',names[ahashes.index(min(ahashes))])
    if phash_mode == 1 and phashes != []:
        print('PHash mode:',names[phashes.index(min(phashes))])
    if dhash_mode == 1 and dhashes != []:
        print('DHash mode:',names[dhashes.index(min(dhashes))])
    if ncc_mode == 1 and nccors != []:
        print('DHash mode:',names[nccors.index(max(nccors))])
    print('Result:',RESULT)

In [19]:
binarization(input_address)
compare_1toN(input_address)
result_1toN()
print_1toN()

Pixel mode:
('compare', 44884)
('testgif', 38302)
('a', 47718)
('b', 47416)
('ctest', 38706)
('test', 47718)
AHash mode:
('compare', 8)
('testgif', 18)
('a', 0)
('b', 6)
('ctest', 17)
('test', 0)
PHash mode:
('compare', 9)
('testgif', 24)
('a', 0)
('b', 5)
('ctest', 20)
('test', 0)
DHash mode:
('compare', 4)
('testgif', 29)
('a', 0)
('b', 2)
('ctest', 16)
('test', 0)
NCC mode:
('compare', 0.99817553951743232)
('testgif', 0.97801097652380631)
('a', 0.99999999999999989)
('b', 0.99879890502289581)
('ctest', 0.94327227469333508)
('test', 0.99999999999999989)
Results:
('Pixel mode:', 'a')
('AHash mode:', 'a')
('PHash mode:', 'a')
('DHash mode:', 'a')
('DHash mode:', 'a')
('Result:', 'a')


In [20]:
clean_folders()