In [61]:
from PIL import Image, ImageDraw 
from IPython.display import Image as DImage
import sys, os, hashlib, functools, scipy.fftpack, numpy

In [62]:
#INTERFACE
#Modes
pix_mode    = 1 #compare per-pixel
hash_mode   = 1 #compare hashes
string_mode = 1 #compare hashes line by line
ahash_mode  = 1 #compare ahashes
phash_mode  = 1 #compare phashes
dhash_mode  = 1 #compare dhashes

#Options
black_mode  = 0 #compare only black pixels
bin_ratio   = 200 #binarization threshold (0-255*3)
resize_mode = 1 #none = 0, antialias = 1, bicubic = 2
rotate_mode = 1 #comrape rotated pictures
rotate_numb = 4 #number of rotations, degrees = 360 / rotate_numb
basewidth   = 8 #image width after compression (px)
hashing_mode = 'md5'#md5 sha1 sha224 sha256 sha384 sha512

#Creating folders
#The folder where the user places the samples to be compared to the incoming image.
#Acceptable formats: .jpg, .jpeg, .png, .gif, .bmp
if not os.path.exists(os.getcwd()+'/examples/'):
    os.mkdir('examples')
examples_path = os.getcwd()+'/examples/'
#The folder in which the binary images will be generated.
if not os.path.exists(os.getcwd()+'/binary/'):
    os.mkdir('binary')
binary_path = os.getcwd()+'/binary/'
#The folder in which the resized images will be generated.
if not os.path.exists(os.getcwd()+'/resized/'):
    os.mkdir('resized')
resized_path = os.getcwd()+'/resized/'

#Global variables.
names   = []
values  = []
strings = []
results = []
ahashes = []
phashes = []
dhashes = []
distances = []

RESULT = ''

In [24]:
input_address = input('Input image address:') #input must be in "" ("test.jpg" if file is in project folder),
                                              #acceptable formats: .jpg, .jpeg, .png, .gif, .bmp

Input image address:"testgif.gif"


In [63]:
def address_helper(input_img_addr, mode):
    if mode == 'png': #after preprocessing the program only works with .png format in order to avoid loss of quality
        input_img_addr = input_img_addr.replace('.jpg','.png')
        input_img_addr = input_img_addr.replace('.jpeg','.png')
        input_img_addr = input_img_addr.replace('.gif','.png')
        input_img_addr = input_img_addr.replace('.bmp','.bmp')
    if mode == 'bin': #change the address of the folder to /binary
        input_img_addr = input_img_addr.replace('/examples/','/binary/')
        input_img_addr = input_img_addr.replace('/resized/','/binary/')
    if mode == 'res': #change the address of the folder to /resized
        input_img_addr = input_img_addr.replace('/examples/','/resized/')
        input_img_addr = input_img_addr.replace('/binary/','/resized/')
    if mode == 'cln': #leave only the name of file (letter)
        input_img_addr = input_img_addr.replace('.jpg','')
        input_img_addr = input_img_addr.replace('.jpeg','')
        input_img_addr = input_img_addr.replace('.gif','')
        input_img_addr = input_img_addr.replace('.png','')
        input_img_addr = input_img_addr.replace('.bmp','')
        input_img_addr = input_img_addr.replace(os.getcwd()+'/binary/','')
        input_img_addr = input_img_addr.replace(os.getcwd()+'/resized/','')
    return input_img_addr

In [64]:
def binarization(input_img_addr):
    input_image  = Image.open(input_img_addr).convert('RGB') #convert for .gif opening fix
    input_width  = input_image.size[0]
    input_height = input_image.size[1]
    draw = ImageDraw.Draw(input_image)
     
    for i in range(input_width):
        for j in range(input_height):
            r, g, b = input_image.getpixel((i,j))
            s = r + g + b
            if (s > bin_ratio): 
                r, g, b = 255, 255, 255 #draw pixel black
            else:
                r, g, b = 0, 0, 0       #draw pixel white
            draw.point((i, j), (r, g, b))
    del draw
    #save in /binary folder like name.png
    input_img_addr = address_helper(input_img_addr,'png')
    input_image.save(address_helper(input_img_addr,'bin'), "PNG")

In [65]:
def saverotate(input_img_addr):
    input_img_addr = address_helper(input_img_addr,'png')
    
    input_image = Image.open(input_img_addr).rotate(90)
    input_img_addr = input_img_addr.replace('.','90.')
    input_image.save(input_img_addr, "PNG")
   
    input_image = Image.open(input_img_addr).rotate(90)
    input_img_addr = input_img_addr.replace('90.','180.')
    input_image.save(input_img_addr, "PNG")
   
    input_image = Image.open(input_img_addr).rotate(90)
    input_img_addr = input_img_addr.replace('180.','270.')
    input_image.save(input_img_addr, "PNG")

In [66]:
def gethash(input_img_addr):
    input_image = Image.open(input_img_addr).convert('L') #convert 'tuple' to 'float', image is already binarized
    avg = functools.reduce(lambda x, y: x + y, input_image.getdata())/ 64.
    return functools.reduce(lambda x, yz: x | (yz[1] << yz[0]), 
                            enumerate(map(lambda i: 0 if i < avg else 1, input_image.getdata())),0)

In [67]:
def hamdist(input_img_hash, example_hash):
    distance, i = 0, input_img_hash ^ example_hash
    while i:
        distance += 1
        i &= i - 1
    return distance

In [68]:
def shamdist(str1, str2, prevMin=None):
    diffs = 0
    if len(str1) != len(str2):
        return max(len(str1),len(str2))
    for ch1, ch2 in zip(str1, str2):
        if ch1 != ch2:
            diffs += 1
            if prevMin is not None and diffs > prevMin:
                return None
    return diffs

In [69]:
def resize(input_img_addr, basewidth):
    input_image = Image.open(input_img_addr)
    wpercent = (basewidth / float(input_image.size[0]))
    hsize = int((float(input_image.size[1]) * float(wpercent)))
    if resize_mode == 1:
        input_image = input_image.resize((basewidth,hsize), Image.ANTIALIAS)
    if resize_mode == 2:
        input_image = input_image.resize((basewidth,hsize), Image.BICUBIC)
    else:
        input_image = input_image.resize((basewidth,hsize))
    input_image.save(address_helper(input_img_addr,'res'), "PNG")

In [70]:
def pix_compare(input_img_addr, example_addr):
    input_image = Image.open(input_img_addr)
    input_pixels = input_image.load()
    example = Image.open(example_addr)
    true = 0 
    
    for i in range(0, input_image.size[0]):
        for j in range(0, input_image.size[1]):
            input_pixel = input_image.getpixel((i, j))
            example_pixel = example.getpixel((i, j))
            if input_pixel == example_pixel:                        
                if not (black_mode == 1 and input_pixels[i, j][0] == 0):
                    true+=1  
    return true

In [71]:
def string_compare(input_img_addr, example_addr):
    input_image = Image.open(input_img_addr)
    example = Image.open(example_addr)
    true = 0
    input_pix = ''
    example_pix = ''
    
    for i in range(0, input_image.size[0]):
        for j in range(0, input_image.size[1]):
            input_pix = input_pix + str(input_image.getpixel((i, j)))
            example_pix = example_pix + str(example.getpixel((i, j)))
        if hashing_mode == 'md5':
            true+=shamdist(hashlib.md5(input_pix).hexdigest(),hashlib.md5(example_pix).hexdigest())
        if hashing_mode == 'sha1':
            true+=shamdist(hashlib.sha1(input_pix).hexdigest(),hashlib.sha1(example_pix).hexdigest())
        if hashing_mode == 'sha224':
            true+=shamdist(hashlib.sha224(input_pix).hexdigest(),hashlib.sha224(example_pix).hexdigest())
        if hashing_mode == 'sha256':
            true+=shamdist(hashlib.sha256(input_pix).hexdigest(),hashlib.sha256(example_pix).hexdigest())
        if hashing_mode == 'sha384':
            true+=shamdist(hashlib.sha384(input_pix).hexdigest(),hashlib.sha384(example_pix).hexdigest())
        if hashing_mode == 'sha512':
            true+=shamdist(hashlib.sha512(input_pix).hexdigest(),hashlib.sha512(example_pix).hexdigest())
        input_pix = ''
        example_pix = ''
        
    for j in range(0, input_image.size[1]):
        for i in range(0, input_image.size[0]):
            input_pix = input_pix + str(input_image.getpixel((i, j)))
            example_pix = example_pix + str(example.getpixel((i, j)))          
        true+=shamdist(hashlib.md5(input_pix).hexdigest(),hashlib.md5(example_pix).hexdigest())
        input_pix = ''
        example_pix = ''
        
    return true

In [72]:
def ahash(input_img_addr):
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata()).reshape((basewidth, basewidth))
    avg = pixels.mean()
    diff = ''
    tmp = pixels > avg
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [73]:
def phash(input_img_addr):
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata(), dtype = numpy.float).reshape((basewidth, basewidth))
    dct = scipy.fftpack.dct(scipy.fftpack.dct(pixels, axis=0), axis=1)
    dct_low_freq = dct[:basewidth, :basewidth]
    med = numpy.median(dct_low_freq)
    diff = ''
    tmp = dctlowfreq > med
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [74]:
def dhash(input_img_addr):
    # resize(w, h), but numpy.array((h, w))
    image = Image.open(input_img_addr).convert('L').resize((basewidth, basewidth))
    pixels = numpy.array(image.getdata(), dtype=numpy.float).reshape((basewidth, basewidth))
    # compute differences between rows
    diff = ''
    tmp = pixels[:, 1:] > pixels[:, :-1]
    for i in range(0, basewidth-1):
        for j in range(0, basewidth-1):
            if tmp[i,j] == True:
                diff+='1'
            else:
                diff+='0'    
    return diff

In [75]:
def compare1to1(input_img_addr, example_addr):
    example_addr = address_helper(example_addr,'png')
    input_img_addr = os.getcwd()+'/binary/'+address_helper(input_img_addr,'png')
    
    if pix_mode == 1:      
        values.append(pix_compare(input_img_addr, example_addr))
        
    if string_mode == 1:
        strings.append(string_compare(input_img_addr, example_addr))
    
    resize(input_img_addr, basewidth)
    resize(example_addr, basewidth)
    example_addr = address_helper(example_addr,'res')
    input_img_addr = address_helper(input_img_addr,'res')
        
    if ahash_mode == 1:
        ahashes.append(shamdist(ahash(input_img_addr), ahash(example_addr)))
        
    if phash_mode == 1:
        phashes.append(shamdist(phash(input_img_addr), phash(example_addr)))
        
    if dhash_mode == 1:
        dhashes.append(shamdist(dhash(input_img_addr), dhash(example_addr)))
        
    if hash_mode == 1:
        distances.append(hamdist(gethash(input_img_addr), gethash(example_addr)))
            
    example_addr = address_helper(example_addr, 'cln')
    names.append(example_addr)   

In [81]:
def compare1toN(input_img_addr):
    #delete previous values
    global names
    global values
    global distances
    global strings
    global ahashes
    global phashes
    global dhashes
    names = []
    values = []
    distances = []
    strings = []
    ahashes = []
    phashes = []
    dhashes = []
    
    examples = os.listdir(examples_path)
    for i in range(len(examples)):
        if (".gif" in examples[i] 
            or ".jpg" in examples[i] 
            or ".png" in examples[i] 
            or ".bmp" in examples[i] 
            or ".jpeg" in examples[i]):
            binarization(examples_path+examples[i])
            if rotate_mode == 1:
                saverotate(binary_path+examples[i])
            
    examples = os.listdir(binary_path)
    for i in range(len(examples)):
        if (".gif" in examples[i] 
            or ".jpg" in examples[i] 
            or ".png" in examples[i] 
            or ".bmp" in examples[i] 
            or ".jpeg" in examples[i]):
            compare1to1(input_img_addr,binary_path+examples[i])

In [77]:
def print1toN():
    if pix_mode == 1:
        print('Pixel mode:')
        for i in range(len(names)):
            print(names[i],values[i])
    if hash_mode == 1:
        print('Hash mode:')
        for i in range(len(names)):
            print(names[i],distances[i])
    if string_mode == 1:
        print('String mode:')
        for i in range(len(names)):
            print(names[i],strings[i])
    if ahash_mode == 1:
        print('AHash mode:')
        for i in range(len(names)):
            print(names[i],ahashes[i])
    if phash_mode == 1:
        print('PHash mode:')
        for i in range(len(names)):
            print(names[i],phashes[i])
    if dhash_mode == 1:
        print('DHash mode:')
        for i in range(len(names)):
            print(names[i],dhashes[i])
    

In [78]:
def result1toN():
    print('Results:')
    global results
    global RESULT
    results = ['']
    if pix_mode == 1 and values != []:
        results.append(names[values.index(max(values))])
        print('Pixel mode:',names[values.index(max(values))])
    if hash_mode == 1 and distances != []:
        results.append(names[distances.index(min(distances))])
        print('Hash mode:',names[distances.index(min(distances))])
    if string_mode == 1 and strings != []:
        results.append(names[strings.index(min(strings))])
        print('String mode:',names[strings.index(min(strings))])
    if ahash_mode == 1 and ahashes != []:
        results.append(names[ahashes.index(min(ahashes))])
        print('AHash mode:',names[ahashes.index(min(ahashes))])
    if phash_mode == 1 and phashes != []:
        results.append(names[phashes.index(min(phashes))])
        print('PHash mode:',names[phashes.index(min(phashes))])
    if dhash_mode == 1 and dhashes != []:
        results.append(names[dhashes.index(min(dhashes))])
        print('DHash mode:',names[dhashes.index(min(dhashes))])
    RESULT += max(set(results), key=results.count)
    print('Result:',RESULT)

In [79]:
binarization(input_address)
compare1toN(input_address)
print1toN()
result1toN()

Hash mode:
('testgif270', 18)
('testgif90', 18)
('testgif', 0)
('testgif180', 18)
String mode:
('testgif270', 27084)
('testgif90', 27044)
('testgif', 0)
('testgif180', 27011)
AHash mode:
('testgif270', 15)
('testgif90', 12)
('testgif', 0)
('testgif180', 15)
PHash mode:
('testgif270', 25)
('testgif90', 27)
('testgif', 0)
('testgif180', 24)
DHash mode:
('testgif270', 26)
('testgif90', 31)
('testgif', 0)
('testgif180', 21)
Results:
('Hash mode:', 'testgif')
('String mode:', 'testgif')
('AHash mode:', 'testgif')
('PHash mode:', 'testgif')
('DHash mode:', 'testgif')
('Result:', 'testgif')
