In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import math

In [2]:
def normalize_image(image):
    """Binarise a flat pixel sequence in place.

    Every entry that is not equal to 0 is overwritten with 1; entries equal
    to 0 are left untouched.

    Args:
        image: Mutable, index-assignable sequence of scalar pixel values
            (e.g. a list or a 1-D NumPy array).

    Returns:
        The same ``image`` object that was passed in, after modification.
    """
    for position, value in enumerate(image):
        if value == 0:
            # Already at the "off" value; nothing to rewrite.
            continue
        image[position] = 1
    return image

In [3]:
def load_image(path):
    """Load an image file and return its pixel data binarised to 0/1.

    The values returned by ``Image.getdata()`` are copied into a NumPy array,
    which is then passed through ``normalize_image``.

    Args:
        path: Location of the image file; handed straight to ``Image.open``.

    Returns:
        The NumPy array returned by ``normalize_image``.

    Note:
        Assumes a single-band image, so that each pixel is a scalar and the
        array is 1-D -- TODO confirm against the dataset.
    """
    # Image.open is lazy; the context manager guarantees the underlying file
    # handle is released even if decoding fails part-way through. The pixel
    # data is copied into the array before the image is closed.
    with Image.open(path) as source:
        image = np.array(source.getdata())
    return normalize_image(image)

In [4]:
def __symmetrical_samples(img1, img2, y):
    """Build the two order-swapped samples for one labelled image pair.

    Args:
        img1: First image; combined with ``img2`` using ``+``.
        img2: Second image.
        y: Label attached to both resulting samples.

    Returns:
        ``[[img1 + img2, y], [img2 + img1, y]]`` -- a list of two
        ``[features, label]`` lists.
    """
    forward = [img1 + img2, y]
    backward = [img2 + img1, y]
    return [forward, backward]

In [5]:
def __column(matrix, i):
    """Return entry ``i`` of every row in ``matrix``, collected into a list."""
    picked = []
    for row in matrix:
        picked.append(row[i])
    return picked

In [6]:
def resize_2D_image(image, size):
    """Rescale a 2-D pixel grid to ``size`` x ``size`` using PIL.

    The grid is flattened, written into a new mode ``"L"`` PIL image whose
    width is ``len(image[0])`` and whose height is ``len(image)``, resized
    with PIL's default resampling, and converted back to a list of rows.

    Args:
        image: Non-empty list of pixel rows.
        size: Edge length of the square output, in pixels.

    Returns:
        The resized image as a list of row lists.
    """
    pixels = __image_2D_to_1D(image)
    width, height = len(image[0]), len(image)

    canvas = Image.new(mode="L", size=(width, height))
    canvas.putdata(pixels)
    rescaled = canvas.resize((size, size))

    return __image_1D_to_2D(list(rescaled.getdata()))

In [7]:
def __image_1D_to_2D(image):
    """Split a flat pixel sequence into rows of ``int(sqrt(len(image)))`` pixels.

    Args:
        image: Flat, sized iterable of pixel values.

    Returns:
        A list of row lists. Each row holds exactly ``int(sqrt(len(image)))``
        pixels; trailing pixels that do not fill a complete row are dropped.
    """
    side = int(math.sqrt(len(image)))
    rows = []
    current = []
    for pixel in image:
        current.append(pixel)
        if len(current) == side:
            # Row is full: store it and start a fresh one.
            rows.append(current)
            current = []
    return rows

In [8]:
def __image_2D_to_1D(image):
    """Flatten a sequence of pixel rows into one list, row after row.

    Args:
        image: Iterable of iterables (rows of pixels).

    Returns:
        A new list containing every pixel in row-major order.
    """
    return [pixel for row in image for pixel in row]

In [9]:
def is_1D_image(image):
    """Tell whether ``image`` is a flat (1-D) list of pixels.

    Args:
        image: Any object.

    Returns:
        True only when ``image`` is a non-empty ``list`` whose first element
        is not itself a ``list``. False for everything else, including an
        empty list and non-list containers such as tuples or NumPy arrays.
    """
    # An explicit emptiness check replaces the former bare ``except``, which
    # existed only to absorb the IndexError from ``image[0]`` on an empty
    # list but would also have hidden unrelated errors and interrupts.
    if not isinstance(image, list) or not image:
        return False
    return not isinstance(image[0], list)

In [10]:
def is_2D_image(image):
    """Tell whether ``image`` is a list of pixel rows (a 2-D image).

    Args:
        image: Any object.

    Returns:
        True only when ``image`` is a non-empty ``list`` whose first element
        is itself a ``list``. False for everything else, including an empty
        list and non-list containers such as tuples or NumPy arrays.
    """
    # An explicit emptiness check replaces the former bare ``except``, which
    # existed only to absorb the IndexError from ``image[0]`` on an empty
    # list but would also have hidden unrelated errors and interrupts.
    if not isinstance(image, list) or not image:
        return False
    return isinstance(image[0], list)

In [11]:
def ensure_1D_image(image):
    """Return ``image`` in flat form.

    When ``is_1D_image`` accepts the input it is returned unchanged (same
    object); otherwise it is flattened with ``__image_2D_to_1D``.
    """
    if is_1D_image(image):
        return image
    return __image_2D_to_1D(image)

In [12]:
def ensure_2D_image(image):
    """Return ``image`` as a list of rows.

    When ``is_2D_image`` accepts the input it is returned unchanged (same
    object); otherwise it is split into rows with ``__image_1D_to_2D``.
    """
    if is_2D_image(image):
        return image
    return __image_1D_to_2D(image)

In [13]:
def image_differences_2D(img1, img2):
    """Compute the per-pixel absolute difference of two images.

    Both inputs are first passed through ``ensure_2D_image``. The result has
    the row and column layout of ``img1``; ``img2`` is indexed at the same
    positions, so it must be at least as large in every row.

    Args:
        img1: First image (flat or 2-D).
        img2: Second image (flat or 2-D).

    Returns:
        A list of rows where each entry is ``abs(img1[i][j] - img2[i][j])``.
    """
    first = ensure_2D_image(img1)
    second = ensure_2D_image(img2)

    return [
        [abs(first[i][j] - second[i][j]) for j in range(len(row))]
        for i, row in enumerate(first)
    ]


In [14]:
def get_object_frame(image):
    """Find the bounding box of all zero-valued pixels in a 2-D image.

    Args:
        image: Sequence of pixel rows. A pixel counts as part of the object
            when it compares equal to 0.

    Returns:
        A tuple ``(x_min, x_max, y_min, y_max)`` of inclusive column (x) and
        row (y) indices. When the image contains no zero pixel, the sentinel
        ``(len(image) + 1, -1, len(image) + 1, -1)`` is returned.
    """
    # The minima start as None rather than a numeric guess. The earlier
    # version seeded x_min with len(image) + 1 (the row count), which gave a
    # wrong left bound on images wider than tall when the object lay entirely
    # to the right of that column.
    x_min = None
    y_min = None
    x_max = -1
    y_max = -1

    for i, row in enumerate(image):
        for j, value in enumerate(row):
            if value == 0:
                if y_min is None or i < y_min:
                    y_min = i
                if i > y_max:
                    y_max = i
                if x_min is None or j < x_min:
                    x_min = j
                if j > x_max:
                    x_max = j

    if y_min is None:
        # No object pixel found: keep the historical "empty" sentinel so that
        # callers comparing min > max continue to work.
        sentinel = len(image) + 1
        return sentinel, -1, sentinel, -1

    return x_min, x_max, y_min, y_max

In [15]:
def crop_image(image, x_min, x_max, y_min, y_max):
    """Cut the inclusive rectangle ``[x_min, x_max] x [y_min, y_max]`` out of a 2-D image.

    Args:
        image: Sequence of pixel rows.
        x_min: First column to keep.
        x_max: Last column to keep (inclusive).
        y_min: First row to keep.
        y_max: Last row to keep (inclusive).

    Returns:
        A new list of rows containing only the pixels inside the rectangle.
        Rows that end up with no pixels are omitted, so an empty or inverted
        rectangle yields ``[]``.
    """
    cropped = []
    for row_idx, row in enumerate(image):
        if not (y_min <= row_idx <= y_max):
            continue
        kept = [
            value
            for col_idx, value in enumerate(row)
            if x_min <= col_idx <= x_max
        ]
        if kept:
            cropped.append(kept)
    return cropped

In [16]:
def extract_visual_object_2D(image):
    """Crop a 2-D image to its object and scale the crop back up.

    The bounding box comes from ``get_object_frame``, the crop from
    ``crop_image``, and the result is resized by ``resize_2D_image`` to a
    square whose edge equals ``len(image)`` (the input's row count).

    Args:
        image: Sequence of pixel rows.

    Returns:
        The value returned by ``resize_2D_image`` for the cropped image.
    """
    # get_object_frame returns (x_min, x_max, y_min, y_max), which is exactly
    # the positional order crop_image expects after the image argument.
    frame = get_object_frame(image)
    cropped = crop_image(image, *frame)
    return resize_2D_image(cropped, size=len(image))

In [17]:
def __object_cropp_scale(image1):
    """Crop an image to its object, rescale it, and return it flat.

    The input is converted with ``ensure_2D_image``, processed by
    ``extract_visual_object_2D`` and finally passed through
    ``ensure_1D_image``.
    """
    grid = ensure_2D_image(image1)
    return ensure_1D_image(extract_visual_object_2D(grid))

In [18]:
def __format_image(image):
    """Prepare one image for the model.

    Runs ``__object_cropp_scale`` on the input and passes the result through
    ``ensure_1D_image`` before returning it.
    """
    return ensure_1D_image(__object_cropp_scale(image))

In [19]:
def format_fit_inputs(X, Y):
    """Convert labelled image pairs into flat feature vectors.

    Each pair is formatted with ``__format_image`` and expanded into two
    samples (one per ordering of the two images). To each sample's features
    the flattened absolute difference of the two formatted images is
    appended.

    Args:
        X: Sequence of pairs; element 0 and element 1 of each pair are images.
        Y: Sequence of labels, indexed in parallel with ``X``.

    Returns:
        A tuple ``(features, labels)`` of two lists, each holding two entries
        per input pair.
    """
    new_samples = []
    for i, pair in enumerate(X):
        img1 = __format_image(pair[0])
        img2 = __format_image(pair[1])

        # The difference image is identical for both orderings (the absolute
        # difference is symmetric), so compute it once per pair instead of
        # once per sample.
        difference = ensure_1D_image(image_differences_2D(img1, img2))

        for sample in __symmetrical_samples(img1, img2, Y[i]):
            sample[0] += difference
            new_samples.append(sample)

    X = __column(new_samples, 0)
    Y = __column(new_samples, 1)
    return X, Y

In [20]:
# Dataset index; image_comparison() reads each image's file location from the
# "path" column of this frame.
df = pd.read_csv("omniglot_dataset.csv")

# Empty placeholders for the samples and labels assigned in later cells.
X, y = [], []

In [21]:
def image_comparison(n_negatives=19, negative_start=20):
    """Build one positive and several negative image pairs around row 0 of ``df``.

    The image at row 0 is the anchor. It is paired with row 1 under label 1,
    then with ``n_negatives`` randomly chosen rows at index
    ``negative_start`` or above under label 0.

    Args:
        n_negatives: Number of label-0 pairs to generate.
        negative_start: First row index eligible for a label-0 pair.

    Returns:
        A tuple ``(X_list, y_list)``: ``X_list`` holds ``[anchor, other]``
        image pairs, and ``y_list`` holds the matching 1/0 labels.

    Raises:
        ValueError: From ``np.random.choice`` when no row at or above
            ``negative_start`` exists and ``n_negatives`` is positive.

    Note:
        Presumably rows 0 and 1 show the same character and rows from
        ``negative_start`` onward show different ones -- verify against the
        CSV. Sampling uses NumPy's global random state; seed it beforehand
        for reproducible pairs.
    """
    anchor = load_image(df.iloc[0]["path"])
    X_list = [[anchor, load_image(df.iloc[1]["path"])]]
    y_list = [1]

    # Draw all negatives in a single call so that replace=False really
    # prevents duplicates. The earlier one-draw-per-iteration form made the
    # flag a no-op and could pick the same row twice. Repeats are only
    # allowed when there are too few candidates to fill the request.
    candidates = np.arange(negative_start, len(df))
    allow_repeats = len(candidates) < n_negatives
    chosen_rows = np.random.choice(
        candidates, size=n_negatives, replace=allow_repeats
    )

    for row in chosen_rows:
        X_list.append([anchor, load_image(df.iloc[int(row)]["path"])])
        y_list.append(0)

    return X_list, y_list

In [22]:
# Build the raw image pairs and their labels: image_comparison() returns one
# pair labelled 1 followed by the pairs labelled 0.
X, y = image_comparison()

In [23]:
# Convert the raw pairs into flat feature lists (two samples per pair).
# NOTE: this overwrites X and y with the formatted data, so the cell is not
# idempotent -- re-run the image_comparison() cell before running it again.
X, y = format_fit_inputs(X, y)

In [28]:
# len(y) -> 40: the 20 image pairs are each emitted in both orderings

40

In [29]:
from sklearn.tree import DecisionTreeClassifier

# Fit a depth-limited decision tree on the formatted samples.
# random_state is fixed so that repeated runs on the same X and y build the
# same tree; without it, ties between equally good splits are broken randomly.
clf = DecisionTreeClassifier(max_depth=5, random_state=0)
clf = clf.fit(np.array(X), np.array(y))