In [None]:
# load python packages
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib
import math
import pprint

# load dataset
import scipy.io          
dataset = scipy.io.loadmat('dataset.mat')

# pull the image and label arrays of both splits out of the loaded .mat dictionary
x_train, y_train = dataset['train_image'], dataset['train_label']
x_test, y_test = dataset['test_image'], dataset['test_label']

# number of training samples plus the per-image row/column counts,
# all read from the first three axes of the training images
nmb_samples = x_train.shape[0]
img_rows = x_train.shape[1]
img_cols = x_train.shape[2]
nmb_test_samples = x_test.shape[0]

# adjust training image format
import tensorflow as tf
from keras import backend as K

# Add a singleton channel axis where the Keras backend expects it:
# right after the sample axis for 'channels_first', at the end otherwise.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# cast images and labels of both splits to float32
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
y_train, y_test = y_train.astype('float32'), y_test.astype('float32')

# shared pretty-printer (4-space indent) for displaying nested structures
pp = pprint.PrettyPrinter(indent=4)

In [None]:
# HU MOMENTS: shape descriptors computed from the image moments.
# Similar images yield values that are close to each other.

def momgeom(I, X, Y, p, q):
    """Return the raw geometric moment of order (p, q).

    Computes the sum over all elements of ``I * X**p * Y**q``.

    Parameters
    ----------
    I : array of intensities.
    X, Y : coordinate arrays that broadcast against ``I``.
    p, q : exponents applied to ``X`` and ``Y`` respectively.

    Returns
    -------
    A numpy scalar holding the moment.
    """
    # np.sum reduces over every axis in one vectorised call; the previous
    # builtin sum(sum(...)) looped over rows in Python and only worked for
    # inputs with exactly two dimensions.
    return np.sum(I * (X ** p) * (Y ** q))
    
def hu_moments(img):
    """Return the first two Hu moment invariants of a 2-D image.

    Parameters
    ----------
    img : 2-D array of pixel intensities with shape (N, M).

    Returns
    -------
    list ``[mom1, mom2]`` where ``mom1 = n20 + n02`` and
    ``mom2 = (n20 - n02)**2 + 4 * n11**2``, with ``nPQ`` the second-order
    central moments divided by ``m00**2``.

    Notes
    -----
    The total intensity ``m00`` is used as a divisor without a guard, so an
    image whose pixels sum to zero produces non-finite values.
    """
    N, M = img.shape
    # The coordinate grids must share the (N, M) shape of img. Building them
    # as mgrid[0:M, 0:N] only worked for square images and raised a
    # broadcasting error otherwise. X holds the row index, Y the column index.
    X, Y = np.mgrid[0:N, 0:M]

    # raw moments up to second order
    m00 = momgeom(img, X, Y, 0, 0)
    m10 = momgeom(img, X, Y, 1, 0)
    m01 = momgeom(img, X, Y, 0, 1)
    m20 = momgeom(img, X, Y, 2, 0)
    m02 = momgeom(img, X, Y, 0, 2)
    m11 = momgeom(img, X, Y, 1, 1)

    # terms subtracted from the raw moments to obtain central moments
    div20 = (m10 ** 2) / m00
    div02 = (m01 ** 2) / m00
    mult11 = (m10 * m01) / m00
    # normalisation factor for second-order moments
    div = m00 ** 2

    # normalised central moments
    n20 = (m20 - div20) / div
    n02 = (m02 - div02) / div
    n11 = (m11 - mult11) / div

    mom1 = n20 + n02
    mom2 = ((n20 - n02) ** 2) + 4 * n11 ** 2

    return [mom1, mom2]

def hu_points(train_sample, n_samples=200):
    """Compute the two Hu moments for the first ``n_samples`` images.

    Parameters
    ----------
    train_sample : array indexed as ``train_sample[i, :, :, 0]`` to obtain
        image ``i``. Presumably laid out (samples, rows, cols, channels);
        NOTE(review): a channels-first layout would not yield a full image
        from this slice -- confirm against the backend in use.
    n_samples : number of leading images to process. Defaults to 200, the
        value that used to be hard-coded.

    Returns
    -------
    Float array of shape ``(n_samples, 2)``; row ``i`` is
    ``hu_moments(image_i)``.
    """
    hupoints = []
    for i in range(n_samples):
        # The slice is already two-dimensional, so no reshape through the
        # notebook-level img_rows / img_cols globals is needed.
        img = np.array(train_sample[i, :, :, 0], dtype='float')
        hupoints.append(hu_moments(img))
    return np.array(hupoints, dtype='float').reshape((n_samples, 2))

# POINTS: (first Hu moment, label) pairs that serve as the 2-D coordinates for the kd-tree

def coord_points(x_array, y_array, n_points=200):
    """Pair the first feature column with the first label column.

    Parameters
    ----------
    x_array : 2-D array; only column 0 is read.
    y_array : 2-D array; only column 0 is read.
    n_points : number of leading rows to use. Defaults to 200, the value
        that used to be hard-coded.

    Returns
    -------
    Float array of shape ``(n_points, 2)`` whose row ``i`` is
    ``[x_array[i, 0], y_array[i, 0]]``.
    """
    points = [[x_array[i, 0], y_array[i, 0]] for i in range(n_points)]
    return np.array(points, dtype='float').reshape((n_points, 2))

# KDTREE KNN

def EUdistance(points1, points2):
    """Return the Euclidean distance between two 2-element points."""
    (ax, ay), (bx, by) = points1, points2
    delta_x = ax - bx
    delta_y = ay - by
    return math.sqrt((delta_x * delta_x) + (delta_y * delta_y))

def buildkdtree(points, depth = 0):
    """Recursively build a 2-D kd-tree out of nested dictionaries.

    The splitting axis alternates with depth (``depth % 2``). At each level
    the points are sorted along that axis and the element at index
    ``n // 2`` becomes the node; the elements before it form the left
    subtree and the elements after it the right subtree.

    Returns ``None`` for an empty input, otherwise a dict with the keys
    ``'point'``, ``'left'`` and ``'right'``.
    """
    count = len(points)
    if count <= 0:
        return None

    split_axis = depth % 2
    ordered = sorted(points, key=lambda candidate: candidate[split_axis])
    middle = count // 2

    node = {'point': ordered[middle]}
    node['left'] = buildkdtree(ordered[:middle], depth + 1)
    node['right'] = buildkdtree(ordered[middle + 1:], depth + 1)
    return node

def kdtree_closestpoint(root, point, depth=0, best=None):
    """Return the point stored in the kd-tree that is closest to ``point``.

    Parameters
    ----------
    root : node dict with keys ``'point'``, ``'left'`` and ``'right'`` (the
        structure produced by ``buildkdtree``), or ``None`` for an empty
        subtree.
    point : 2-element query point.
    depth : depth of ``root`` in the tree; selects the split axis
        (``depth % 2``).
    best : closest point found so far, or ``None``.

    Returns
    -------
    The closest stored point, or ``best`` unchanged when ``root`` is ``None``.
    """
    if root is None:
        return best

    axis = depth % 2
    node_point = root['point']

    # keep the node's own point when it improves on the current best
    if best is None or EUdistance(point, best) > EUdistance(point, node_point):
        best = node_point

    # descend first into the half-space that contains the query point
    if point[axis] < node_point[axis]:
        near_branch, far_branch = root['left'], root['right']
    else:
        near_branch, far_branch = root['right'], root['left']
    best = kdtree_closestpoint(near_branch, point, depth + 1, best)

    # The other half-space can only hold a closer point when the splitting
    # line is nearer to the query than the best match so far. Without this
    # backtracking step the search was a greedy descent that could miss the
    # true nearest neighbour.
    if abs(point[axis] - node_point[axis]) < EUdistance(point, best):
        best = kdtree_closestpoint(far_branch, point, depth + 1, best)

    return best

def kNearestNeighbor(tree_root, test_array, n_points=200):
    """Look up the closest tree point for each of the first ``n_points`` queries.

    Parameters
    ----------
    tree_root : root node passed on to ``kdtree_closestpoint``.
    test_array : indexable collection of 2-element query points.
    n_points : number of leading entries of ``test_array`` to query.
        Defaults to 200, the value that used to be hard-coded.

    Returns
    -------
    list with one entry per query, in query order, each being the result of
    ``kdtree_closestpoint`` (a single nearest point).
    """
    return [kdtree_closestpoint(tree_root, test_array[i]) for i in range(n_points)]

In [None]:
# KDTREE KNN MODEL

# Hu-moment features for the training and test images (one row per image).
xtrain_husample = hu_points(x_train)
xtest_husample = hu_points(x_test)
# Combine the features with the labels into point arrays.
# NOTE(review): coord_points is defined twice in this notebook; this cell uses
# whichever definition was executed last, so run the cells in order.
train_points = coord_points (xtrain_husample, y_train)
test_points = coord_points (xtest_husample, y_test)
# Build the kd-tree from the training points and display its nested structure.
kdtree = buildkdtree(train_points)
pp.pprint(kdtree)

In [None]:
# For every test point, fetch the training point returned by the kd-tree search.
predict = kNearestNeighbor(kdtree, test_points)
predict = np.array(predict)
# Order the rows by their first coordinate while keeping every row intact.
# np.sort(predict, axis=0) sorted each column independently, which broke the
# pairing between the two values of a point.
predict = predict[np.argsort(predict[:, 0], kind='stable')]
predict

In [None]:
# KDTREE KNN MODEL

# xtest_husample has one row per test image and one column per Hu moment, so
# plt.plot draws one line per moment. The labels describe what is actually
# plotted (they previously claimed train/test accuracy).
plt.plot(xtest_husample)
plt.title('Hu moments of the test images')
plt.ylabel('Hu moment value')
plt.xlabel('image')
plt.legend(['Hu moment 1', 'Hu moment 2'])
plt.show()

In [None]:
# DECISION TREE

# POINTS: each row holds the two Hu moments (the features tested by the split conditions) followed by the class label

def coord_points(x_array, y_array, n_points=200):
    """Stack the first two feature columns with the first label column.

    Parameters
    ----------
    x_array : 2-D array; columns 0 and 1 are read.
    y_array : 2-D array; only column 0 is read.
    n_points : number of leading rows to use. Defaults to 200, the value
        that used to be hard-coded.

    Returns
    -------
    Float array of shape ``(n_points, 3)`` whose row ``i`` is
    ``[x_array[i, 0], x_array[i, 1], y_array[i, 0]]``.
    """
    points = [
        [x_array[i, 0], x_array[i, 1], y_array[i, 0]]
        for i in range(n_points)
    ]
    return np.array(points, dtype='float').reshape((n_points, 3))

def split(node, max_depth, min_size, depth):
    """Grow the subtree below ``node`` in place.

    ``node['classes']`` holds the (left, right) groups of the node's split.
    The key is removed and replaced by ``'left'`` / ``'right'`` children,
    each either a leaf label from ``terminal`` or a further split dict.
    """
    left, right = node['classes']
    del node['classes']

    # one side is empty: both children become the same leaf
    if not (left and right):
        leaf = terminal(left + right)
        node['left'] = leaf
        node['right'] = leaf
        return

    # depth limit reached: close both sides with leaves
    if depth >= max_depth:
        node['left'], node['right'] = terminal(left), terminal(right)
        return

    # otherwise handle the left side completely, then the right side
    for side, group in (('left', left), ('right', right)):
        if len(group) > min_size:
            node[side] = get_split(group)
            split(node[side], max_depth, min_size, depth + 1)
        else:
            node[side] = terminal(group)

def terminal(classes):
    """Return the most frequent label (last element of each row) in ``classes``."""
    labels = []
    for row in classes:
        labels.append(row[-1])
    return max(set(labels), key=labels.count)

def test_split(index, value, x_array):
    left, right = list(), list()
    for point in x_array:
        if point[index] < value:
            left.append(point)
        else:
            right.append(point)
    return left, right

def _gini_index(groups, class_values):
    """Size-weighted Gini impurity of a candidate split (0.0 means pure groups)."""
    total = float(sum(len(group) for group in groups))
    gini = 0.0
    for group in groups:
        size = float(len(group))
        if size == 0:
            # an empty group contributes nothing and must not be divided by
            continue
        labels = [point[-1] for point in group]
        purity = sum((labels.count(value) / size) ** 2 for value in class_values)
        gini += (1.0 - purity) * (size / total)
    return gini


def get_split(x_array):
    """Choose the split of ``x_array`` with the lowest Gini impurity.

    Every feature column (all but the last element of a row, which is the
    label) is tried with every row's value as threshold.

    Returns
    -------
    dict with keys ``'index'`` (feature column), ``'value'`` (threshold) and
    ``'classes'`` (the ``(left, right)`` groups from ``test_split``).

    Previously no candidate was scored: each one overwrote the previous one,
    so the last feature and last row always won and ``class_values`` went
    unused.
    """
    class_values = list(set(point[-1] for point in x_array))
    b_index, b_value, b_score, b_classes = 999, 999, float('inf'), None
    for index in range(len(x_array[0]) - 1):
        for point in x_array:
            classes = test_split(index, point[index], x_array)
            score = _gini_index(classes, class_values)
            # strict comparison keeps the first of several equally good splits
            if score < b_score:
                b_index, b_value, b_score, b_classes = index, point[index], score, classes
    return {'index':b_index, 'value':b_value, 'classes':b_classes}

def build_tree(train, max_depth, min_size):
    """Build a decision tree from ``train`` and return its root node.

    The root split comes from ``get_split``; ``split`` then grows the tree
    in place, starting at depth 1.
    """
    tree = get_split(train)
    split(tree, max_depth, min_size, 1)
    return tree

In [None]:
def predict(node, point):
    """Walk the decision tree from ``node`` and return the leaf label for ``point``.

    At each node the left child is taken when ``point[node['index']]`` is
    strictly below ``node['value']``, the right child otherwise. A child
    that is not a dict is a leaf and is returned as the prediction.
    """
    current = node
    while True:
        if point[current['index']] < current['value']:
            child = current['left']
        else:
            child = current['right']
        if not isinstance(child, dict):
            return child
        current = child

def decisiontree(train, test, max_depth, min_size):
    """Fit a decision tree on ``train`` and return its predictions for ``test``.

    Returns a list with one predicted label per element of ``test``, in order.
    """
    tree = build_tree(train, max_depth, min_size)
    return [predict(tree, point) for point in test]

# Rebuild the point arrays for the decision tree from the Hu-moment features and labels.
train_points = coord_points (xtrain_husample, y_train)
test_points = coord_points (xtest_husample, y_test)
# Column sums: the total of each Hu moment over all training images.
# xtrain_husample[:][0] selected the first ROW (one image), not the first column.
sumhu1 = np.sum(xtrain_husample[:, 0])
sumhu2 = np.sum(xtrain_husample[:, 1])

# Fit a tree with max_depth=3 and min_size=0.00001, then display it.
tree = build_tree(train_points, 3, 0.00001)
pp.pprint(tree)

In [None]:
# Predict a label for every test point. decisiontree builds its own tree from
# train_points with the same max_depth / min_size arguments (3, 0.00001).
dstree = decisiontree(train_points, test_points, 3, 0.00001)
print(dstree)

In [None]:
# DECISION TREE MODEL PLOT

# summarize for accuracy
# Each plt.plot call draws one line per column, i.e. one per Hu moment, so four
# lines appear in total. All four are labelled and the title/axis text describes
# what is plotted (it previously claimed to show model accuracy).
plt.plot(xtrain_husample)
plt.plot(xtest_husample)
plt.title('Hu moments of the train and test images')
plt.ylabel('Hu moment value')
plt.xlabel('image')
plt.legend(['train Hu moment 1', 'train Hu moment 2',
            'test Hu moment 1', 'test Hu moment 2'], loc='upper left')
plt.show()

In [None]:
# load python packages
import scipy as sp
import pandas as pd
import IPython
import sklearn
import keras

# import keras packages
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D 

# training hyper-parameters used by the model.fit call
batch_size = 32
num_classes = 2
epochs = 200

# Drop the singleton label axis and convert the labels into num_classes-wide
# categorical vectors.
# NOTE(review): y_train / y_test are overwritten in place, so re-running this
# cell feeds already-encoded labels back into to_categorical; restart the
# kernel (or keep the raw labels under another name) before re-running.
y_train = keras.utils.to_categorical(np.squeeze(y_train), num_classes)
y_test = keras.utils.to_categorical(np.squeeze(y_test), num_classes)


In [None]:
# CNN model: two 3x3 convolution layers (32 and 64 filters), 2x2 max pooling,
# one 512-unit dense layer and a softmax output with num_classes units
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu', 
                 input_shape=input_shape))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# compile model: categorical cross-entropy loss, Adadelta optimizer with its
# default settings, accuracy tracked as a metric
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# training; the test split is passed as validation data, so the val_* entries
# in history are measured on the test set
history = model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))

In [None]:
# print train and test losses and classification accuracies
score = model.evaluate(x_train, y_train, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Keras stores the accuracy history under 'acc' in older releases and under
# 'accuracy' in newer ones; use whichever key this run produced instead of
# failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
val_acc_key = 'val_' + acc_key

# summarize history for accuracy
plt.plot(history.history[acc_key])
plt.plot(history.history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()