# Standalone Demo of an optimized Random Forest Fork Classifier
Machine Learning For Robotics (RO47002) - Final Assignment 

Tom Kerssemakers - 4345487, Oyono Ramos Lourenço de Armada - 4459059


#### Scroll to the last cell for the demo

In [1]:
# Imports
import sklearn as sk
import scipy
import skimage
import skimage.transform
import skimage.util
import numpy as np
import os, glob
import ipywidgets
import pickle
import tensorflow as tf
import random
import math

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from matplotlib.patches import Circle
from IPython.display import display
from collections import OrderedDict
from scipy import ndimage as ndi
from tensorflow import keras
from tensorflow.python.keras.layers import Dense
from scipy.spatial import distance
from skimage import feature, data, io, filters
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.callbacks import LearningRateScheduler
from tensorflow.python.keras.callbacks import History
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Seed NumPy's global random generator so that results are repeatable across runs.
# Note: sample_points_grid() re-seeds this same global generator (with 1) on every call.
np.random.seed(42)

# Render matplotlib figures inline and set larger font sizes for axis and tick labels
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)




In [2]:
# Definitions

def eval_classifier(clf, X, y):
    """Run a fitted classifier on `X` and score its predictions against `y`.

    Parameters
    ----------
    clf : object exposing ``predict(X)``
    X : feature matrix handed to ``clf.predict``
    y : ground-truth labels for the rows of `X`

    Returns
    -------
    tuple
        ``(y_pred, accuracy, confmat)`` where `accuracy` comes from
        ``accuracy_score`` and `confmat` from ``confusion_matrix``.
    """
    predictions = clf.predict(X)
    return (
        predictions,
        accuracy_score(y, predictions),
        confusion_matrix(y, predictions),
    )

def report_eval(name, accuracy, confmat):
    """Print a small evaluation report: a title line, the confusion matrix
    and the accuracy formatted with three decimals.

    Parameters
    ----------
    name : label shown in the title line
    accuracy : number, printed as ``{accuracy:.3f}``
    confmat : confusion matrix, printed with its default representation
    """
    print(f'*** {name} ***', ' confusion matrix:', sep='\n')
    print(confmat)
    print(f' accuracy: {accuracy:.3f}')

def load_and_resize_image(filename, overwrite=False):
    """Load an image and shrink it so that neither side exceeds 512 pixels.

    Images that already fit are returned exactly as read by ``plt.imread``.
    Larger images are rescaled (aspect ratio preserved) with anti-aliasing.
    Both colour images (height, width, channels) and 2-D grayscale images
    are supported.

    Parameters
    ----------
    filename : str
        Path of the image file to read.
    overwrite : bool, default False
        If True and the image had to be shrunk, the shrunk image is written
        back to `filename`, replacing the original file on disk.

    Returns
    -------
    numpy.ndarray
        The (possibly rescaled) image.
    """
    MAX_SIZE = 512
    I = plt.imread(filename)

    # is the width or height too large?
    oversize_ratio = max(I.shape[0] / MAX_SIZE, I.shape[1] / MAX_SIZE)
    if oversize_ratio > 1.0:
        print('resizing image ...')
        shrink = 1. / oversize_ratio
        # Scale only the two spatial axes. A trailing channel axis (if any)
        # keeps factor 1; a 2-D grayscale image gets a 2-element scale, since
        # rescale needs one factor per image dimension.
        scale = (shrink, shrink) + (1,) * (I.ndim - 2)
        I = skimage.transform.rescale(I, scale, anti_aliasing=True)

        # overwrite the old image file after resizing
        if overwrite:
            print(f'overwriting "{filename}" ...')
            if I.ndim == 2:
                # Scalar data would otherwise be saved through the default
                # colormap with min/max contrast stretching.
                # assumes the rescaled grayscale values lie in [0, 1] — TODO confirm
                plt.imsave(filename, I, cmap='gray', vmin=0, vmax=1)
            else:
                plt.imsave(filename, I)

    return I


def BW_filter(filenames, size=512):
    """Read images as grayscale and apply a Sobel edge filter to each.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the image files to process.
    size : int, default 512
        Expected height and width of every image. All images must already be
        exactly ``size`` x ``size`` pixels.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(filenames), size, size)`` holding the
        Sobel-filtered grayscale images.

    Raises
    ------
    ValueError
        If an image is not ``size`` x ``size`` after grayscale conversion.
    """
    filenames = list(filenames)
    Ibwf = np.empty([len(filenames), size, size])
    for i, filename in enumerate(filenames):
        gray = skimage.io.imread(filename, as_gray=True)
        # Fail with an explicit message instead of an opaque broadcasting
        # error when an image does not fit the preallocated buffer.
        if gray.shape != (size, size):
            raise ValueError(
                f'"{filename}" has shape {gray.shape} after grayscale conversion, '
                f'but BW_filter needs {size}x{size} images'
            )
        Ibwf[i, :, :] = filters.sobel(gray)
    print('...all %i images have been transformed to B&W and filtered...' %len(Ibwf))
    return Ibwf

# Get Image information
def get_image_width(I):
    """Return the width (number of columns) of image array `I`.

    Only the second axis of ``I.shape`` is read, so 2-D grayscale arrays and
    arrays with a trailing channel axis are both accepted.
    """
    return I.shape[1]

def get_image_height(I):
    """Return the height (number of rows) of image array `I`.

    Only the first axis of ``I.shape`` is read, so 2-D grayscale arrays and
    arrays with a trailing channel axis are both accepted.
    """
    return I.shape[0]

# Remove points that are too close to the border of the figure
def remove_points_near_border(I, points, HALF_WIN_SIZE):
    """Keep only the points that lie strictly inside the image by more than
    half a window.

    Parameters
    ----------
    I : image array, used only for its width and height
    points : array with one point per row; column 0 is x, column 1 is y
    HALF_WIN_SIZE : sequence; element 0 is the margin applied to y,
        element 1 the margin applied to x

    Returns
    -------
    numpy.ndarray
        The rows of `points` with ``margin_x < x < W - margin_x`` and
        ``margin_y < y < H - margin_y``.
    """
    W = get_image_width(I)
    H = get_image_height(I)
    margin_y, margin_x = HALF_WIN_SIZE[0], HALF_WIN_SIZE[1]

    col_x = points[:, 0]
    col_y = points[:, 1]

    # one combined mask instead of four successive filters
    inside = (
        (col_x > margin_x)
        & (col_y > margin_y)
        & (col_x < W - margin_x)
        & (col_y < H - margin_y)
    )
    return points[inside, :]

def sample_points_grid(I, WIN_SIZE, HALF_WIN_SIZE):
    """Build a jittered regular grid of candidate window centres over `I`.

    Grid spacing is one third of the window size along each axis. Every
    centre is shifted by a random offset in [0, 10) per coordinate, and
    centres too close to the border are dropped afterwards.

    Note: the global NumPy random generator is re-seeded with 1 on every
    call, so the jitter is identical each time.

    Returns
    -------
    numpy.ndarray
        Float array with one (x, y) centre per row.
    """
    img_w = get_image_width(I)
    img_h = get_image_height(I)

    stride_y = WIN_SIZE[0] // 3
    stride_x = WIN_SIZE[1] // 3

    # window centres along each axis
    ys = range(HALF_WIN_SIZE[0], img_h - HALF_WIN_SIZE[0] + 1, stride_y)
    xs = range(HALF_WIN_SIZE[1], img_w - HALF_WIN_SIZE[1] + 1, stride_x)

    grid_x, grid_y = np.meshgrid(xs, ys)
    grid = np.column_stack((grid_x.ravel(), grid_y.ravel())).astype(float)

    # add a bit of random offset (fixed seed -> same offsets on every call)
    np.random.seed(1)
    jitter = np.random.rand(*grid.shape) * 10
    grid = grid + jitter

    # discard points close to border where we can't extract patches
    return remove_points_near_border(I, grid, HALF_WIN_SIZE)

def sample_points_around_targets2(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE):
    """Draw normally distributed sample points around the targets `p1` and `p2`.

    200 points are drawn around each target with a standard deviation equal
    to the first two entries of `HALF_WIN_SIZE`. Points too close to the
    image border are discarded. `WIN_SIZE` is accepted but not used.

    Returns
    -------
    numpy.ndarray
        Array with one sampled (x, y) point per row: the points around `p1`
        first, then those around `p2`.
    """
    samples_per_target = 200
    spread = np.array(HALF_WIN_SIZE[:2]) / 1

    # one point cloud per target, drawn in the order p1, p2
    clouds = [
        np.random.randn(samples_per_target, 2) * spread + target
        for target in (p1, p2)
    ]
    candidates = np.vstack(clouds)

    # discard points close to border where we cannot extract patches
    return remove_points_near_border(I, candidates, HALF_WIN_SIZE)

# Collect patch at generated points
def get_patch_at_point(I, p, HALF_WIN_SIZE):
    """Cut the window centred on point `p` out of image `I`.

    Parameters
    ----------
    I : numpy.ndarray
        Image indexed as ``I[row, column]``.
    p : array-like
        Point ``(x, y)``; coordinates are truncated to integers.
    HALF_WIN_SIZE : sequence
        Half window size; element 0 is the half height (rows), element 1
        the half width (columns). This matches how
        ``remove_points_near_border`` applies the margins.

    Returns
    -------
    numpy.ndarray
        A copy of ``I[y - half_h : y + half_h, x - half_w : x + half_w]``.
    """
    x, y = np.asarray(p).astype(int)[:2]
    half_h, half_w = HALF_WIN_SIZE[0], HALF_WIN_SIZE[1]
    # rows use the half height, columns the half width
    # (the row slice previously used the half width as well)
    P = np.array(I[y - half_h:y + half_h, x - half_w:x + half_w])
    return P

# Turn patches into feature vectors

def patch_to_vec(P):
    """Resize patch `P` to a fixed 9x9x1 tensor and return it flattened.

    Returns
    -------
    numpy.ndarray
        1-D feature vector obtained by flattening the resized patch.
    """
    FEAT_SIZE = (9, 9, 1)
    return skimage.transform.resize(P, FEAT_SIZE).flatten()

# Define label for all points
def make_labels_for_points(I, p1, p2, points, WIN_SIZE):
    """Assign an integer class label to every point from its distance to
    the different parts of the cutlery.

    Labels
    ------
    0 : closest to `p1` (tip of the cutlery)
    1 : closest to `p2` (end of the cutlery)
    2 : closest to the middle section between `p1` and `p2`
    3 : background, i.e. none of the distances is below ``WIN_SIZE[0] * 2/3``

    Parameters
    ----------
    I : unused
    p1, p2 : 1-D arrays holding the (x, y) of tip and end
    points : array with one (x, y) point per row
    WIN_SIZE : sequence; only element 0 is used (for the background threshold)

    Returns
    -------
    numpy.ndarray
        One integer label per row of `points`.
    """
    def _distance_to(anchor):
        # Euclidean distance of every point to a single anchor
        offsets = points - anchor
        return np.sqrt(np.square(offsets).sum(axis=1))

    # 100 points on the segment between p1 and p2, skipping the outer 20% on
    # each side; a point's "middle" distance is the distance to the nearest one
    weights = np.linspace(0.2, 0.8, 100)[:, None]
    shaft = p1[None, :] * weights + p2[None, :] * (1. - weights)
    shaft_dist = scipy.spatial.distance_matrix(shaft, points).min(axis=0)

    # a constant column acts as the background class: it wins the argmin
    # whenever all real distances exceed this threshold
    background_cutoff = WIN_SIZE[0] * 2. / 3.
    background = np.full(points.shape[0], background_cutoff)

    # one column per class; the label is the column with the smallest value
    class_dist = np.column_stack((
        _distance_to(p1),
        _distance_to(p2),
        shaft_dist,
        background,
    ))
    return class_dist.argmin(axis=1)

def extract_patches(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE):
    """Sample points around `p1` and `p2`, label them and turn the patch at
    each point into a feature vector.

    Returns
    -------
    tuple
        ``(X, labels, points)``: feature vectors (one row per point), the
        class label of each point, and the sampled points themselves.
    """
    # sample points around target
    points = sample_points_around_targets2(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE)

    # determine the labels of the points
    labels = make_labels_for_points(I, p1, p2, points, WIN_SIZE)

    # one feature vector per sampled point
    X = np.array([
        patch_to_vec(get_patch_at_point(I, point, HALF_WIN_SIZE))
        for point in points
    ])
    return X, labels, points
    
def extract_patches2(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE, dist):
    """Variant of ``extract_patches`` that takes an extra `dist` argument.

    `dist` is accepted but not used; the sampling, labelling and feature
    extraction are exactly those of ``extract_patches``.

    Returns
    -------
    tuple
        ``(X, labels, points)`` as returned by ``extract_patches``.
    """
    X, labels, points = extract_patches(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE)
    return X, labels, points

def show_pickuppoint(Ibwf, Is, idx, pickuppoints_all, gridpoints_all, points_all, p1_all, p2_all,bestgridpoints1_all,bestgridpoints2_all, best_mean1_all, best_mean2_all, showgridpoints, showpoints, showfilter):
    """Plot image number `idx` with its predicted pick-up point and,
    optionally, the intermediate points of the prediction.

    Every ``*_all`` argument is a per-image sequence indexed by `idx`.

    Markers
    -------
    red x          : pick-up point (always drawn)
    yellow dots    : refinement sample points        (showpoints)
    cyan stars     : ``best_mean1`` / ``best_mean2`` (showpoints)
    green dots     : grid points                     (showgridpoints)
    blue x         : ``p1`` / ``p2``                 (showgridpoints)
    magenta < / >  : best grid points of set 1 / 2   (showgridpoints)

    `showfilter` selects the filtered image (``Ibwf``) as background
    instead of the loaded one (``Is``).
    """
    # The flags are compared with `== True` on purpose, so the integer 0/1
    # values coming from the widget sliders behave exactly as before.
    background = Ibwf[idx] if showfilter == True else Is[idx]

    # pick the entries belonging to the current image
    pickup = pickuppoints_all[idx]
    grid = gridpoints_all[idx]
    samples = points_all[idx]
    tip_guess = p1_all[idx]
    end_guess = p2_all[idx]
    best_grid_1 = bestgridpoints1_all[idx]
    best_grid_2 = bestgridpoints2_all[idx]
    refined_1 = best_mean1_all[idx]
    refined_2 = best_mean2_all[idx]

    # Plot figure
    plt.figure()
    plt.imshow(background, cmap=plt.cm.gray)

    if showpoints == True:
        plt.plot(samples[:, 0], samples[:, 1], '.y')
        for refined in (refined_1, refined_2):
            plt.plot(refined[0], refined[1], '*c')

    if showgridpoints == True:
        plt.plot(grid[:, 0], grid[:, 1], '.g')
        for guess in (tip_guess, end_guess):
            plt.plot(guess[0], guess[1], 'bx')
        plt.plot(best_grid_1[:, 0], best_grid_1[:, 1], '<m')
        plt.plot(best_grid_2[:, 0], best_grid_2[:, 1], '>m')

    plt.plot(pickup[0], pickup[1], 'rx')
    plt.show()



In [3]:


#########################################################################################################


# IMAGE PREDICTOR
# Create validation set of full pictures
def image_predictior(image_dir, clf, show):
    """Predict a pick-up point for every ``*.jpg`` image in `image_dir`.

    Steps per image:
      1. Classify patches on a jittered grid and take, for class 0 and for
         class 1, the grid points whose probability is at least 90% of the
         highest probability found for that class. Their (integer) means are
         the coarse estimates ``p1`` and ``p2``.
      2. Sample new points around ``p1`` and ``p2``, classify them and take
         the median of the best points per class as the refined estimates.
      3. The pick-up point is the midpoint of the two refined estimates.

    Side effect: images larger than 512 pixels are downscaled AND
    OVERWRITTEN on disk, because the preprocessing step re-reads the files.

    Parameters
    ----------
    image_dir : str
        Directory that is searched for ``*.jpg`` files (other extensions,
        e.g. ``*.jpeg``, are not picked up).
    clf : classifier exposing ``predict_proba``; probability columns 0 and 1
        are used as the two cutlery-end classes.
    show : bool
        If truthy, display an interactive ipywidget with the results.

    Returns
    -------
    tuple
        ``(pickuppoint_all, Ibwf)``: a list with one ``[x, y]`` pick-up point
        per image, and the stack of filtered images. For a directory without
        images the list is empty.
    """
    # A point counts as "best" for a class when its probability reaches this
    # fraction of the highest probability observed for that class.
    REL_PROB_THRESHOLD = 0.9

    # Import and resize data if needed
    filenames = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
    N = len(filenames)
    print(f'found {N} images in target directory')

    # BW_filter reads the files again from disk, so the resized images have
    # to be written back for both image sets to have the same size.
    overwrite = True
    Is = [load_and_resize_image(filename, overwrite) for filename in filenames]
    print('loaded %d images' % len(Is))

    # PreProcessing of Images
    Ibwf = BW_filter(filenames)

    # Nothing to predict: return early instead of failing in np.vstack([])
    if N == 0:
        return [], Ibwf

    # Initialize storage
    WIN_SIZE = (50, 50, 1)
    HALF_WIN_SIZE = (WIN_SIZE[0] // 2, WIN_SIZE[1] // 2, WIN_SIZE[2])
    points_all = []
    pickuppoint_all = []
    gridpoints_all = []
    p1_all = []
    p2_all = []
    bestgridpoints1_all = []
    bestgridpoints2_all = []
    best_mean1_all = []
    best_mean2_all = []

    # For every image:
    for idx in range(len(Ibwf)):
        I = Ibwf[idx]

        # Uniform search grid; sample_points_grid already discards points
        # that are too close to the border.
        gridpoints = sample_points_grid(I, WIN_SIZE, HALF_WIN_SIZE)

        # Get patches from points and store in one big feature matrix
        xs = np.vstack([
            patch_to_vec(get_patch_at_point(I, p, HALF_WIN_SIZE))
            for p in gridpoints
        ])

        # Predict probability of the grid patches to find tip and end of the cutlery
        prob = clf.predict_proba(xs)

        # Use points with high probability of a class
        best_idx0s = np.where(prob[:, 0] >= REL_PROB_THRESHOLD * prob[:, 0].max())
        best_idx1s = np.where(prob[:, 1] >= REL_PROB_THRESHOLD * prob[:, 1].max())
        bestgridpoints1 = gridpoints[best_idx0s]
        bestgridpoints2 = gridpoints[best_idx1s]
        # `np.int` was only an alias of the builtin `int` and has been removed
        # from NumPy; the builtin gives the same integer means.
        p1 = np.mean(bestgridpoints1, axis=0, dtype=int)
        p2 = np.mean(bestgridpoints2, axis=0, dtype=int)
        dist = round(distance.euclidean(p1, p2))

        # Create new points around these points
        Xs, labels, points = extract_patches2(I, p1, p2, WIN_SIZE, HALF_WIN_SIZE, dist)

        # Predict class probabilities of the refinement points
        y_pred_prob = clf.predict_proba(Xs)

        best_range1 = np.where(y_pred_prob[:, 0] >= REL_PROB_THRESHOLD * y_pred_prob[:, 0].max())
        best_range2 = np.where(y_pred_prob[:, 1] >= REL_PROB_THRESHOLD * y_pred_prob[:, 1].max())
        bestpoints1 = points[best_range1]
        bestpoints2 = points[best_range2]
        # the median is used here (not the mean) for the refined estimates
        best_mean1 = np.median(bestpoints1, axis=0)
        best_mean2 = np.median(bestpoints2, axis=0)
        pickpoint = [(best_mean1[0] + best_mean2[0]) / 2, (best_mean1[1] + best_mean2[1]) / 2]

        # Save values in order to plot
        gridpoints_all.append(gridpoints)
        p1_all.append(p1)
        p2_all.append(p2)
        points_all.append(points)
        pickuppoint_all.append(pickpoint)
        bestgridpoints1_all.append(bestgridpoints1)
        bestgridpoints2_all.append(bestgridpoints2)
        best_mean1_all.append(best_mean1)
        best_mean2_all.append(best_mean2)

    pickuppoints_all = np.vstack(pickuppoint_all)

    # Plot results
    if show:

        def plot_nth_pickuppoint(n, showgridpoints, showpoints, showfilter):
            show_pickuppoint(Ibwf, Is, n, pickuppoints_all, gridpoints_all, points_all, p1_all, p2_all, bestgridpoints1_all, bestgridpoints2_all, best_mean1_all, best_mean2_all, showgridpoints, showpoints, showfilter)

        # the (0, 1) ranges turn the three flags into 0/1 integer sliders
        ipywidgets.interact(plot_nth_pickuppoint, n=(0, len(Ibwf) - 1), showgridpoints=(0, 1), showpoints=(0, 1), showfilter=(0, 1))

    return pickuppoint_all, Ibwf

# Demo
This demo presents the classifier that was created in the Final Assignment. First the results on the test data are shown; at the end, an ipywidget is introduced to validate the classifier on unannotated data.

### Results on Testdata


In [4]:
# Load model
# NOTE: pickle.load can execute arbitrary code while loading; only open
# .sav files that come from a trusted source.
savemodel = 'finalized_model.sav'
with open(savemodel, 'rb') as model_file:  # close the file handle after loading
    rf = pickle.load(model_file)

# Load testdata: element 0 holds the features, element 1 the labels
savetestdata = 'testdata.sav'
with open(savetestdata, 'rb') as testdata_file:
    testdata = pickle.load(testdata_file)
X_test = testdata[0]
y_test = testdata[1]

# Evaluate the new classifier on test data
ypred_test_rf, rf_test_accuracy, rf_test_confmat = eval_classifier(rf, X_test, y_test)
report_eval('Random Forest New - TEST', rf_test_accuracy, rf_test_confmat)



*** Random Forest New - TEST ***
 confusion matrix:
[[2165    0   49    0]
 [   1 1836  247    6]
 [   2   24 1619   11]
 [  13    6   14  863]]
 accuracy: 0.946


### IPY Widget
#### How it works:
- This notebook opens the .jpg files in the directory "images/demo_images". The images are loaded and downscaled if needed; note that a downscaled image overwrites the original file on disk. You can add your own images to see how the classifier performs. The classifier works with square images, so make sure your images are square.
- Once the images are loaded, the classifier predicts the output and the results are shown in an ipywidget. The widget has some sliders that can be manipulated:
 - n - which is the n'th image in the directory
 - showgridpoints - show/hide the grid sample points that are used to find the tip and the end of the fork. The purple left triangles are the grid points whose probability of being the class "tip" is at least 90% of the highest "tip" probability found in the image; the purple right triangles are the same for the class "end". The blue crosses are the mean positions of the left triangles and of the right triangles, respectively.
 - showpoints - show/hide the extra sample points that are cast around the previously found blue crosses. The cyan stars indicate the median of the points whose probability of being the class "tip" or "end" is at least 90% of the highest probability found for that class.
 - showfilter - show/hide the filter that is used in preprocessing of the data.


In [6]:

# Run the predictor on the demo images and show the interactive widget.
# y_pred_ receives the list of predicted pick-up points, Ibwf_ the filtered images.
y_pred_, Ibwf_ = image_predictior(
    image_dir='images/demo_images',
    clf=rf,
    show=True,
)

found 10 images in target directory
loaded 10 images
...all 10 images have been transformed to B&W and filtered...


interactive(children=(IntSlider(value=4, description='n', max=9), IntSlider(value=0, description='showgridpoin…