# Project Demo - Support Vector Machines on PYNQ
**Ryan Greer, 
19/03/2020**

## Dataset
- 3 hyperspectral images of Streptomyces strains for training and testing
- Training data (**KNOWN CLASSES**) and testing data (**UNKNOWN CLASSES**)

<img src="DATA.png">

## Load data and plot in 3D space
- The data originally had 256 variables associated with each pixel
- Using principal component analysis (PCA), the data has been compressed to 3 dimensions
- The first 3 PCA dimensions of each pixel are used as the input to the SVM

In [None]:
def get_training_matrix():
    """Read ``training_matrix.dat`` and return its whitespace-separated tokens.

    The file is looked up in the current working directory.

    Returns:
        list[str]: every token in the file, in file order. The values are
        returned as strings; callers convert them to numbers where needed.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even if read() raises.
    with open("training_matrix.dat", "r") as f:
        return f.read().split()
    
def get_testing_matrix():
    """Read ``test_matrix.dat`` and return its whitespace-separated tokens.

    The file is looked up in the current working directory.

    Returns:
        list[str]: every token in the file, in file order. The values are
        returned as strings; callers convert them to numbers where needed.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Previously close() was placed after the return statement and never ran;
    # the context manager closes the handle on every exit path.
    with open("test_matrix.dat", "r") as f:
        return f.read().split()

In [None]:
import numpy as np

# Load the raw training tokens, view them as 150 samples x 3 components and
# transpose so that each of the 3 rows holds one component for all samples.
training_mat_data = get_training_matrix()
training_plot_data = np.reshape(training_mat_data, (150, 3)).T
# Nested Python lists (still strings) are what the plotting cell slices.
training_plot_data_new = training_plot_data.tolist()

#training_plot_data_x[0:50]

In [None]:
#%matplotlib inline
%matplotlib notebook

from mpl_toolkits import mplot3d

import math

import matplotlib.pyplot as plt

# Create the figure and a 3D axes. Figure.gca() no longer accepts keyword
# arguments in current Matplotlib releases, so the 3D axes is requested through
# add_subplot(), which works on both old and new versions.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Row 0 / 1 / 2 of training_plot_data_new hold the x / y / z coordinate of every
# sample. Each row is cut into three consecutive groups of 50 samples, which are
# drawn in red, green and blue respectively. The tokens are still strings at
# this point, so they are converted to float here.
training_plot_data_x_1 = [float(i) for i in training_plot_data_new[0][0:50]]
training_plot_data_x_2 = [float(i) for i in training_plot_data_new[0][50:100]]
training_plot_data_x_3 = [float(i) for i in training_plot_data_new[0][100:150]]

training_plot_data_y_1 = [float(i) for i in training_plot_data_new[1][0:50]]
training_plot_data_y_2 = [float(i) for i in training_plot_data_new[1][50:100]]
training_plot_data_y_3 = [float(i) for i in training_plot_data_new[1][100:150]]

training_plot_data_z_1 = [float(i) for i in training_plot_data_new[2][0:50]]
training_plot_data_z_2 = [float(i) for i in training_plot_data_new[2][50:100]]
training_plot_data_z_3 = [float(i) for i in training_plot_data_new[2][100:150]]

# One scatter call per group so that each group gets its own colour.
ax.scatter(training_plot_data_x_1, training_plot_data_y_1, training_plot_data_z_1, c='red', s=1, alpha=1)
ax.scatter(training_plot_data_x_2, training_plot_data_y_2, training_plot_data_z_2, c='green', s=1, alpha=1)
ax.scatter(training_plot_data_x_3, training_plot_data_y_3, training_plot_data_z_3, c='blue', s=1, alpha=1)

ax.set_xlabel('1st principle component')
ax.set_ylabel('2nd principle component')
ax.set_zlabel('3rd principle component')

plt.show()

## Load the training bitstream

In [None]:
### SETUP load the overlay
from pynq import Overlay

# Load the training bitstream as a PYNQ overlay. The module-level name `overlay`
# is read as a global by SMO_driver.__init__ (overlay.SMO_1 / overlay.SMO_2),
# so this cell has to run before the driver is instantiated.
overlay = Overlay("/home/xilinx/jupyter_notebooks/PROJECT_FULL_DEMO/SMO_FULL_PYNQ_Z2.bit")

## Setup the training drivers
- Two drivers:
  1. `parse_files` - parses the files containing the dataset
  2. `SMO_driver` - converts the data to the required format, streams it through the programmable logic (PL) and collects the results

In [None]:
from pynq import DefaultIP
import numpy as np

class parse_files():
    """Load the SVM training set and its parameters from ``.dat`` text files.

    Every file is read from the current working directory and must contain
    whitespace-separated numeric tokens. Each ``get_*`` method reads one or two
    files and stores the parsed values on the instance; nothing is returned.
    """

    def __init__(self):
        # integer views of the training data (files with the "_fi" suffix,
        # presumably fixed-point encoded - confirm against the file producer)
        self.training_labels_data_fi_uint8 = None
        self.training_mat_data_fi_uint16 = None
        self.input_details_data_fi_uint32 = None

        # floating-point training details, populated by get_input_details()
        self.input_details_data_float = None
        self.no_training_vectors_float = None
        self.no_variables_float = None
        self.C = None
        self.tolerance = None

        # miscellaneous variables
        self.no_training_vectors_fi_uint32 = None
        self.no_training_vectors_int = None
        self.no_variables_fi_uint32 = None
        self.no_variables_int = None
        self.C_fi_uint32 = None
        self.tolerance_fi_uint32 = None
        # number of classes in the training set, populated by get_no_classes()
        self.no_classes = None

    @staticmethod
    def _read_tokens(path):
        """Return the whitespace-separated tokens of *path* as a NumPy string array."""
        # The context manager closes the file even if reading raises.
        with open(path, "r") as f:
            return np.array(f.read().split())

    def get_training_labels(self):
        """Read ``training_labels.dat`` into ``training_labels_data_fi_uint8`` (uint8 array)."""
        tokens = self._read_tokens("training_labels.dat")
        self.training_labels_data_fi_uint8 = np.asarray(tokens, np.uint8)

    def get_training_matrix(self):
        """Read ``training_matrix_fi.dat`` into ``training_mat_data_fi_uint16`` (flat uint16 array)."""
        tokens = self._read_tokens("training_matrix_fi.dat")
        self.training_mat_data_fi_uint16 = np.asarray(tokens, np.uint16)

    def get_input_details(self):
        """Read the training details in floating-point and integer form.

        ``training_details.dat`` must hold at least four values, in this order:
        number of training vectors, number of variables, C, tolerance. They are
        stored as float32 and, for the two counts, additionally as ``int``.
        ``training_details_fi.dat`` is stored unchanged as a uint32 array in
        ``input_details_data_fi_uint32``.

        Raises:
            IndexError: if ``training_details.dat`` holds fewer than four values.
        """
        tokens = self._read_tokens("training_details.dat")
        # np.asfarray was removed in NumPy 2.0; asarray with an explicit dtype
        # performs the same string -> float32 conversion on every version.
        self.input_details_data_float = np.asarray(tokens, dtype=np.float32)

        self.no_training_vectors_float = self.input_details_data_float[0]
        self.no_training_vectors_int = int(self.no_training_vectors_float)
        self.no_variables_float = self.input_details_data_float[1]
        self.no_variables_int = int(self.no_variables_float)
        self.C = self.input_details_data_float[2]
        self.tolerance = self.input_details_data_float[3]

        tokens = self._read_tokens("training_details_fi.dat")
        self.input_details_data_fi_uint32 = np.asarray(tokens, np.uint32)

    def get_no_classes(self):
        """Read the number of classes (first token of ``no_classes.dat``) into ``no_classes``.

        Raises:
            IndexError: if the file is empty.
            ValueError: if the first token is not an integer.
        """
        with open("no_classes.dat") as f:
            tokens = f.read().split()
        self.no_classes = int(tokens[0])

import pynq.lib.dma
import struct

from pynq import allocate

class SMO_driver(parse_files):
    def __init__(self):
        """Set up driver state for the two SMO cores of the loaded overlay.

        Reads the number of classes from ``no_classes.dat`` and looks up
        ``overlay.SMO_1`` and ``overlay.SMO_2`` on the module-level ``overlay``
        object, which therefore has to exist before the driver is constructed.
        """
        # Declare the attributes owned by parse_files so they exist (as None)
        # even before the corresponding files have been read.
        super().__init__()

        self.get_no_classes()
        # one binary classifier per unordered pair of classes
        self.no_classifiers = self.no_classes * (self.no_classes - 1) // 2

        # most recently allocated buffer of each kind (see pynq_buffer_init / SMO_parallel)
        self.training_labels_buffer = None
        self.training_matrix_buffer = None
        self.input_details_buffer = None
        self.alpha_out_buffer = None
        self.output_details_buffer = None

        # these lists contain the DMA instances for the different cores used in the overlay;
        # entry k belongs to core k+1 and is filled in by SMO_parallel
        self.dma_dp_o_1 = []
        self.dma_dp_i_1 = []
        self.dma_dp_o_2 = []
        self.dma_dp_i_2 = []
        self.dma_dp_id_o = []
        self.dma_dp_id_i = []
        self.dma_tl = []
        self.dma_tm_o = []
        self.dma_tm_i = []
        self.dma_id = []

        self.dma_ao = []
        self.dma_kkt = []
        self.dma_od = []

        self.no_cores = 2
        # 1-based numbers of the classifiers handled by the current parallel batch
        self.classifier_indices = []
        self.no_training_vectors_all = np.zeros(shape=(self.no_cores), dtype=np.uint32)

        # create a dispatcher to allow streamlined access to the different cores in the design
        self.SMO_dispatcher = {
            1: overlay.SMO_1,
            2: overlay.SMO_2,
        }

        # store training models (one entry per trained classifier)
        self.sv_coeffs = []
        self.sv_indices = []
        self.no_svs = []
        self.offsets = []
        self.no_itrs = []
        
    def fixed_point_to_float(self, input_, word_length, integer_length):
        """Convert a signed (two's-complement) fixed-point word to its real value.

        Args:
            input_: the raw word given as its unsigned bit pattern, i.e. an
                integer in ``[0, 2**word_length)``.
            word_length: total number of bits in the word, sign bit included.
            integer_length: number of integer bits, sign bit included; the
                remaining ``word_length - integer_length`` bits are fractional.

        Returns:
            float: the value represented by the word. For example, with
            ``word_length=32`` and ``integer_length=12`` the word ``1 << 20``
            is ``1.0``.

        Raises:
            ValueError: if ``word_length`` is not positive or ``input_`` does
                not fit in ``word_length`` bits.
        """
        if word_length < 1:
            raise ValueError("word_length must be at least 1")

        raw = int(input_)
        if not 0 <= raw < (1 << word_length):
            raise ValueError(
                "input_ must be an unsigned bit pattern of %d bits, got %r"
                % (word_length, input_)
            )

        # A set top bit marks a negative number: undo the two's-complement wrap.
        if raw >= 1 << (word_length - 1):
            raw -= 1 << word_length

        # The least significant bit weighs 2**-fractional_length.
        fractional_length = word_length - integer_length
        return raw * 2.0 ** -fractional_length

        # https://stackoverflow.com/questions/1604464/twos-complement-in-python
        
    def int_bits_IEEE754_to_float(self, to_convert):
        """Reinterpret the bits of an unsigned 32-bit integer as an IEEE754 single-precision float.

        credit - https://stackoverflow.com/questions/30124608/convert-unsigned-integer-to-float-in-python
        """
        # serialise as a big-endian unsigned long ('L'), then decode the same four bytes as a float ('f')
        raw_bytes = struct.pack('>L', to_convert)
        (as_float,) = struct.unpack('>f', raw_bytes)
        return as_float
        
    def write_training_model_to_files(self, current_classifier):  
        # SV COEFFS TO FILE #
        # create new numpy array to copy to file - copt pynq buffer into
        coeffs_write = np.zeros(shape=(len(self.sv_coeffs[current_classifier-1]),1), dtype=np.uint32)
        np.copyto(coeffs_write, self.sv_coeffs[current_classifier-1])
        np.savetxt("coeffs_fi_"+str(current_classifier)+".dat",coeffs_write,'%d')
        
        # SUPPORT VECTORS TO FILE #
        #svs_write = self.training_mat_data_fi_uint16
        #svs_write = np.reshape(svs_write,(len(self.training_mat_data_fi_uint16/self.no_variables_int),self.no_variables_int))
        svs_write = np.reshape(self.training_mat_data_fi_uint16,((int(len(self.training_mat_data_fi_uint16)/self.no_variables_int),self.no_variables_int)))
        svs_write = svs_write[self.sv_indices[current_classifier-1]]
        np.savetxt("svs_fi_"+str(current_classifier)+".dat",svs_write,'%d')
        
        # OFFSET TO FILE #
        np.savetxt("offset_fi_"+str(current_classifier)+".dat",self.offsets[current_classifier-1],'%d')
        
    def write_n_svs_to_file(self):
        # populate number of support vectors to array and write to file
        no_classifiers = int(0.5 * self.no_classes * (self.no_classes - 1))
        n_svs = np.zeros(shape=(no_classifiers), dtype=np.uint32)
        
        for n1 in range(no_classifiers):
            n_svs[n1] = len(self.sv_coeffs[n1])
            
        np.savetxt("n_svs.dat",n_svs,'%d')
            
    def pynq_buffer_init(self):
        """Allocate the buffers whose size does not depend on the training set.

        Resets all buffer lists, allocates the single shared input-details
        buffer (5 x int32) and, for every core, one output-details buffer
        (1 x uint32), one KKT-violation buffer (1 x uint8) and one dot-product
        input-details buffer (2 x uint16). The training-matrix and alpha
        lists are only emptied here; SMO_parallel fills them.
        """
        # lists filled later, once the size of each training set is known
        self.training_matrix_buffers = []
        self.alpha_out_buffers = []

        # lists filled below, one entry per core
        self.output_details_buffers = []
        self.kkt_violation_buffers = []
        self.input_details_dpm_buffers = []

        self.input_details_buffer = allocate(shape=(5,), dtype=np.int32)

        for _ in range(self.no_cores):
            self.output_details_buffer = allocate(shape=(1,), dtype=np.uint32)
            self.output_details_buffers.append(self.output_details_buffer)

            # receives the flag telling the driver whether the inner (p loop) transfers are needed
            self.kkt_violation_buffer = allocate(shape=(1,), dtype=np.uint8)
            self.kkt_violation_buffers.append(self.kkt_violation_buffer)

            self.input_details_dpm_buffer = allocate(shape=(2,), dtype=np.uint16)
            self.input_details_dpm_buffers.append(self.input_details_dpm_buffer)
        
    def pynq_buffer_delete(self):
        # close buffers - clean up heap memory
        
        for n1 in range(self.no_cores):
            if(n1 == len(self.classifier_indices)):
                break
            self.training_matrix_buffers[n1].close()
            self.input_details_buffer.close()
            
            self.alpha_out_buffers[n1].close()
            self.output_details_buffers[n1].close()
        
    def SMO_parallel(self, training_matrices, training_labels, index_1, index_2, no_training_vectors, no_variables, C, tolerance, max_itr):
        """Train one batch of binary classifiers, one per hardware core, and store the models.

        The batch size is ``len(self.classifier_indices)`` (at most ``no_cores``);
        entry ``k`` of every list argument belongs to core ``k + 1``.

        Args:
            training_matrices: per classifier, the flattened training matrix
                (positive-class vectors first, then negative-class vectors).
            training_labels: per classifier, the label vector holding +1 / -1.
            index_1: per classifier, index of the first positive-class vector
                with respect to the entire training dataset.
            index_2: same as ``index_1`` but for the negative class.
            no_training_vectors: per classifier, the number of training
                vectors. The list entries are converted to ``int`` in place.
            no_variables: number of variables per training vector.
            C: sent to every core as part of the input details.
            tolerance: sent to every core as part of the input details.
            max_itr: maximum number of outer iterations driven by this method.

        Side effects:
            Appends one entry per classifier of the batch to ``self.sv_coeffs``
            (non-zero coefficients, negative class negated), ``self.sv_indices``
            (indices of those vectors in the entire training set) and
            ``self.offsets`` (copy of the last output-details word). All
            buffers are released before returning.
        """
        # initialise buffers
        self.pynq_buffer_init()
        no_variables = int(no_variables)

        # number of classifiers in this batch - never more than the number of cores
        n_active = min(self.no_cores, len(self.classifier_indices))

        # store start index of negative class
        negative_class_index = np.zeros(shape=(self.no_cores), dtype=np.uint32)
        for n1 in range(n_active):
            negative_class_index[n1] = np.where(training_labels[n1] == -1)[0][0]

        # (re)build the per-core DMA lists. They are rebuilt on every call so that
        # entry k always belongs to core k+1 and the lists do not grow from batch to batch.
        dma_names = (
            "dma_dp_o_1", "dma_dp_i_1", "dma_dp_o_2", "dma_dp_i_2",
            "dma_dp_id_o", "dma_dp_id_i", "dma_tl", "dma_tm_o", "dma_tm_i",
            "dma_id", "dma_ao", "dma_kkt", "dma_od",
        )
        for name in dma_names:
            setattr(self, name, [])
        for n1 in range(n_active):
            core = self.SMO_dispatcher[n1+1]
            for name in dma_names:
                getattr(self, name).append(getattr(core, name))

        # allocate buffers for each classifier
        for n1 in range(n_active):
            no_training_vectors[n1] = int(no_training_vectors[n1])
            self.training_matrix_buffer = allocate(shape=(no_training_vectors[n1]*no_variables,), dtype=np.uint16)
            self.alpha_out_buffer = allocate(shape=(no_training_vectors[n1],1), dtype=np.uint32)

            self.training_matrix_buffers.append(self.training_matrix_buffer)
            self.alpha_out_buffers.append(self.alpha_out_buffer)

        # loop over number of cores to send data which is constant for the current classifier
        for n1 in range(n_active):
            training_labels_buffer = allocate(shape=(no_training_vectors[n1],), dtype=np.int8)
            np.copyto(training_labels_buffer, training_labels[n1])

            # keep a DMA-able copy of the training matrix of this classifier
            np.copyto(self.training_matrix_buffers[n1], training_matrices[n1])

            self.no_training_vectors_all[n1] = no_training_vectors[n1]

            #### INPUT DETAILS
            # the core receives these five values as the raw bits of float32 numbers
            no_training_vectors_IEEE754 = np.asarray(no_training_vectors[n1], dtype=np.float32).view(np.int32).item()
            no_variables_IEEE754 = np.asarray(no_variables, dtype=np.float32).view(np.int32).item()
            max_itr_IEEE754 = np.asarray(max_itr, dtype=np.float32).view(np.int32).item()
            tol_IEEE754 = np.asarray(tolerance, dtype=np.float32).view(np.int32).item()
            C_IEEE754 = np.asarray(C, dtype=np.float32).view(np.int32).item()

            # INPUT_DETAILS
            # no_training_vectors - from file
            # no_variables - from file
            # max_itr, tolerance, C - specified by user
            x = [no_training_vectors_IEEE754, no_variables_IEEE754, max_itr_IEEE754, tol_IEEE754, C_IEEE754]
            np.copyto(self.input_details_buffer, np.asarray(x, np.int32))

            # INPUT_DETAILS for matrix multiply cores - these are integer, not float
            x = [no_training_vectors[n1], no_variables]
            np.copyto(self.input_details_dpm_buffers[n1], np.asarray(x, np.uint16))

            # transfer input details and training labels to DMA
            # send channels (waited on before the shared input details buffer is reused):
            self.dma_id[n1].sendchannel.transfer(self.input_details_buffer)
            self.dma_tl[n1].sendchannel.transfer(training_labels_buffer)
            self.dma_id[n1].sendchannel.wait()
            self.dma_tl[n1].sendchannel.wait()

            # receive channels - armed now, waited on once training has finished
            self.dma_ao[n1].recvchannel.transfer(self.alpha_out_buffers[n1])

        # if this is 0, the design has exited without changed_alphas = 0 meaning its iterations have saturated
        # if it is 1, then we required less iterations than specified
        # this parameter is used to determine the last element in the "output_details" stream - the last element should be the offset
        changed_alphas_exit = np.zeros(shape=(self.no_cores,), dtype=np.uint32)

        # iterate over the maximum number of iterations
        for n0 in range(max_itr):
            # iterate over the cores
            for n0_1 in range(n_active):
                if(changed_alphas_exit[n0_1] == 0):
                    self.dma_od[n0_1].recvchannel.transfer(self.output_details_buffers[n0_1])

                    self.dma_dp_id_o[n0_1].sendchannel.transfer(self.input_details_dpm_buffers[n0_1])
                    self.dma_dp_id_o[n0_1].sendchannel.wait()

                    # TRANSFER OUTER DOT PRODUCT MATRIX (FIRST TRAINING MATRIX - NEEDED ONCE PER ITERATION):
                    self.dma_dp_o_1[n0_1].sendchannel.transfer(self.training_matrix_buffers[n0_1])
                    # send initial copy of outer training matrix and dot product matrix
                    self.dma_tm_o[n0_1].sendchannel.transfer(self.training_matrix_buffers[n0_1])

            # p loops:
            for n1 in range(max(self.no_training_vectors_all)):
                # iterate over the cores
                for n1_1 in range(n_active):
                    if(changed_alphas_exit[n1_1] == 0):
                        if(n1 < self.no_training_vectors_all[n1_1]):

                            # loop until we see a kkt violation or can execute next iteration of SMO
                            self.dma_kkt[n1_1].recvchannel.transfer(self.kkt_violation_buffers[n1_1])

                            # TRANSFER SECOND TRAINING MATRIX (TO COMPUTE OUTER DOT PRODUCT MATRIX) - ONCE PER P LOOP
                            self.dma_dp_o_2[n1_1].sendchannel.transfer(self.training_matrix_buffers[n1_1])
                            self.dma_dp_o_2[n1_1].sendchannel.wait()

                            # busy-wait on the DMA register at offset 0x34 until it reads 4098 (0x1002)
                            # NOTE(review): presumably the AXI DMA S2MM status register reporting
                            # "idle + interrupt on complete" - confirm against the DMA register map
                            while(1):
                                s2mm_status_kkt = self.dma_kkt[n1_1].read(0x34)
                                if(s2mm_status_kkt == 4098):
                                    break

                            if(self.kkt_violation_buffers[n1_1] == 1):
                                # transfer input details for matrix multiply core
                                self.dma_dp_id_i[n1_1].sendchannel.transfer(self.input_details_dpm_buffers[n1_1])
                                self.dma_dp_id_i[n1_1].sendchannel.wait()
                                # kkt violation - transfer inner matrices
                                self.dma_tm_i[n1_1].sendchannel.transfer(self.training_matrix_buffers[n1_1])

                                # COMPUTE AND TRANSFER DOT PRODUCT MATRIX:
                                self.dma_dp_i_1[n1_1].sendchannel.transfer(self.training_matrix_buffers[n1_1])
                                for n1_2 in range(self.no_training_vectors_all[n1_1]):
                                    self.dma_dp_i_2[n1_1].sendchannel.transfer(self.training_matrix_buffers[n1_1])
                                    self.dma_dp_i_2[n1_1].sendchannel.wait()

                                self.dma_tm_i[n1_1].sendchannel.wait()
                                self.dma_dp_i_1[n1_1].sendchannel.wait()

            # check to see if we should go to next iteration or if alpha has been calculated
            # loop over the active cores:
            for n0_1 in range(n_active):
                if(changed_alphas_exit[n0_1] == 0):
                    # same status poll as for the kkt channel above
                    while(1):
                        s2mm_status_od = self.dma_od[n0_1].read(0x34)
                        if(s2mm_status_od == 4098):
                            test = int(self.output_details_buffers[n0_1])
                            break

                    # the core reports a 32-bit fixed-point word (12 integer bits); it is
                    # compared with the number of iterations executed so far
                    if(int(self.fixed_point_to_float(test,32,12)) == (n0 + 1)):
                        # exiting with "changed_alphas =/= 0" (on last iteration)
                        changed_alphas_exit[n0_1] = 0
                    else:
                        # exiting with "changed_alphas == 0"
                        changed_alphas_exit[n0_1] = 1

            # stop as soon as every classifier of this batch has finished. Only the active
            # cores are inspected, so a batch smaller than no_cores can also exit early.
            if np.all(changed_alphas_exit[:n_active] != 0):
                break

        # loop over cores - get results
        for n0 in range(n_active):
            if(changed_alphas_exit[n0] == 0):
                self.dma_od[n0].recvchannel.transfer(self.output_details_buffers[n0])
                self.dma_od[n0].recvchannel.wait()

            # DMA wait
            self.dma_ao[n0].recvchannel.wait()

            # NEED TO OBTAIN COEFFICIENTS - ALPHAS OF NEGATIVE CLASS SHOULD BE MULTIPLIED BY -1
            # NOTE(review): the buffer is uint32, so the negated values are stored wrapped
            # (two's complement); this relies on NumPy's legacy integer casting - confirm on newer NumPy
            coeffs_temp = self.alpha_out_buffers[n0]
            coeffs_temp[negative_class_index[n0]:no_training_vectors[n0],0] = coeffs_temp[negative_class_index[n0]:no_training_vectors[n0],0] * -1
            # set very small coefficients to zero
            coeffs_temp[np.where(np.absolute(coeffs_temp) < 0.00001)] = 0
            # fancy indexing copies the selected rows out of the DMA buffer
            coeffs_temp_2 = coeffs_temp[np.where(coeffs_temp != 0)[0]]
            self.sv_coeffs.append(coeffs_temp_2)
            # store a copy: the output details buffer itself is released by pynq_buffer_delete() below
            self.offsets.append(np.array(self.output_details_buffers[n0], dtype=np.uint32))

            # GET INDICES OF SUPPORT VECTORS WITH RESPECT TO ENTIRE TRAINING SET
            sv_indices_old = np.where(coeffs_temp != 0)[0]

            length_class_1 = negative_class_index[n0]

            first_classifier_indices = np.where(sv_indices_old < length_class_1)[0]
            second_classifier_indices = np.where(sv_indices_old >= length_class_1)[0]

            # int64 so that training sets with more than 255 vectors do not wrap around
            sv_indices_new = np.zeros(shape=(len(sv_indices_old)), dtype=np.int64)

            # these lines get the actual indices corresponding to the support vectors identified from the binary training
            sv_indices_new[first_classifier_indices] = sv_indices_old[first_classifier_indices] + index_1[n0]
            sv_indices_new[second_classifier_indices] = sv_indices_old[second_classifier_indices] - length_class_1 + index_2[n0]

            self.sv_indices.append(sv_indices_new)

        self.pynq_buffer_delete()
        
    def SMO_driver_top(self, C, tolerance, max_itr):
        """Train every pairwise classifier, ``no_cores`` at a time.

        Reads the training matrix, labels and details from their files, then
        walks over every pair of classes ``(n1, n2)`` with ``n1 < n2``. For each
        pair it builds a two-class training set (class ``n1 + 1`` relabelled
        +1, class ``n2 + 1`` relabelled -1) and hands the sets to SMO_parallel
        in batches: e.g. with 5 classifiers and 2 cores the batches are
        [1, 2], [3, 4] and [5].

        The vectors of one class are taken as the contiguous range between the
        first and the last vector carrying that label, so the training set has
        to be grouped by class.

        Args:
            C: forwarded to SMO_parallel.
            tolerance: forwarded to SMO_parallel.
            max_itr: forwarded to SMO_parallel.

        Side effects:
            Replaces ``self.sv_coeffs``, ``self.sv_indices`` and
            ``self.offsets`` with the models of this run and prints progress.
        """
        # start from empty model lists so that running the training again does not
        # leave the models of an earlier run in front of the new ones
        self.sv_coeffs = []
        self.sv_indices = []
        self.offsets = []

        # THESE LISTS CONTAIN THE TRAINING MATRICES AND TRAINING LABELS OF THE CURRENT BATCH
        training_matrices = []
        training_labels = []
        no_training_vectors_all = []
        training_data_1_ind_all = []
        training_data_2_ind_all = []

        # keep track of which classifiers we are working on
        self.classifier_indices = []

        self.get_training_matrix()
        self.get_training_labels()
        self.get_input_details()

        current_classifier = 0

        # number of classifiers queued for the next SMO_parallel call
        core_count = 0

        no_classifiers = int(0.5 * self.no_classes * (self.no_classes - 1))
        print("no_classifiers -> ", no_classifiers)

        no_variables = self.no_variables_int
        full_matrix = self.training_mat_data_fi_uint16

        for n1 in range(self.no_classes):
            # loop from zero to (no_classes - 1)
            for n2 in range(n1 + 1, self.no_classes):
                # loop from (upper loop index + 1) to (no_classes - 1)
                # core_count is reset after every dispatch, so there is always room for this pair

                training_data_1_indices = np.where(self.training_labels_data_fi_uint8 == (n1+1))[0]        # positive class
                training_data_2_indices = np.where(self.training_labels_data_fi_uint8 == (n2+1))[0]        # negative class
                training_data_1_ind_all.append(training_data_1_indices[0])
                training_data_2_ind_all.append(training_data_2_indices[0])

                length_class_1 = len(training_data_1_indices)
                length_class_2 = len(training_data_2_indices)
                no_training_vectors = length_class_1 + length_class_2

                # populate the new training matrix with the two classes in question
                # as the training matrix is stored flat, find first and last elements of interest
                first_index_1 = training_data_1_indices[0] * no_variables
                last_index_1 = (training_data_1_indices[-1] + 1) * no_variables
                first_index_2 = training_data_2_indices[0] * no_variables
                last_index_2 = (training_data_2_indices[-1] + 1) * no_variables

                # element at which the negative class starts in the new matrix
                split = length_class_1 * no_variables
                training_matrix_new = np.zeros(shape=(no_training_vectors*no_variables), dtype=np.uint16)
                training_matrix_new[0:split] = full_matrix[first_index_1:last_index_1]
                training_matrix_new[split:] = full_matrix[first_index_2:last_index_2]

                # populate training labels with 1s and -1s
                training_labels_new = np.zeros(shape=(no_training_vectors), dtype=np.int8)
                training_labels_new[0:length_class_1] = 1
                training_labels_new[length_class_1:no_training_vectors] = -1

                training_matrices.append(training_matrix_new)
                training_labels.append(training_labels_new)

                no_training_vectors_all.append(no_training_vectors)

                current_classifier = current_classifier + 1
                core_count = core_count + 1

                self.classifier_indices.append(current_classifier)

                print("current_classifier -> ", current_classifier)

                # dispatch when every core has work or when the last classifier has been queued
                if(core_count == self.no_cores or current_classifier == no_classifiers):
                    self.SMO_parallel(training_matrices, training_labels, training_data_1_ind_all, training_data_2_ind_all, no_training_vectors_all, self.no_variables_int, C, tolerance, max_itr)
                    core_count = 0

                    # reset lists to empty for next parallel iteration
                    training_matrices = []
                    training_labels = []
                    no_training_vectors_all = []
                    training_data_1_ind_all = []
                    training_data_2_ind_all = []

                    # reset this to empty
                    self.classifier_indices = []
        
# progress marker: shown once every statement above it in this cell has been executed
print("DONE")

## Instantiate the training driver

In [None]:
# single training-driver object; later cells read the trained model from its attributes
SMO_driver_inst = SMO_driver()

## Call driver top-level function
All FPGA data processing (e.g. DMA, MMIO, parallel processing...) is abstracted into one function call - there is no need for the end user to understand what the driver does or how the FPGA design has been developed

In [None]:
# hyperparameters handed unchanged to the training driver
# NOTE(review): presumably C is the SVM regularisation constant, tolerance the SMO convergence
# tolerance and max_itr the iteration limit - confirm against the SMO_driver class
C = 10
tolerance = 0.0001
max_itr = 10

# runs the complete training flow; results are stored on SMO_driver_inst
SMO_driver_inst.SMO_driver_top(C, tolerance, max_itr)

In [None]:
# print the trained model of one binary classifier (selection is 1-based)
classifier_select = 3
model_slot = classifier_select - 1

# offset and coefficients are fixed-point words; both are converted with the arguments (32, 12)
print("offset = ")
print(SMO_driver_inst.fixed_point_to_float(int(SMO_driver_inst.offsets[model_slot]), 32, 12))
print("\n")

for position, raw_coeff in enumerate(SMO_driver_inst.sv_coeffs[model_slot], start=1):
    print("coeff ", position, " = ", SMO_driver_inst.fixed_point_to_float(int(raw_coeff), 32, 12))
print("\n")

# one index is printed per coefficient, so the coefficient count drives this loop as well
for position in range(1, len(SMO_driver_inst.sv_coeffs[model_slot]) + 1):
    print("indice ", position, " = ", SMO_driver_inst.sv_indices[model_slot][position - 1])

## Get plane equations (training model)

In [None]:
def _read_training_model(classifier_select):
    """Convert the trained model of one binary classifier to floating point.

    Parameters
    ----------
    classifier_select : int
        1-based number of the classifier held by ``SMO_driver_inst``.

    Returns
    -------
    tuple
        ``(offset, coeffs, indices, svs)`` where ``offset`` is the converted
        offset, ``coeffs`` the list of converted support-vector coefficients,
        ``indices`` the matching entries of ``sv_indices`` and ``svs`` a
        float32 array with one row per support vector and one column per
        variable.
    """
    slot = classifier_select - 1
    to_float = SMO_driver_inst.fixed_point_to_float
    no_variables = int(SMO_driver_inst.no_variables_int)

    # offset and coefficients are converted with the arguments (32, 12)
    offset = to_float(int(SMO_driver_inst.offsets[slot]), 32, 12)
    coeffs = [to_float(int(raw), 32, 12) for raw in SMO_driver_inst.sv_coeffs[slot]]
    no_svs = len(coeffs)
    indices = [SMO_driver_inst.sv_indices[slot][n] for n in range(no_svs)]

    # view the flat training matrix as one row per training vector and pick the support vectors
    training_rows = np.reshape(SMO_driver_inst.training_mat_data_fi_uint16, (-1, no_variables))
    raw_svs = training_rows[indices]

    # size the result from the model itself: a fixed (50, 3) array overflows as soon as a
    # classifier keeps more than 50 support vectors or the data has more than 3 variables
    svs = np.zeros(shape=(no_svs, no_variables), dtype=np.float32)
    for row in range(no_svs):
        for col in range(no_variables):
            # training data words are converted with the arguments (16, 1)
            svs[row][col] = to_float(int(raw_svs[row][col]), 16, 1)

    return offset, coeffs, indices, svs


# one model per binary classifier; the names below are used by the following cells
offset_1, coeffs_1, indices_1, svs_1 = _read_training_model(1)
offset_2, coeffs_2, indices_2, svs_2 = _read_training_model(2)
offset_3, coeffs_3, indices_3, svs_3 = _read_training_model(3)

In [None]:
def _sum_weighted_svs(coeffs, svs, no_svs):
    """Accumulate coeffs[k] * svs[k] for the first ``no_svs`` support vectors."""
    total = [0,0,0]
    for row in range(no_svs):
        total = total + coeffs[row] * svs[row]
    return total


# one weight vector per binary classifier, each summed over that classifier's support vectors
classifier_select = 1
weights_1 = _sum_weighted_svs(coeffs_1, svs_1, len(SMO_driver_inst.sv_coeffs[classifier_select-1]))

classifier_select = 2
weights_2 = _sum_weighted_svs(coeffs_2, svs_2, len(SMO_driver_inst.sv_coeffs[classifier_select-1]))

classifier_select = 3
weights_3 = _sum_weighted_svs(coeffs_3, svs_3, len(SMO_driver_inst.sv_coeffs[classifier_select-1]))

In [None]:
# sample the x/y plane on the closed interval [-1, 1] in steps of 0.5
# (np.arange excludes its stop value, which left the planes ending at 0.5 although the
# plot axes run up to 1; np.linspace includes both end points)
x = np.linspace(-1.0, 1.0, 5)
y = np.linspace(-1.0, 1.0, 5)

# XX/YY hold the x and y coordinate of every grid node; the plotting cells derive z from them
XX, YY = np.meshgrid(x, y)

## Plot planes on training points

In [None]:
fig = plt.figure()
# create the 3D axes through add_subplot: passing keyword arguments to Figure.gca() was
# deprecated in Matplotlib 3.4 and is rejected by later releases
ax = fig.add_subplot(111, projection='3d')

# training points, one colour per class
ax.scatter(training_plot_data_x_1, training_plot_data_y_1, training_plot_data_z_1, c='red', s=1, alpha=1)
ax.scatter(training_plot_data_x_2, training_plot_data_y_2, training_plot_data_z_2, c='green', s=1, alpha=1)
ax.scatter(training_plot_data_x_3, training_plot_data_y_3, training_plot_data_z_3, c='blue', s=1, alpha=1)

# each plane satisfies w[0]*x + w[1]*y + w[2]*z + offset = 0; solve for z on the XX/YY grid
for plane_weights, plane_offset, plane_colour in (
        (weights_1, offset_1, 'yellow'),
        (weights_2, offset_2, 'magenta'),
        (weights_3, offset_3, 'cyan')):
    Z = -(plane_weights[0] * XX + plane_weights[1] * YY + plane_offset) / plane_weights[2]
    ax.plot_surface(XX,YY,Z,rstride=1,cstride=1,alpha=0.5,color=plane_colour)

ax.set_xlabel('1st principle component')
ax.set_ylabel('2nd principle component')
ax.set_zlabel('3rd principle component')

# clip the view to the cube [-1, 1]^3
ax.set_xlim((-1,1))
ax.set_ylim((-1,1))
ax.set_zlim((-1,1))

plt.show()

## Plot testing dataset

In [None]:
# the testing file is reshaped to 150 vectors x 3 variables and transposed so that row 0/1/2
# holds the 1st/2nd/3rd variable of every vector
testing_mat_data = get_testing_matrix()
testing_plot_data = np.transpose(np.reshape(testing_mat_data,(150,3)))
testing_plot_data_new = testing_plot_data.tolist()

fig = plt.figure()
# create the 3D axes through add_subplot: passing keyword arguments to Figure.gca() was
# deprecated in Matplotlib 3.4 and is rejected by later releases
ax = fig.add_subplot(111, projection='3d')


def _split_in_blocks_of_50(values):
    """Return the entries 0-49, 50-99 and 100-149 of ``values`` as three lists of floats."""
    return [[float(i) for i in values[first:first + 50]] for first in (0, 50, 100)]


# the three blocks of 50 vectors are drawn in separate colours
testing_plot_data_x_1, testing_plot_data_x_2, testing_plot_data_x_3 = _split_in_blocks_of_50(testing_plot_data_new[0])
testing_plot_data_y_1, testing_plot_data_y_2, testing_plot_data_y_3 = _split_in_blocks_of_50(testing_plot_data_new[1])
testing_plot_data_z_1, testing_plot_data_z_2, testing_plot_data_z_3 = _split_in_blocks_of_50(testing_plot_data_new[2])

ax.scatter(testing_plot_data_x_1, testing_plot_data_y_1, testing_plot_data_z_1, c='red', s=1, alpha=1)
ax.scatter(testing_plot_data_x_2, testing_plot_data_y_2, testing_plot_data_z_2, c='green', s=1, alpha=1)
ax.scatter(testing_plot_data_x_3, testing_plot_data_y_3, testing_plot_data_z_3, c='blue', s=1, alpha=1)

ax.set_xlabel('1st principle component')
ax.set_ylabel('2nd principle component')
ax.set_zlabel('3rd principle component')

ax.set_xlim((-1,1))
ax.set_ylim((-1,1))
ax.set_zlim((-1,1))

plt.show()

In [None]:
# overlay the three decision planes on the axes created by the previous cell
# (plane equation w[0]*x + w[1]*y + w[2]*z + offset = 0, solved for z)
for plane_weights, plane_offset, plane_colour in (
        (weights_1, offset_1, 'yellow'),
        (weights_2, offset_2, 'magenta'),
        (weights_3, offset_3, 'cyan')):
    Z = -(plane_weights[0] * XX + plane_weights[1] * YY + plane_offset) / plane_weights[2]
    ax.plot_surface(XX,YY,Z,rstride=1,cstride=1,alpha=0.5,color=plane_colour)

plt.show()

## Write training models to files
- these will be read by the "deployment" section, which will use them to classify unknown data!

In [None]:
# write to file for deployment
# the driver is called with classifier numbers 1..no_classifiers, hence n1+1
for n1 in range(SMO_driver_inst.no_classifiers):
    SMO_driver_inst.write_training_model_to_files(n1+1)
# NOTE(review): presumably stores the support-vector count of every classifier (the
# deployment parser reads "n_svs.dat") - confirm against SMO_driver
SMO_driver_inst.write_n_svs_to_file()

## Load deployment bitstream

In [None]:
### SETUP load the overlay
from pynq import Overlay

# "overlay" is a notebook-level name: the deployment driver defined below reads its IP cores
# (geometric_values_1..6 and test_predictions_1) from it
overlay = Overlay("/home/xilinx/jupyter_notebooks/PROJECT_FULL_DEMO/deployment_linear_PYNQ_Z2.bit")

## Load deployment drivers - similar to training

In [None]:
# ref: https://pynq.readthedocs.io/en/v2.5/overlay_design_methodology/overlay_tutorial.html
# ref: http://www.fpgadeveloper.com/2018/03/how-to-accelerate-a-python-function-with-pynq.html

print("loading...")

from pynq import DefaultIP
import numpy as np
import time

# the PARSE_FILES class is instantiated once; all files required for computing the geometric values and test predictions
# can then be loaded and stored on the instance (and saved - if required)
class parse_files():
    """Read the whitespace-separated ``.dat`` files used by the deployment flow.

    Every ``get_*`` method opens one file (relative to the current working
    directory), parses it and stores the result on the instance; nothing is
    returned. All attributes start out as ``None``.
    """

    def __init__(self):
        # dataset details, filled by get_ds_details()
        self.no_variables = None
        self.no_variables_int = None
        self.no_test_vectors = None
        self.no_test_vectors_int = None
        self.no_classes_int = None

        # other variables and arrays containing details on the training model and testing set
        self.n_svs_data_int = None
        self.testing_mat_fi_data_uint16 = None
        self.testing_labels_data_int = None
        self.test_predictions_libsvm = None

        # these are for each classifier and will need updated several times (for each training model)
        self.svs_fi_data_uint16 = None
        # same name as the attribute written by get_sv_coeffs()
        self.coeffs_fi_data_uint32 = None
        self.offset_fi_data_uint32 = None

    @staticmethod
    def _read_text(file_name):
        """Return the complete text of ``file_name``; the file is always closed again."""
        with open(file_name, "r") as f:
            return f.read()

    @staticmethod
    def _read_array(file_name, dtype):
        """Return the whitespace-separated values of ``file_name`` as a 1-D array of ``dtype``."""
        words = parse_files._read_text(file_name).split()
        return np.asarray(np.array(words), dtype)

    def get_ds_details(self):
        """Read "ds_details.dat": number of variables, number of test vectors, number of classes."""
        ds_details_data_uint32 = self._read_array("ds_details.dat", np.uint32)

        # the *_int attributes are plain Python ints so they can be used for shapes and ranges
        # no_variables
        self.no_variables = ds_details_data_uint32[0]
        self.no_variables_int = int(ds_details_data_uint32[0])

        # no_test_vectors
        self.no_test_vectors = ds_details_data_uint32[1]
        self.no_test_vectors_int = int(ds_details_data_uint32[1])

        # number of classes
        self.no_classes_int = int(ds_details_data_uint32[2])

    def get_no_svs(self):
        """Read "n_svs.dat": the number of support vectors for each classifier."""
        self.n_svs_data_int = self._read_array("n_svs.dat", np.uint32)

    def get_testing_matrix(self):
        """Read "test_matrix_fi.dat" into a flat uint16 array."""
        self.testing_mat_fi_data_uint16 = self._read_array("test_matrix_fi.dat", np.uint16)

    def get_testing_labels(self):
        """Read the reference labels and the libsvm predictions (used for self-checking tests)."""
        self.testing_labels_data_int = self._read_array("test_labels.dat", np.uint8)
        self.test_predictions_libsvm = self._read_array("test_predictions_libsvm.dat", np.uint8)

    def get_support_vectors(self, current_classifier):
        """Read "svs_fi_<current_classifier>.dat": the support vectors of one classifier."""
        svs_file_name = "svs_fi_" + str(current_classifier) + ".dat"
        self.svs_fi_data_uint16 = self._read_array(svs_file_name, np.uint16)

    def get_sv_coeffs(self, current_classifier):
        """Read "coeffs_fi_<current_classifier>.dat": the support vector coefficients of one classifier."""
        coeffs_file_name = "coeffs_fi_" + str(current_classifier) + ".dat"
        self.coeffs_fi_data_uint32 = self._read_array(coeffs_file_name, np.uint32)

    def get_offset(self, current_classifier):
        """Read "offset_fi_<current_classifier>.dat": the offset of one classifier.

        The whole file content is converted as a single value, so the result
        is a zero-dimensional uint32 array.
        """
        offset_file_name = "offset_fi_" + str(current_classifier) + ".dat"
        self.offset_fi_data_uint32 = np.asarray(self._read_text(offset_file_name), np.uint32)
    
# the GEOMETRIC_VALUES_DRIVER class is instantiated once for each "geometric_values" IP core
# member functions include loading data to IP core AXI-lite slave interfaces for general design
# parameters and generating the contiguous buffers to transfer through DMA to the AXI stream (AXIS) 
# ports on the IP
# INHERITS FROM PARSE_FILES
from pynq import MMIO

import pynq.lib.dma

from pynq import allocate
#from pynq import Xlnk
#xlnk = Xlnk()

class deployment_driver(parse_files):
    """Run the deployment overlay: geometric values first, then test predictions.

    The geometric values of the binary classifiers are computed in batches,
    one classifier per "geometric_values" IP core of the notebook-level
    ``overlay``; the results are then streamed through the
    "test_predictions_1" IP core. The model files are read with the
    ``parse_files`` methods this class inherits.
    """

    def __init__(self):
        # give every parse_files attribute its default before the files are read
        super().__init__()

        # current classifier we are calculating the geometric values for
        self.current_classifier = None

        self.geometric_values_out = None

        # get parameters from dat files which are general to all classifiers - i.e. not the
        # support vectors, coefficient or offset
        self.get_ds_details()
        self.get_no_svs()
        self.get_testing_matrix()

        # used for parallel processing of geometric values
        self.classifier_indices = []
        self.no_classifiers = None

        # DMA handles of every geometric_values core (rebuilt by dma_transfer_parallel)
        self.dma_data_instances = []  # streams the support vectors followed by the testing matrix
        self.dma_cf_instances = []
        self.dma_ds_instances = []
        self.dma_gv_instances = []

        # 1-based core number -> IP hierarchy of the overlay
        self.g_v_dispatcher = {
            1: overlay.geometric_values_1,
            2: overlay.geometric_values_2,
            3: overlay.geometric_values_3,
            4: overlay.geometric_values_4,
            5: overlay.geometric_values_5,
            6: overlay.geometric_values_6,
        }

        self.geometric_values_all = None
        self.test_predictions = None

        self.geometric_values_time = 0
        self.test_predictions_time = 0

        # contiguous copy of the testing matrix, shared by all cores (see dma_init)
        self.tm_buffer = None

    def dma_init(self, no_classifiers):
        """Allocate the result arrays and the testing-matrix buffer reused by every batch.

        no_classifiers: number of binary classifiers (columns of geometric_values_all).
        """
        no_test_vectors = int(self.no_test_vectors_int)

        # store all geometric values here
        self.geometric_values_all = np.zeros(shape=(no_test_vectors, int(no_classifiers)), dtype=np.uint32)
        self.test_predictions = np.zeros(shape=(no_test_vectors,), dtype=np.uint8)

        self.tm_buffer = allocate(shape=(no_test_vectors * int(self.no_variables_int),), dtype=np.uint16)
        np.copyto(self.tm_buffer, self.testing_mat_fi_data_uint16)

    def dma_delete(self):
        """Release the testing-matrix buffer allocated by dma_init."""
        self.tm_buffer.close()

    def dma_transfer_parallel(self, no_classifiers):
        """Compute the geometric values of every classifier in self.classifier_indices.

        Classifier number k of the batch is handled by core k (in list order).
        The results are written to the matching columns of
        self.geometric_values_all and the elapsed transfer time is added to
        self.geometric_values_time.

        no_classifiers: kept for interface compatibility; not used here.
        """
        no_cores = len(self.g_v_dispatcher)

        # rebuild the handle lists on every call; appending made them grow by one full set
        # of cores per batch
        cores = [self.g_v_dispatcher[core] for core in range(1, no_cores + 1)]
        self.dma_data_instances = [core.dma_data for core in cores]
        self.dma_cf_instances = [core.dma_cf for core in cores]
        self.dma_ds_instances = [core.dma_ds for core in cores]
        self.dma_gv_instances = [core.dma_gv for core in cores]

        # iterate over only required classifiers
        active_classifiers = list(self.classifier_indices[:no_cores])
        no_test_vectors = int(self.no_test_vectors_int)

        # one receive buffer per active core
        geometric_values_buffers = [
            allocate(shape=(no_test_vectors, 1), dtype=np.uint32)
            for _ in active_classifiers
        ]
        # send buffers stay allocated until their transfers have been waited on
        send_buffers = []

        # accumulate with time taken to transfer data to DMA in each classifier
        dma_transfer_time = 0

        for n1, current_classifier in enumerate(active_classifiers):
            print("current_classifier: ", current_classifier)

            # get training model for current classifier to compute geometric values for this classifier
            self.get_support_vectors(current_classifier)
            self.get_offset(current_classifier)
            self.get_sv_coeffs(current_classifier)

            # no_svs is obtained at start from one file
            no_svs = int(self.n_svs_data_int[current_classifier - 1])
            svs_length = no_svs * int(self.no_variables_int)
            # length of coeffs plus one (for the offset)
            coeffs_stream_length = no_svs + 1

            svs_buffer = allocate(shape=(svs_length,), dtype=np.uint16)
            coeffs_buffer = allocate(shape=(coeffs_stream_length,), dtype=np.uint32)
            ds_buffer = allocate(shape=(3,), dtype=np.uint32)
            send_buffers.extend((svs_buffer, coeffs_buffer, ds_buffer))

            np.copyto(svs_buffer, self.svs_fi_data_uint16)
            # coefficients first, offset in the last word
            np.copyto(coeffs_buffer[0:coeffs_stream_length - 1], self.coeffs_fi_data_uint32)
            coeffs_buffer[coeffs_stream_length - 1] = self.offset_fi_data_uint32

            ds_buffer[0] = no_svs
            ds_buffer[1] = self.no_variables_int
            ds_buffer[2] = self.no_test_vectors_int

            # transfer to DMA
            start_time = time.time()
            self.dma_cf_instances[n1].sendchannel.transfer(coeffs_buffer)
            self.dma_ds_instances[n1].sendchannel.transfer(ds_buffer)
            self.dma_gv_instances[n1].recvchannel.transfer(geometric_values_buffers[n1])

            # the data channel carries the support vectors first and the testing matrix second,
            # so the first transfer has to finish before the second one is started
            self.dma_data_instances[n1].sendchannel.transfer(svs_buffer)
            self.dma_data_instances[n1].sendchannel.wait()
            self.dma_data_instances[n1].sendchannel.transfer(self.tm_buffer)

            dma_transfer_time = dma_transfer_time + time.time() - start_time

        start_time = time.time()

        for n1 in range(len(active_classifiers)):
            self.dma_data_instances[n1].sendchannel.wait()
            self.dma_cf_instances[n1].sendchannel.wait()
            self.dma_ds_instances[n1].sendchannel.wait()
            self.dma_gv_instances[n1].recvchannel.wait()

        elapsed_time = time.time() - start_time + dma_transfer_time
        self.geometric_values_time = self.geometric_values_time + elapsed_time

        # copy the results out before the receive buffers are released
        for n1, current_classifier in enumerate(active_classifiers):
            self.geometric_values_all[:, current_classifier - 1] = geometric_values_buffers[n1][:, 0]

        # every transfer has completed, so all buffers of this batch can be freed
        for buffer in send_buffers + geometric_values_buffers:
            buffer.close()

    def geometric_values_driver(self):
        """Compute the geometric values of all classifiers, one batch of cores at a time.

        The classifier numbers are split into consecutive batches no longer
        than the number of geometric_values cores, e.g. [1,2,3,4,5,6] then
        [7,8] for eight classifiers on six cores.
        """
        # one binary classifier per pair of classes
        no_classes = int(self.no_classes_int)
        no_classifiers = no_classes * (no_classes - 1) // 2
        self.no_classifiers = no_classifiers

        self.dma_init(no_classifiers)

        no_cores = len(self.g_v_dispatcher)
        for first_classifier in range(1, no_classifiers + 1, no_cores):
            last_classifier = min(first_classifier + no_cores - 1, no_classifiers)
            self.classifier_indices = list(range(first_classifier, last_classifier + 1))

            # call dma transfer - parallel calculate geometric values
            self.dma_transfer_parallel(no_classifiers)

        self.dma_delete()

    def test_predictions_driver(self):
        """Stream the geometric values through the test_predictions core.

        The result is stored in self.test_predictions as an independent
        (no_test_vectors, 1) uint8 array and the transfer time in
        self.test_predictions_time.
        """
        no_test_vectors = int(self.no_test_vectors_int)

        dma_gv = overlay.test_predictions_1.dma_gv
        dma_ds = overlay.test_predictions_1.dma_ds
        dma_tp = overlay.test_predictions_1.dma_tp

        ge_values_buffer = allocate(shape=(no_test_vectors, int(self.no_classifiers)), dtype=np.uint32)
        dataset_buffer = allocate(shape=(2,1), dtype=np.uint32)
        test_predictions_out_buffer = allocate(shape=(no_test_vectors,1), dtype=np.uint8)

        np.copyto(ge_values_buffer, self.geometric_values_all)

        dataset_buffer[0] = self.no_classes_int
        dataset_buffer[1] = self.no_test_vectors

        start_time = time.time()

        # transfer to DMA
        dma_gv.sendchannel.transfer(ge_values_buffer)
        dma_ds.sendchannel.transfer(dataset_buffer)
        dma_tp.recvchannel.transfer(test_predictions_out_buffer)

        dma_gv.sendchannel.wait()
        dma_ds.sendchannel.wait()
        dma_tp.recvchannel.wait()

        elapsed_time = time.time() - start_time
        self.test_predictions_time = elapsed_time

        # keep a copy: the receive buffer is released below, so keeping a reference to the
        # buffer itself would leave test_predictions pointing at freed memory
        self.test_predictions = np.array(test_predictions_out_buffer, dtype=np.uint8, copy=True)

        # release the buffers to avoid leaking memory
        ge_values_buffer.close()
        dataset_buffer.close()
        test_predictions_out_buffer.close()

    def get_test_predictions(self):
        """Top-level entry point: compute the geometric values, then the test predictions.

        Prints the total wall-clock time (including file reads) and the sum
        of the two recorded transfer times.
        """
        self.geometric_values_time = 0
        self.test_predictions_time = 0
        start_time = time.time()

        # get geometric values
        self.geometric_values_driver()

        # use geometric values to compute test predictions
        self.test_predictions_driver()

        elapsed_time = time.time() - start_time
        print("\nTIME TOTAL (WITH FILE READS): ", elapsed_time)

        print("TIME TO RECORD (NOT INCLUDING FILE READS): ", self.geometric_values_time + self.test_predictions_time)

# progress marker: shown once the class definitions above have been executed
print("\ndone")

## Instantiate deployment driver

In [None]:
# the constructor already reads the dataset details, the support-vector counts and the testing matrix
deployment_driver_inst = deployment_driver()

## Call deployment driver top-level function
-  similar to training

In [None]:
# computes the geometric values and then the test predictions; prints the measured times
deployment_driver_inst.get_test_predictions()

## Print resulting test predictions

In [None]:
# bare expression as the last line of the cell: the notebook displays its value
deployment_driver_inst.test_predictions

## The code below checks how accurate the classification is and how consistent it is with MATLAB (double-precision)

In [None]:
# compare the FPGA predictions with the reference labels and with the libsvm predictions
deployment_driver_inst.get_testing_labels()

fpga_predictions = deployment_driver_inst.test_predictions
vector_count = deployment_driver_inst.no_test_vectors_int

# wrong predictions - used to compute the accuracy
err_count = sum(
    1 for i in range(vector_count)
    if fpga_predictions[i] != deployment_driver_inst.testing_labels_data_int[i]
)
# differences to libsvm - these point to numerical precision issues of the algorithm
# (counted for inspection; only the accuracy is printed)
disimilarity_count = sum(
    1 for i in range(vector_count)
    if fpga_predictions[i] != deployment_driver_inst.test_predictions_libsvm[i]
)

print("accuracy = ", (vector_count - err_count) / vector_count * 100, "%")

## Plot resulting test predictions

In [None]:
fig = plt.figure()
# create the 3D axes through add_subplot: passing keyword arguments to Figure.gca() was
# deprecated in Matplotlib 3.4 and is rejected by later releases
ax = fig.add_subplot(111, projection='3d')

# row numbers of the test vectors predicted as class 1, 2 and 3
pred_1_indices = np.where(deployment_driver_inst.test_predictions[:,0] == 1)
pred_2_indices = np.where(deployment_driver_inst.test_predictions[:,0] == 2)
pred_3_indices = np.where(deployment_driver_inst.test_predictions[:,0] == 3)

# an array (instead of nested lists) allows selecting by an index array
testing_plot_data_new = np.asarray(testing_plot_data_new)

# rows 0/1/2 of testing_plot_data_new are plotted as x/y/z
testing_plot_data_x_1 = [float(i) for i in testing_plot_data_new[0][pred_1_indices[0]]]
testing_plot_data_x_2 = [float(i) for i in testing_plot_data_new[0][pred_2_indices[0]]]
testing_plot_data_x_3 = [float(i) for i in testing_plot_data_new[0][pred_3_indices[0]]]

testing_plot_data_y_1 = [float(i) for i in testing_plot_data_new[1][pred_1_indices[0]]]
testing_plot_data_y_2 = [float(i) for i in testing_plot_data_new[1][pred_2_indices[0]]]
testing_plot_data_y_3 = [float(i) for i in testing_plot_data_new[1][pred_3_indices[0]]]

testing_plot_data_z_1 = [float(i) for i in testing_plot_data_new[2][pred_1_indices[0]]]
testing_plot_data_z_2 = [float(i) for i in testing_plot_data_new[2][pred_2_indices[0]]]
testing_plot_data_z_3 = [float(i) for i in testing_plot_data_new[2][pred_3_indices[0]]]

# points coloured by predicted class
ax.scatter(testing_plot_data_x_1, testing_plot_data_y_1, testing_plot_data_z_1, c='red', s=1, alpha=1)
ax.scatter(testing_plot_data_x_2, testing_plot_data_y_2, testing_plot_data_z_2, c='green', s=1, alpha=1)
ax.scatter(testing_plot_data_x_3, testing_plot_data_y_3, testing_plot_data_z_3, c='blue', s=1, alpha=1)

# each plane satisfies w[0]*x + w[1]*y + w[2]*z + offset = 0; solve for z on the XX/YY grid
for plane_weights, plane_offset, plane_colour in (
        (weights_1, offset_1, 'yellow'),
        (weights_2, offset_2, 'magenta'),
        (weights_3, offset_3, 'cyan')):
    Z = -(plane_weights[0] * XX + plane_weights[1] * YY + plane_offset) / plane_weights[2]
    ax.plot_surface(XX,YY,Z,rstride=1,cstride=1,alpha=0.5,color=plane_colour)

ax.set_xlabel('1st principle component')
ax.set_ylabel('2nd principle component')
ax.set_zlabel('3rd principle component')

ax.set_xlim((-1,1))
ax.set_ylim((-1,1))
ax.set_zlim((-1,1))

plt.show()