# Part 1

## Import

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import decimal
import math
from scipy import signal

## Generate data

In [None]:
def get_sinData(start, stop, interval, noise_var = None):
    """Sample sin(2x) on [start, stop], optionally adding Gaussian noise.

    Args:
        start: First sample position.
        stop: Last sample position (included, as with ``np.linspace``).
        interval: Nominal step size; only used to derive the sample count.
        noise_var: Variance of the zero-mean Gaussian noise added to the
            targets. ``None`` or ``0`` leaves the targets clean.

    Returns:
        Tuple ``(inputs, targets)``, both column vectors of shape ``(n, 1)``.

    Raises:
        ValueError: If ``noise_var`` is negative.
    """
    # NOTE: the sample count is derived from one full 2*pi span and does not
    # depend on start/stop, so the real spacing only approximates `interval`.
    n = round((2 * np.pi) / interval)  # nr of data points
    inputs = np.linspace(start, stop, n).reshape(-1, 1)
    targets = np.sin(2 * inputs)
    if noise_var:
        if noise_var < 0:
            raise ValueError("noise_var must be non-negative")
        # np.random.normal expects a standard deviation, not a variance.
        noise = np.random.normal(loc=0, scale=np.sqrt(noise_var), size=targets.shape)
        targets = targets + noise
    return inputs, targets

def get_sqrtData(start, stop, interval, noise_var = None):
    """Sample the square wave square(2x) on [start, stop], optionally noisy.

    Args:
        start: First sample position.
        stop: Last sample position (included, as with ``np.linspace``).
        interval: Nominal step size; only used to derive the sample count.
        noise_var: Variance of the zero-mean Gaussian noise added to the
            targets. ``None`` or ``0`` leaves the targets clean.

    Returns:
        Tuple ``(inputs, targets)``, both column vectors of shape ``(n, 1)``.

    Raises:
        ValueError: If ``noise_var`` is negative.
    """
    # NOTE: the sample count is derived from one full 2*pi span and does not
    # depend on start/stop, so the real spacing only approximates `interval`.
    n = round((2 * np.pi) / interval)  # nr of data points
    inputs = np.linspace(start, stop, n).reshape(-1, 1)
    targets = signal.square(2 * inputs)
    if noise_var:
        if noise_var < 0:
            raise ValueError("noise_var must be non-negative")
        # np.random.normal expects a standard deviation, not a variance.
        noise = np.random.normal(loc=0, scale=np.sqrt(noise_var), size=targets.shape)
        targets = targets + noise
    return inputs, targets

def shuffle_data(input, output):
    """Shuffle patterns and responses together, keeping rows paired.

    Both arguments must be 2-D with the same number of rows. They are stacked
    column-wise, the rows are shuffled in place with ``np.random.shuffle``,
    and the stacked array is split back into its two column groups.

    Returns:
        Tuple ``(patterns, response)`` holding the shuffled rows.
    """
    n_in = input.shape[1]
    n_out = output.shape[1]
    stacked = np.column_stack((input, output))
    np.random.shuffle(stacked)
    return stacked[:, :n_in], stacked[:, n_in:n_in + n_out]


# patterns, output = get_sqrtData(0, 2*np.pi, 0.1)
#
# print('pat', patterns.shape)
# print('out', output.shape)
#
# shuf_pat, shuf_out = shuffle_data(patterns, output)
# print('shufPat', shuf_pat.shape)
# print('shufOut', shuf_out.shape)

## 3.1 Batch mode training using least squares - supervised learning

In [None]:
class RBF_NN():
    """Radial-basis-function network: Gaussian hidden units, linear output.

    Args:
        mu_list: Array of shape ``(n_units, d)`` with one RBF centre per row.
        sigma: Width shared by all Gaussian units.
    """

    def __init__(self, mu_list, sigma):
        self.W = None           # output weights, set by the training methods
        self.mu = mu_list
        self.train_phi = None   # design matrix of the last training set
        self.sigma = sigma
        self.trainError = []    # replaced by a float in run()
        self.testError = []     # replaced by a float in run()

    def RBF_kernel(self, x, mu, sigma):
        """Return the Gaussian activation exp(-||x - mu||^2 / (2 sigma^2))."""
        return np.exp((-(np.linalg.norm(x - mu)) ** 2) / (2 * sigma ** 2))

    def getPhi(self, inputs):
        """Return the ``(N, n_units)`` matrix of unit activations for `inputs`.

        Vectorized equivalent of applying ``RBF_kernel`` to every
        (input row, centre row) pair with ``self.sigma``.
        """
        samples = np.asarray(inputs)
        centres = np.asarray(self.mu)
        diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
        sq_dist = np.sum(diff ** 2, axis=2)
        return np.exp(-sq_dist / (2 * self.sigma ** 2))

    def batch_train(self, inputs, response):
        """Fit the output weights by linear least squares.

        Uses ``np.linalg.lstsq`` instead of inverting ``phi^T phi``: the
        explicit inverse is singular or badly conditioned when units overlap
        strongly or outnumber the training samples.
        """
        self.train_phi = self.getPhi(inputs)
        self.W = np.linalg.lstsq(self.train_phi, response, rcond=None)[0]

    def sequential_train(self, inputs, response, eta, epochs):
        """Fit the output weights with the delta rule, one sample at a time.

        Args:
            inputs: Training patterns, shape ``(N, d)``.
            response: Training targets, one row per pattern.
            eta: Learning rate.
            epochs: Number of passes over the training set.
        """
        self.W = np.random.randn(self.mu.shape[0], 1)/10
        self.train_phi = self.getPhi(inputs)
        for _ in range(epochs):
            for row in range(inputs.shape[0]):
                phi_row = self.train_phi[row, :]
                e = response[row] - np.dot(phi_row, self.W)
                delta_W = eta * e * phi_row
                self.W = np.add(self.W, np.reshape(delta_W, self.W.shape))

    def predict(self, inputs):
        """Return the network output for every row of `inputs`."""
        phi = self.getPhi(inputs)
        return phi.dot(self.W)

    def abs_error(self, data, response):
        """Return the mean absolute difference between `response` and the prediction."""
        predictions = self.predict(data)
        return np.mean(np.abs(response - predictions))

    def run(self, train_data, train_response, test_data, test_response, batch=True, eta=0, epochs=0):
        """Train (batch or sequential) and store train/test absolute errors.

        `eta` and `epochs` are only used when `batch` is false.
        """
        if batch:
            self.batch_train(train_data, train_response)
        else:
            self.sequential_train(train_data, train_response, eta, epochs)
        self.trainError = self.abs_error(train_data, train_response)
        self.testError = self.abs_error(test_data, test_response)


def even_rbf_center(count):
    """Return `count` centres i*2*pi/count (i = 0..count-1) as a (count, 1) column."""
    centres = np.array([idx * 2 * np.pi / (count) for idx in range(count)])
    return np.reshape(centres, (centres.shape[0], 1))
  
def kMeans(data, k):
    """Cluster `data` into `k` groups with Lloyd's k-means iteration.

    Args:
        data: Samples of shape ``(N, d)``; a 1-D array is treated as ``(N, 1)``.
        k: Number of clusters.

    Returns:
        Array of shape ``(k, d)`` with the cluster centres.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    n_points = data.shape[0]
    # Start from distinct samples whenever there are enough of them, so two
    # clusters do not begin on the very same point.
    start_idx = np.random.choice(n_points, size=k, replace=k > n_points)
    clusters = data[start_idx]
    oldClusters = clusters.copy()

    converged = np.inf
    while converged > 0.000001:
        # Explicit norm over the feature axis keeps the (N, k) shape even
        # when k == 1 or N == 1.
        distances = np.linalg.norm(data[:, np.newaxis, :] - clusters[np.newaxis, :, :], axis=2)
        closestCluster = np.argmin(distances, axis=1)
        for i in range(k):
            pointsForCluster = data[closestCluster == i]
            # A cluster owning a single point must still move onto it; only
            # empty clusters keep their previous position.
            if len(pointsForCluster) > 0:
                clusters[i] = np.mean(pointsForCluster, axis=0)
        converged = np.linalg.norm(clusters - oldClusters)
        oldClusters = clusters.copy()
    return clusters


def plot_predictedCurve(input_data, prediction, title):
    """Plot `prediction` against `input_data` under `title` and show the figure."""
    plt.plot(input_data, prediction)
    plt.title(title)
    plt.show()

In [None]:
###### SINUS data #######
# Batch least-squares sweep on noise-free sin(2x): one network is fitted for
# every unit count from 1 to 100 and its train/test error is printed.
#mu_list = even_rbf_center(50)
sigma = 0.15  # kernel width shared by every network in the sweep


#### training data, from 0 up to 2*pi ####
#### test data, from 0.05 up to 2*pi ####

sin_train_input, sin_train_output = get_sinData(0, 2*np.pi, 0.1)
sin_test_input, sin_test_output = get_sinData(0.05, 2*np.pi, 0.1)


# k is zero-based, so k+1 is the number of evenly spaced RBF units.
for k in range(100):
    mu_list = even_rbf_center(k+1)
    batch_sin = RBF_NN(mu_list, sigma)
    batch_sin.run(sin_train_input, sin_train_output, sin_test_input, sin_test_output, batch=True, eta=0, epochs=0)
    #test_prediction = batch_sin.predict(sin_test_input)
    #plot_predictedCurve(sin_test_input, test_prediction, 'Test with ' + str(k+1) + ' evenly spaced kernels')
    print('Train error for kernel = ' + str(k+1), batch_sin.trainError, ' Test error: ', batch_sin.testError)
    # Optional early exit once the test error is small enough:
    # if batch_sin.testError < 0.001:
    #     break

##### SQR DATA ######

# mu_list = even_rbf_center(50)
# sigma = 0.1
#
#
# sqr_train_input, sqr_train_output = get_sqrtData(0, 2*np.pi, 0.1)
# sqr_test_input, sqr_test_output = get_sqrtData(0.05, 2*np.pi, 0.1)

## 3.2 Regression with noise 

In [None]:
class RBF_NN():
    """Radial-basis-function network: Gaussian hidden units, linear output.

    Args:
        mu_list: Array of shape ``(n_units, d)`` with one RBF centre per row.
        sigma: Width shared by all Gaussian units.
    """

    def __init__(self, mu_list, sigma):
        self.W = None           # output weights, set by the training methods
        self.mu = mu_list
        self.train_phi = None   # design matrix of the last training pass
        self.sigma = sigma
        self.trainError = []    # replaced by a float in run()
        self.testError = []     # replaced by a float in run()
        self.online_err = []    # per-epoch training error of sequential_train

    def RBF_kernel(self, x, mu, sigma):
        """Return the Gaussian activation exp(-||x - mu||^2 / (2 sigma^2))."""
        return np.exp((-(np.linalg.norm(x - mu)) ** 2) / (2 * sigma ** 2))

    def getPhi(self, inputs):
        """Return the ``(N, n_units)`` matrix of unit activations for `inputs`.

        Vectorized equivalent of applying ``RBF_kernel`` to every
        (input row, centre row) pair with ``self.sigma``.
        """
        samples = np.asarray(inputs)
        centres = np.asarray(self.mu)
        diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
        sq_dist = np.sum(diff ** 2, axis=2)
        return np.exp(-sq_dist / (2 * self.sigma ** 2))

    def shuffle_data(self, input, output):
        """Shuffle two 2-D arrays with the same row permutation.

        Returns:
            Tuple ``(patterns, response)`` holding the shuffled rows.
        """
        complete_data = np.column_stack((input, output))
        np.random.shuffle(complete_data)
        patterns = complete_data[:, 0:input.shape[1]]
        response = complete_data[:, input.shape[1]:input.shape[1] + output.shape[1]]
        return patterns, response

    def batch_train(self, inputs, response):
        """Fit the output weights by linear least squares.

        Uses ``np.linalg.lstsq`` instead of inverting ``phi^T phi``: the
        explicit inverse is singular or badly conditioned when units overlap
        strongly or outnumber the training samples.
        """
        self.train_phi = self.getPhi(inputs)
        self.W = np.linalg.lstsq(self.train_phi, response, rcond=None)[0]

    def sequential_train(self, inputs, response, eta, epochs):
        """Fit the output weights with the delta rule on reshuffled data.

        The training error after every epoch is stored in ``self.online_err``.

        Args:
            inputs: Training patterns, shape ``(N, d)``.
            response: Training targets, shape ``(N, 1)``.
            eta: Learning rate.
            epochs: Number of passes over the training set.
        """
        self.W = np.random.randn(self.mu.shape[0], 1) / 10
        err_list = []
        # The activations depend only on the inputs, so compute them once and
        # shuffle their rows together with the targets instead of rebuilding
        # the design matrix every epoch.
        phi = self.getPhi(inputs)
        for _ in range(epochs):
            phi, response = self.shuffle_data(phi, response)
            self.train_phi = phi
            for row in range(phi.shape[0]):
                phi_row = phi[row, :]
                e = response[row] - np.dot(phi_row, self.W)
                dW = eta * e * phi_row
                self.W = np.add(self.W, np.reshape(dW, self.W.shape))
            err_list.append(np.mean(np.abs(response - phi.dot(self.W))))
        self.online_err = err_list

    def predict(self, inputs):
        """Return the network output for every row of `inputs`."""
        phi = self.getPhi(inputs)
        return phi.dot(self.W)

    def abs_error(self, data, response):
        """Return the mean absolute difference between `response` and the prediction."""
        predictions = self.predict(data)
        return np.mean(np.abs(response - predictions))

    def run(self, train_data, train_response, test_data, test_response, batch=True, eta=0, epochs=0):
        """Train (batch or sequential) and store train/test absolute errors.

        `eta` and `epochs` are only used when `batch` is false.
        """
        if batch:
            self.batch_train(train_data, train_response)
        else:
            self.sequential_train(train_data, train_response, eta, epochs)
        self.trainError = self.abs_error(train_data, train_response)
        self.testError = self.abs_error(test_data, test_response)

def even_rbf_center(count):
    """Return `count` centres i*2*pi/count (i = 0..count-1) as a (count, 1) column."""
    positions = [step * 2 * np.pi / (count) for step in range(count)]
    return np.array(positions).reshape(len(positions), 1)

def random_rbf_center(count):
    """Return `count` centres drawn uniformly from [0, 2*pi) as a (count, 1) column."""
    samples = np.random.uniform(0, 2 * np.pi, count)
    return samples.reshape(-1, 1)

## 3.3 Competitive learning

In [None]:
def kMeans(data, k):
    """Cluster `data` into `k` groups with Lloyd's k-means iteration.

    Args:
        data: Samples of shape ``(N, d)``; a 1-D array is treated as ``(N, 1)``.
        k: Number of clusters.

    Returns:
        Array of shape ``(k, d)`` with the cluster centres.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    n_points = data.shape[0]
    # Start from distinct samples whenever there are enough of them, so two
    # clusters do not begin on the very same point.
    start_idx = np.random.choice(n_points, size=k, replace=k > n_points)
    clusters = data[start_idx]
    oldClusters = clusters.copy()

    converged = np.inf
    while converged > 0.000001:
        # Explicit norm over the feature axis keeps the (N, k) shape even
        # when k == 1 or N == 1.
        distances = np.linalg.norm(data[:, np.newaxis, :] - clusters[np.newaxis, :, :], axis=2)
        closestCluster = np.argmin(distances, axis=1)
        for i in range(k):
            pointsForCluster = data[closestCluster == i]
            # A cluster owning a single point must still move onto it; only
            # empty clusters keep their previous position.
            if len(pointsForCluster) > 0:
                clusters[i] = np.mean(pointsForCluster, axis=0)
        converged = np.linalg.norm(clusters - oldClusters)
        oldClusters = clusters.copy()
    return clusters


def plot_predictedCurve(input_data, prediction, title):
    """Plot `prediction` against `input_data` with a title and fixed axis labels."""
    plt.plot(input_data, prediction)
    plt.title(title)
    # Axis captions are hard-coded for the error-vs-units plots.
    plt.xlabel("number of nodes")
    plt.ylabel("test error")
    plt.show()


def plot_variation(datasets, labels):
    """Draw every series in `datasets` on one new figure, labelled from `labels`.

    `labels[i]` becomes the legend entry of the i-th series.
    """
    plt.figure()
    for idx, series in enumerate(datasets):
        plt.plot(series, label="{0}".format(labels[idx]))
    plt.legend()
    plt.xlabel("number of nodes")
    plt.ylabel("test error")
    plt.show()

In [None]:
# Datasets shared by the experiments below. Each generator is called twice per
# split: once with noise_var=0.1 and once without noise (the *_nonoise names).
# Training samples start at 0 and test samples at 0.05, both ending at 2*pi.
# The noisy calls draw from NumPy's global RNG, so the call order matters when
# a seed is set.
sin_train_input, sin_train_output = get_sinData(0, 2 * np.pi, 0.1, noise_var=0.1)
sin_train_input_nonoise, sin_train_output_nonoise = get_sinData(0, 2 * np.pi, 0.1)
sin_test_input, sin_test_output = get_sinData(0.05, 2 * np.pi, 0.1, noise_var=0.1)
sin_test_input_nonoise, sin_test_output_nonoise = get_sinData(0.05, 2 * np.pi, 0.1)

# Same four splits for the square-wave target.
sqrt_train_input, sqrt_train_output = get_sqrtData(0, 2 * np.pi, 0.1, noise_var=0.1)
sqrt_train_input_nonoise, sqrt_train_output_nonoise = get_sqrtData(0, 2 * np.pi, 0.1)
sqrt_test_input, sqrt_test_output = get_sqrtData(0.05, 2 * np.pi, 0.1, noise_var=0.1)
sqrt_test_input_nonoise, sqrt_test_output_nonoise = get_sqrtData(0.05, 2 * np.pi, 0.1)

In [None]:

"""3.1.1"""
# number of RBF units
def compare_units(sigma, train_input, train_output, test_input, test_output):
    """Sweep the number of RBF units and compare batch vs. online test error.

    For 1 to 25 units a batch (least-squares) and an online (delta rule,
    eta=0.02, 1000 epochs) network are trained on the same centres. The sweep
    stops early once the batch test error has risen three times in a row, and
    both error curves are plotted at the end.

    Args:
        sigma: Width of the Gaussian units.
        train_input, train_output: Training patterns and targets.
        test_input, test_output: Test patterns and targets.

    Returns:
        Tuple ``(err_batch, err_online)`` of test-error lists; entry ``i``
        belongs to the networks with ``i + 1`` units.
    """
    err_batch = []
    err_online = []
    for k in range(25):
        # Swap the centre-placement routine here for the different tasks.
        mu_list = even_rbf_center(k + 1)  # evenly positioned, 3.2.1
        # mu_list = random_rbf_center(k + 1)  # randomly positioned, 3.2.4
        # mu_list = kMeans(train_input, k + 1)  # competitive learning, 3.3
        batch = RBF_NN(mu_list, sigma)
        batch.run(train_input, train_output, test_input, test_output, batch=True, eta=0, epochs=0)
        online = RBF_NN(mu_list, sigma)
        online.run(train_input, train_output, test_input, test_output, batch=False, eta=0.02,
                   epochs=1000)
        err_batch.append(batch.testError)
        err_online.append(online.testError)
        if batch.testError < 0.1:
            print("batch:", k)
        if online.testError < 0.1:
            print("online:", k)
        # Crude early stop: the batch test error rose on the last three steps.
        if len(err_batch) > 5 and err_batch[-1] > err_batch[-2] > err_batch[-3] > err_batch[-4]:
            best_batch = min(err_batch)
            best_online = min(err_online)
            # List positions are zero-based, so position + 1 is the unit count.
            print("The best performance of batch mode is {0} test error with {1} units".format(best_batch, err_batch.index(best_batch) + 1))
            print("The best performance of online mode is {0} test error with {1} units".format(best_online, err_online.index(best_online) + 1))
            break

    plot_variation([err_batch, err_online], ["batch", "online"])
    return err_batch, err_online
#compare_units(0.15, sin_train_input_nonoise, sin_train_output_nonoise, sin_test_input_nonoise, sin_test_output_nonoise)
#compare_units(0.2, sqrt_train_input_nonoise, sqrt_train_output_nonoise, sqrt_test_input_nonoise, sqrt_test_output_nonoise)


"""3.2.1 compare units with different sigmas/width"""
"""3.2.4 Comparison of positioning of RBF nodes"""
"""3.3.1 compare the performance of CL and previous approach"""
# err_batch, err_online = compare_units(2, sin_train_input, sin_train_output, sin_test_input, sin_test_output)
def comp_units_width(train_input, train_output, test_input, test_output):
    """Run the unit-count sweep for several kernel widths and plot the results.

    For each width, ``compare_units`` yields one batch and one online
    test-error curve; all batch curves are plotted together, then all online
    curves.
    """
    widths = [0.1, 0.2, 0.5, 1, 2]
    batch_curves, online_curves, legend = [], [], []
    for sigma in widths:
        print(sigma)
        batch_curve, online_curve = compare_units(sigma, train_input, train_output, test_input, test_output)
        batch_curves.append(batch_curve)
        online_curves.append(online_curve)
        legend.append("sigma = {0}".format(sigma))
    plot_variation(batch_curves, legend)
    plot_variation(online_curves, legend)

#comp_units_width(sin_train_input, sin_train_output, sin_test_input, sin_test_output)
#comp_units_width(sqrt_train_input, sqrt_train_output, sqrt_test_input, sqrt_test_output)

"""3.2.2 rate of convergence"""
def eta_comp(train_input, train_output, test_input, test_output):
    """Plot the per-epoch online training error for several learning rates.

    Every run uses 14 evenly spaced units of width 1 and 500 epochs.
    """
    centres = even_rbf_center(14)
    plt.figure()
    for rate in (0.01, 0.02, 0.05, 0.1, 0.3):
        net = RBF_NN(centres, 1)
        net.run(train_input, train_output, test_input, test_output,
                batch=False, eta=rate, epochs=500)
        plt.plot(net.online_err, label="{0}".format(rate))
    plt.legend()
    plt.xlabel("epoches")
    plt.ylabel("training error")
    plt.show()

# eta_comp(sin_train_input, sin_train_output, sin_test_input, sin_test_output)
# eta_comp(sqrt_train_input, sqrt_train_output, sqrt_test_input, sqrt_test_output)

"""3.2.5 test performance on the original clean data"""
def comp_noise_clean(train_input, train_output, test_input, test_output):
    """Run the unit-count sweep per kernel width, printing and plotting the errors.

    Same sweep as ``comp_units_width`` but also prints each curve and the
    collected results.
    """
    widths = [0.1, 0.2, 0.5, 1, 2]
    legend = []
    batch_curves = []
    online_curves = []
    for sigma in widths:
        print(sigma)
        batch_curve, online_curve = compare_units(sigma, train_input, train_output, test_input, test_output)
        print("batch", batch_curve)
        print("online", online_curve)
        batch_curves.append(batch_curve)
        online_curves.append(online_curve)
        legend.append("sigma = {0}".format(sigma))
    print(batch_curves)
    print(online_curves)
    plot_variation(batch_curves, legend)
    plot_variation(online_curves, legend)
"""
[[0.6264354475153754, 0.6266872172201615, 0.4887120679937547, 0.6274310229155515, 0.4150086359628111, 0.3543160763866485, 0.3227317092451779, 0.2926115926467741, 0.2290565819211869, 0.17730540345429746, 0.13674434779722747, 0.09748233548008205, 0.07287464120206652, 0.05947602873505087, 0.053038509912698005, 0.05341336482561981, 0.05291054469547832, 0.05693126226542259, 0.05575939640318599, 0.05840601079181877, 0.058254458235089755, 0.05916936905006096, 0.05910304368844218, 0.057308146091864944, 0.05874246517110581], [0.597632692194807, 0.597412771669332, 0.4557724764071912, 0.5970836815312454, 0.2544674976085406, 0.13212690128488344, 0.06575867005783487, 0.059264540666089126, 0.05998476725700116, 0.05809724502332455, 0.05925073802528511, 0.058370813263181276, 0.0564656508767942, 0.05027261323179009, 0.049153910296232665, 0.0520162067479187, 0.052969593130839654, 0.0519950880396916, 0.05331386805391762, 0.05451670567491504, 0.05398383853489602, 0.05534412648940418, 0.054566941833275345, 0.05451899499223749, 0.05158389402773187], [0.5671237387037428, 0.5669447936602273, 0.5084081285571801, 0.5321584574143421, 0.11175966270493873, 0.11846128531374013, 0.09658380343181636, 0.06019065952907639, 0.046564671486894194, 0.04789764278297838, 0.04779637095118584, 0.045473098795445394, 0.046416753936087705, 0.047116763266506816, 0.046675416369999244, 0.06061983901593838, 0.05650796127141547, 0.15062331302208443, 18.298377538272007, 0.13142113124213708, 1.0596934453329843, 0.49963352183336424, 0.7043037387599488, 0.4384057462019677, 1.2365277379739397]]
[[0.6264499164202747, 0.6266851992632196, 0.4887078593749584, 0.6274527902271275, 0.4150020180884687, 0.3542566234790148, 0.3226107416996139, 0.2925483567116449, 0.22898519837582731, 0.17726624612942624, 0.13674101762469587, 0.09750988564701499, 0.07290233842465933, 0.05948254258809207, 0.05301022727710761, 0.05327872356483626, 0.052851393676658524, 0.05674185396571497, 0.05572757284683715, 0.05842974300584201, 0.058252592187843774, 0.05913625461483281, 0.058663724185059676, 0.05647982071661344, 0.05740899890202468], [0.59764146272757, 0.5974459003445108, 0.4556668060450438, 0.5970598179822615, 0.25451170981336707, 0.13215061070793038, 0.06555129779961581, 0.05942798218287982, 0.05978739948958395, 0.05776984403494936, 0.058969896341052076, 0.05871236030646619, 0.057631969884859746, 0.05638789385211598, 0.05633392092121069, 0.05672186102629433, 0.05575464709932623, 0.055902482182464575, 0.05363161249597479, 0.05476559038054154, 0.05409439144140135, 0.053905178675178946, 0.05209427912395047, 0.05462163501149573, 0.04896314637894932], [0.567035237154541, 0.5669212096304944, 0.5085901266065697, 0.5320300436199145, 0.11205438314512396, 0.10792166480655023, 0.09422844935535439, 0.08712702150626549, 0.08669586776321465, 0.07883190390594812, 0.07874894840743714, 0.07837522252546383, 0.079823099657737, 0.0744371892005381, 0.07392631594113643, 0.07197874221525043, 0.06929622359000628, 0.06930197439077602, 0.0749446316209452, 0.07304467140122284, 0.07564711543702304, 0.06618472982652028, 0.0707766090012465, 0.06887440099138069, 0.06717816601279836]]
"""
# comp_noise_clean(sin_train_input, sin_train_output, sin_test_input_nonoise,sin_test_output_nonoise)
# comp_noise_clean(sqrt_train_input, sqrt_train_output, sqrt_test_input_nonoise,sqrt_test_output_nonoise)