In [22]:
# importing all the essential libraries
import numpy as np
import pandas as pd
import random
from sklearn.cluster import KMeans

#generate the input dataset
Xi = []
Xj = []
output_func = []

# two for loops for Xi and Xj to generate 441 random samples.
for i in range(0,21):
    for j in range(0,21):
        Xi_eq = (-2 + 0.2*i)
        Xj_eq = (-2 + 0.2*j)
        Xi.append(Xi_eq)
        Xj.append(Xj_eq)
        # target generation based on the function.
        x1 = Xi_eq**2
        x2 = Xj_eq**2
        calc = x1 + x2
        # storing the target/ output in output_func.
        if (calc <= 1):
            output_func.append(1)
        elif (calc > 1):
            output_func.append(-1)

# creating a dataframe of Xi, Xj and target value.
df = pd.DataFrame({'Xi':Xi,'Xj':Xj,'f':output_func})

In [23]:
# shuffling the dataset for 80-20 : Train - Test split 
df = df.sample(frac=1).reset_index(drop=True)
df = np.array(df)

# Train set and Test set split (80-20)
train_set_size = int(0.8 * len(df)) 
test_set_size = int(0.2 * len(df))

# creating X_train, X_test, y_train and y_test.
X_train = df[:train_set_size,0:2] 
X_test = df[train_set_size:,0:2]
y_train = df[:train_set_size,2]
y_test = df[train_set_size:,2]

In [24]:
# function to calculate euclidean distance between two centroids(points)
def get_distance(x1, x2):
    sum = 0
    for i in range(len(x1)):
        sum += (x1[i] - x2[i]) ** 2
    return np.sqrt(sum)

# function to calculate mean square error given actual and predicted output
def mean_square_error(y, y_pred):
    sum = 0
    for i in range(len(y)):
        sum += (y[i] - y_pred[i]) ** 2
    return sum / len(y)

# RBF kernel function for the hidden nodes
def get_rbf(x, c, s):
    return np.exp(-get_distance(x, c)**2 / (2 * s**2))

# A function which will create list of the RBF kernel function outputs
def get_rbf_as_list(X, centroids, std_list):
    RBF_list = []
    for x in X:
        RBF_list.append([get_rbf(x, c, s) for (c, s) in zip(centroids, std_list)])
    return np.array(RBF_list)

# Function to calculate accuracy given actual and predicted output
def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0

In [25]:
# The main fit function to calculate the RBFNN
def fit(X, y, tX, ty,spread_func,k,k_means = False):
    # If-else loop to select centroids randomly or using kmeans clustering
    if k_means == True:
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(X)
        centroids = kmeans.cluster_centers_
    elif k_means == False:
        centroids = random.sample(list(X), k)

    # creating a list to store the spread function 
    std_list = []
    # appending the spread function in the list of spread function
    for i in range(len(centroids)):
        std_list.append(spread_func)
    
    # Creating list of RBF function output and storing it in the RBF_X
    RBF_X = get_rbf_as_list(X, centroids, std_list)

    # finding the weights using least squared linear regression
    w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ y
    
    # Obtaining the predicted output of the test set.
    RBF_list_tst = get_rbf_as_list(tX, centroids, std_list)
    pred_ty = RBF_list_tst @ w
    
    # obtaining the final prediction of the test set
    final_pred = []
    for i in pred_ty:
        if i < 0:
            final_pred.append(-1)
        elif i >= 0:
            final_pred.append(1)
    # calculating the accuracy of the test accuracy
    accuracy_test = accuracy_metric(ty,final_pred)
    print("The final testing accuracy is: ",accuracy_test)
    
    # Obtaining the mean square error of the test dataset.
    mse_test = mean_square_error(ty,final_pred)
    print("The Mean Square Error of Testing dataset is: ",mse_test)
    
    # obtaining the predicted output of the train set
    RBF_list_train = get_rbf_as_list(X, centroids, std_list)
    pred_trainy = RBF_list_train @ w
    
    # obtaining the final prediction of the train set
    final_pred_train = []
    print("-----------------------------------------------------------------------------")
    for i in pred_trainy:
        if i < 0:
            final_pred_train.append(-1)
        elif i >= 0:
            final_pred_train.append(1)
    # obtaining the final train set accuracy.
    accuracy_train = accuracy_metric(y,final_pred_train)
    print("the final training accuracy is: ",accuracy_train)
    
    # mean square error of the train dataset.
    mse_train = mean_square_error(y,final_pred_train)
    print("The Mean Square Error of Training Dataset is: ",mse_train)

In [26]:
# First part of the Question 3
# in here k = number of centroids
k = len(X_train)
spread_list = [0.1,0.5,0.9,1,3,5,9,10]
for i in spread_list:
    print("*****************************For Spread Function of: ",i,"*****************************")
    fit(X_train, y_train, X_test, y_test,i, k, k_means=False)

*****************************For Spread Function of:  0.1 *****************************
The final testing accuracy is:  94.3820224719101
The Mean Square Error of Testing dataset is:  0.2247191011235955
-----------------------------------------------------------------------------
the final training accuracy is:  100.0
The Mean Square Error of Training Dataset is:  0.0
*****************************For Spread Function of:  0.5 *****************************
The final testing accuracy is:  93.25842696629213
The Mean Square Error of Testing dataset is:  0.2696629213483146
-----------------------------------------------------------------------------
the final training accuracy is:  100.0
The Mean Square Error of Training Dataset is:  0.0
*****************************For Spread Function of:  0.9 *****************************
The final testing accuracy is:  97.75280898876404
The Mean Square Error of Testing dataset is:  0.0898876404494382
--------------------------------------------------------

In [18]:
# second part of Question 3
# in here k = number of centroids
# firstly randomly selecting the centers
k = 150
spread_list = [0.1,0.5,0.9,1,3,5,9,10]
for i in spread_list:
    print("*****************************For Spread Function of: ",i,"***************************** random center selection")
    fit(X_train, y_train, X_test, y_test,i, k, k_means=False)
    print("*****************************For Spread Function of: ",i,"***************************** Kmeans center selection")
    fit(X_train, y_train, X_test, y_test,i, k, k_means=True)

*****************************For Spread Function of:  0.1 ***************************** random center selection
The final testing accuracy is:  93.25842696629213
The Mean Square Error of Testing dataset is:  0.2696629213483146
-----------------------------------------------------------------------------
the final training accuracy is:  98.57954545454545
The Mean Square Error of Training Dataset is:  0.056818181818181816
*****************************For Spread Function of:  0.1 ***************************** Kmeans center selection
The final testing accuracy is:  96.62921348314607
The Mean Square Error of Testing dataset is:  0.1348314606741573
-----------------------------------------------------------------------------
the final training accuracy is:  98.86363636363636
The Mean Square Error of Training Dataset is:  0.045454545454545456
*****************************For Spread Function of:  0.5 ***************************** random center selection
The final testing accuracy is:  94.38202