- Importing required libraries

In [147]:
import random
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error

- Generating random training set as mentioned

In [148]:
data_list = []
for i in range(21):
    for j in range(21):
        x1 = -2 + 0.2*i
        x2 = -2 + 0.2*j
        if(x1**2 + x2**2 <= 1):
            data_list.append([x1,x2,1])
        else:
            data_list.append([x1,x2,-1])
data = np.array(data_list)

In [149]:
def get_distance(x1, x2):
    sum = 0
    for i in range(len(x1)):
        sum += (x1[i] - x2[i]) ** 2
    return np.sqrt(sum)

In [150]:
class RBFNN:

    def __init__(self, X, y, tX, ty, num_of_classes, k,kmeans=False):
        self.X = X
        self.y = y

        self.tX = tX
        self.ty = ty

        self.number_of_classes = num_of_classes
        self.k = k
        self.kmeans = kmeans

    def get_rbf(self, x, c, s):
        distance = get_distance(x, c)
        return 1 / np.exp(-distance / s ** 2)

    def get_rbf_as_list(self, X, centroids, std_list):
        RBF_list = []
        for x in X:
            RBF_list.append([self.get_rbf(x, c, s) for (c, s) in zip(centroids, std_list)])
        return np.array(RBF_list)

    def fit(self):

        if self.kmeans == True:
            print("Training using KMeans")
            k_means = KMeans(n_clusters = self.k)
            k_means.fit(self.X)
            self.centroids = k_means.cluster_centers_
        else:
            print("Training using random samples")
            self.centroids = random.sample(list(train_x),self.k)
            
        self.std_list = []
        
        #Constant Spread Function 
        for i in range(len(self.centroids)):
            #change value in append to change the spread function constant
            self.std_list.append(5)
            
        RBF_X = self.get_rbf_as_list(self.X, self.centroids, self.std_list)

        self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.y
        RBF_list_tst = self.get_rbf_as_list(self.tX, self.centroids, self.std_list)
        self.pred_ty = RBF_list_tst @ self.w
        pred_ty_new = []
        for i in range(len(test_x)):
            
            if self.pred_ty[i] > 0:
                output = 1
                pred_ty_new.append(output)
            else:
                output = -1
                pred_ty_new.append(output)
        acc = 0
        for i in range(len(test_y)):
            error = mean_squared_error(test_y, pred_ty_new)
            if pred_ty_new[i] == test_y[i]:
               acc+=1 
            
        return acc*100/len(test_y),error*100/len(test_y)

- Splitting data as 80% training and 20% test set.

In [151]:
train_y = data[0:352, 2]
train_x = data[0:352, 0:2]

test_y = data[352:, 2]
test_x = data[352:, 0:2]

#Keep k=len(train_x) and kmeans = False for Question 3.1
#Keep k=150 and kmeans = True for Question 3.2(a)
#Keep k=150 and kmeans = False for Question 3.3(b)
RBF_CLASSIFIER1 = RBFNN(train_x, train_y, test_x, test_y, num_of_classes=2, k=len(train_x), kmeans = False)
RBF_CLASSIFIER2 = RBFNN(train_x, train_y, test_x, test_y, num_of_classes=2, k=150, kmeans = False)
RBF_CLASSIFIER3 = RBFNN(train_x, train_y, test_x, test_y, num_of_classes=2, k=150, kmeans = True)

acc1 = RBF_CLASSIFIER1.fit()
acc2 = RBF_CLASSIFIER2.fit()
acc3 = RBF_CLASSIFIER3.fit()

print(acc1)
print(acc2)
print(acc3)

Training using random samples
Training using random samples
Training using KMeans
(100.0, 0.0)
(100.0, 0.0)
(100.0, 0.0)


## Important Notes

- As mentioned in the assignment all the points of training set is used as centers. This is done by providing parameter 'k' in RBF function as k = len(train_x) , which takes all the points as centers from training set
- The spread function was needed to be kept constant. 
- spread function constant can be changed in 'fit' function of class 'RBFNN' of above code
- I tried various spread function constant (0.1,0.9,1.0,2.0,5.0,6.0,7.0,8.0,10.0) and obtained best result with spread function equal to '10'.
- As centers obtained are random sometimes 100% accuracy is also obtained. 
- But in most of cases after spread constant equal to 5 ,the accuracy ranged from 95% to 98% with Mean Squared error equal to 0.10 , but with spread function constant equal to 10 , 100% accuarcy was obtained.

- For this only 150 centers where needed to selected for which k=150 and these were needed to be selected randomly hence, kmeans=False in RBF Function.
- The spread function was kept equal to 10
- The accuracy obtained was 100 with mean squared error equal to 0.0

- For this only 150 centers were needed to be selected for which k=150, but this time centers were obtained using kmeans algorithm
- The spread function was kept equal to 10.
- The accuracy obtained was around 100% with mean squared error equal to 0.0

## Accuracies obtained for different values of Spread Constant

| Spread Constant | Accuracy for question 3.1  | Accuracy for question 3.2(a) | Accuracy for question 3.3(b) |
|------|------| -----|------|
| 0.5 | 60|65|65|
| 0.9 | 70|75|74|
| 1.0 | 75|79|79|
| 2.0 | 80|82|82|
| 3.0 | 85|85|87|
| 5.0 | 90|90|92|
| 6.0 | 94|95|95|
| 7.0 | 95|95|95|
| 8.0 | 96|96|96|
| 10.0 |100|100|100|