# 3 - Data preparation and visualization of some samples

In [44]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans

# Load the Iris dataset and pull out the feature matrix and the targets.
iris_dataset = load_iris()
X, y = iris_dataset.data, iris_dataset.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
    
def gaussian_rbf(x, center, gamma):
    """Return the Gaussian RBF activation ``exp(-gamma * ||x - center||^2)``.

    Args:
        x: Input point.
        center: Centre of the radial basis function.
        gamma: Width coefficient multiplying the squared distance.
    """
    distance = np.linalg.norm(x - center)
    squared_distance = distance ** 2
    return np.exp(-gamma * squared_distance)

class RBFNetwork:
    """Radial-basis-function network with Gaussian units and a linear readout.

    Typical use: construct, choose the centres with ``select_random_centers``
    or ``select_centers_kmeans``, call ``fit``, then ``predict``.

    Attributes set by the methods below:
        centers: Array of RBF centres, one per row (``None`` until selected).
        weights: Output weights solved by ``fit`` (``None`` until fitted).
    """

    def __init__(self, centers_count, gamma=1.0):
        """Store the hyperparameters.

        Args:
            centers_count: Number of RBF centres (hidden units).
            gamma: Width coefficient of the Gaussian,
                ``exp(-gamma * ||x - c||^2)``.
        """
        self.centers_count = centers_count
        self.gamma = gamma
        # Filled in later by the centre-selection methods and by fit().
        self.centers = None
        self.weights = None

    def select_random_centers(self, X, random_state=None):
        """Use ``centers_count`` distinct rows of ``X`` as the centres.

        Args:
            X: 2-D array of samples, one per row.
            random_state: Optional seed for a reproducible draw. When left as
                ``None`` the global NumPy random state is used, exactly as
                before this parameter existed.

        Raises:
            ValueError: If ``centers_count`` exceeds the number of rows
                (raised by the underlying sampler).
        """
        X = np.asarray(X)
        if random_state is None:
            indexes = np.random.choice(
                X.shape[0], self.centers_count, replace=False
            )
        else:
            rng = np.random.default_rng(random_state)
            indexes = rng.choice(X.shape[0], self.centers_count, replace=False)
        self.centers = X[indexes]

    def select_centers_kmeans(self, X):
        """Use the K-means cluster centres of ``X`` as the RBF centres."""
        kmeans = KMeans(n_clusters=self.centers_count, random_state=42)
        kmeans.fit(X)
        self.centers = kmeans.cluster_centers_

    def compute_interpolation_matrix(self, X):
        """Return the matrix ``G`` with ``G[i, j] = exp(-gamma * ||X[i] - c_j||^2)``.

        Args:
            X: 2-D array of samples, one per row.

        Returns:
            Array of shape ``(number of samples, number of centres)``.

        Raises:
            AttributeError: If no centres have been selected yet.
        """
        centers = getattr(self, "centers", None)
        if centers is None:
            raise AttributeError(
                "RBF centers are not set; call select_random_centers() or "
                "select_centers_kmeans() first"
            )
        X = np.asarray(X)
        centers = np.asarray(centers)
        # Broadcast to (samples, centres, features) so every pairwise distance
        # is computed in one vectorized step instead of a Python double loop.
        differences = X[:, np.newaxis, :] - centers[np.newaxis, :, :]
        distances = np.linalg.norm(differences, axis=2)
        return np.exp(-self.gamma * distances ** 2)

    def fit(self, X, y):
        """Solve for the output weights by least squares (pseudo-inverse).

        Args:
            X: 2-D array of training samples, one per row.
            y: Training targets, one per sample.
        """
        G = self.compute_interpolation_matrix(X)
        self.weights = np.linalg.pinv(G).dot(y)

    def predict(self, X):
        """Return the network output for ``X`` rounded to the nearest integer.

        Raises:
            AttributeError: If the network has not been fitted yet.
        """
        if getattr(self, "weights", None) is None:
            raise AttributeError(
                "RBFNetwork is not fitted; call fit() before predict()"
            )
        G = self.compute_interpolation_matrix(X)
        predictions = G.dot(self.weights)
        return np.round(predictions).astype(int)

# Hyperparameters shared by both networks.
centers_count = 15
gamma = 1.2


def _fit_and_score(network):
    """Fit *network* on the training split and score it on both splits.

    Returns a tuple ``(train predictions, test predictions, train accuracy,
    test accuracy)``.
    """
    network.fit(X_train, y_train)
    pred_train = network.predict(X_train)
    pred_test = network.predict(X_test)
    acc_train = accuracy_score(y_train, pred_train)
    acc_test = accuracy_score(y_test, pred_test)
    return pred_train, pred_test, acc_train, acc_test


# Variant 1: centres drawn at random from the training samples.
rbf_net_random = RBFNetwork(centers_count=centers_count, gamma=gamma)
rbf_net_random.select_random_centers(X_train)
(
    y_pred_train_random,
    y_pred_test_random,
    train_accuracy_random,
    test_accuracy_random,
) = _fit_and_score(rbf_net_random)

print('RBF with random centers:')
print('Train accuracy:', train_accuracy_random)
print('Test accuracy:', test_accuracy_random)

# Variant 2: centres taken from K-means clustering of the training samples.
rbf_net_kmeans = RBFNetwork(centers_count=centers_count, gamma=gamma)
rbf_net_kmeans.select_centers_kmeans(X_train)
(
    y_pred_train_kmeans,
    y_pred_test_kmeans,
    train_accuracy_kmeans,
    test_accuracy_kmeans,
) = _fit_and_score(rbf_net_kmeans)

print()
print('RBF with KMean centers:')
print('Train accuracy:', train_accuracy_kmeans)
print('Test accuracy:', test_accuracy_kmeans)

RBF with random centers:
Train accuracy: 0.8083333333333333
Test accuracy: 0.8333333333333334

RBF with KMean centers:
Train accuracy: 0.9166666666666666
Test accuracy: 0.8666666666666667


