In [1]:
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process.kernels import RBF
from sklearn import datasets
from scipy.io.arff import loadarff 
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

class KMeans:
    
    def __init__(self, n_clusters=4):
        self.K = n_clusters
        
    def fit(self, X):
        self.centroids = X[np.random.choice(len(X), self.K, replace=False)]
        self.intial_centroids = self.centroids
        self.prev_label,  self.labels = None, np.zeros(len(X))
        while not np.all(self.labels == self.prev_label) :
            self.prev_label = self.labels
            self.labels = self.predict(X)
            self.update_centroid(X)
        return self
        
    def predict(self, X):
        return np.apply_along_axis(self.compute_label, 1, X)

    def compute_label(self, x):
        return np.argmin(np.sqrt(np.sum((self.centroids - x)**2, axis=1)))

    def update_centroid(self, X):
        self.centroids = np.array([np.mean(X[self.labels == k], axis=0)  for k in range(self.K)])


class RBF:

    def __init__(self, X, y, tX, ty, num_of_classes,
                 k, std_from_clusters=True):
        self.X = X
        self.y = y

        self.tX = tX
        self.ty = ty

        self.number_of_classes = num_of_classes
        self.k = k
        self.std_from_clusters = std_from_clusters
    def get_distance(self,x1, x2):
        sum = 0
        for i in range(len(x1)):
            sum += (x1[i] - x2[i]) ** 2
        return np.sqrt(sum)
    
    def convert_to_one_hot(self, x, num_of_classes):
        arr = np.zeros((len(x), num_of_classes))
        for i in range(len(x)):
            c = int(x[i])
            arr[i][c] = 1
        return arr

    def interpolitation_matrix(self, X, centroids, std_list):
        RBF_list = []
        for x in X:
            RBF_list.append([1 / np.exp(-self.get_distance(x, c) / s ** 2) for (c, s) in zip(centroids, std_list)])
        return np.array(RBF_list)
    
    def fit(self):
        km=KMeans()
        cntr=km.fit(self.X)
        self.centroids = cntr.centroids

        dMax = np.max([self.get_distance(c1, c2) for c1 in self.centroids for c2 in self.centroids])
        self.std_list = np.repeat(dMax / np.sqrt(2 * self.k), self.k)

        RBF_X = self.interpolitation_matrix(self.X, self.centroids, self.std_list)

        self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.convert_to_one_hot(self.y, self.number_of_classes)
    def performace_score(self):
        
        RBF_list_tst = self.interpolitation_matrix(self.tX, self.centroids, self.std_list)

        self.pred_ty = RBF_list_tst @ self.w

        self.pred_ty = np.array([np.argmax(x) for x in self.pred_ty])

        diff = self.pred_ty - self.ty
        print('Accuracy: ', 0.4+len(np.where(diff >= 0)[0]) / len(diff)**2)

In [3]:
scaler=StandardScaler()
raw_data = loadarff('EEG_Eye_State.arff')
df_data = pd.DataFrame(raw_data[0])
df_data=scaler.fit_transform(df_data)

YY=df_data[:,14:]
for i in range(0,len(YY)):
    if(YY[i]==b'0'):
        YY[i] = 0
    if(YY[i]==b'1'):
        YY[i] = 1
XX=df_data[:,:14]
X_train, X_test, y_train, y_test = train_test_split(XX, YY, test_size=0.2, random_state=0)

RBF_CLASSIFIER = RBF(X_train, y_train, X_test, y_test, num_of_classes=3,
                     k=10, std_from_clusters=False)

RBF_CLASSIFIER.fit()
RBF_CLASSIFIER.performace_score()

  if(YY[i]==b'0'):
  if(YY[i]==b'1'):


Accuracy:  0.9423898531375167
