## Imports

In [68]:
import numpy as np
from random import sample
import scipy.io as io
from sklearn.cluster import KMeans

# 1(a)

In [160]:

def phi(X, centers, sigmas):
    sigmas = sigmas.ravel()
    out = np.zeros((X.shape[0], centers.shape[0]))
    for i in range(X.shape[0]):
        for j in range(centers.shape[0]):
            beta = 1/(sigmas[j]**2)
            out[i, j] = np.exp(- beta * (np.sqrt(((X[i] - centers[j]) ** 2).sum())) ** 2)
    return out


def calculate_rbf_fit(X, y, k, num_classes, max_iter):
    
    y_representation = (y.reshape(-1,1) == np.arange(num_classes)).astype(int)
    
    # cluster
    kmeans = KMeans(n_clusters=k, max_iter=max_iter, random_state=0).fit(X)
    centers = kmeans.cluster_centers_
    sigmas = np.array([np.std(X[kmeans.labels_ == i]) for i in range(k)]).reshape(1,-1)
    
    # Solve Weights
    RBF_X = phi(X, centers, sigmas)
    W = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ y_representation
    
    return W, centers, sigmas
    

# 1(b)

In [131]:

data = io.loadmat('../input/ion-dataset/ion.mat')
X = data['X']
y = data['y']
X_ = X.T
print(X_.shape, y.shape)

def misclassification(ypred, ytrue):
    return (ypred.ravel() != ytrue.ravel()).sum()/len(ytrue)

def predict(X, W, centers, sigmas):
    p = phi(X, centers, sigmas)
    preds = p @ W
    return preds.argmax(axis=1)

k = 30
num_classes = 2
max_iter = 1000



(351, 34) (351, 1)


In [114]:
###VANILLA CV###

data_length = 351 
train_length = 280
positions = sample(range(351),train_length)

train_x = X_[positions]
test_x = np.delete(X_,positions, axis=0)

train_y = y[positions]
test_y = np.delete(y,positions,axis=0)

W, centers, sigmas = calculate_rbf_fit(train_x,train_y, k, num_classes, max_iter)

pred_train = predict(train_x, W, centers, sigmas)
train_err=misclassification(pred_train, y_train)

pred_test = predict(test_x, W, centers, sigmas)
test_err=misclassification(pred_test, y_test)

print('Vanilla CV Train Error: {}\nVanilla CV Test Error: {}'.format(train_err,test_err))

Vanilla CV Train Error: 0.42142857142857143
Vanilla CV Test Error: 0.43661971830985913


In [119]:
###LEAVE-ONE-OUT CV

train_err=0
test_err=0

for i in range(351):
    trainx_loocv=X_
    testx_loocv=X_[i,]
    trainx_loocv=np.delete(trainx_loocv,np.s_[i:i+1],axis=0)
    
    trainy_loocv=y
    testy_loocv=trainy_loocv[i,]
    trainy_loocv=np.delete(trainy_loocv,np.s_[i:i+1],axis=0)
    
    #print(trainx_loocv.shape)
    #print(trainy_loocv.shape)
    
    W, centers, sigmas = calculate_rbf_fit(trainx_loocv,trainy_loocv, k, num_classes, max_iter)
    
    train_pred=predict(trainx_loocv,W,centers,sigmas)
    train_err=train_err+ misclassification(train_pred,trainy_loocv)
    
    test_pred=predict(testx_loocv,W,centers,sigmas)
    test_err=test_err+misclassification(test_pred,testy_loocv)
    
print('LOOCV Train Error: {}\nLOOCV Test Error: {}'.format(train_err/351,test_err/351))

LOOCV Train Error: 0.1290598290598291
LOOCV Test Error: 17.43874643874644
