In [1]:
import numpy as np
import math  
from sklearn.cluster import KMeans

In [2]:
from preprocessing import getData

In [3]:
def kernelFunction(XminusMu, sigmaK, name):
    """Evaluate the radial basis function ``name`` at distance ``XminusMu``.

    Parameters
    ----------
    XminusMu : distance between a sample and a centre (||x - mu||).
    sigmaK : width associated with that centre; ignored by "Linear".
    name : kernel selector, one of "Gaussian", "Multiquadratic", "Linear".

    Returns
    -------
    The kernel activation, or 0.0 when ``name`` matches none of the
    supported kernels.
    """
    if name == "Gaussian":
        # exp(-r^2 / (2 * sigma^2))
        return np.exp(-0.5 * XminusMu**2 / sigmaK**2)
    if name == "Multiquadratic":
        # sqrt(r^2 + sigma^2)
        return (XminusMu**2 + sigmaK**2)**0.5
    if name == "Linear":
        # Identity on the distance.
        return XminusMu
    # Unrecognised kernel names fall through to a zero activation.
    return 0.0

In [4]:
def train(train_X, train_Y, K, name):
    """Fit an RBF network with K hidden units.

    The centres are the K-means centroids of ``train_X``; each width is the
    mean L1 distance between a cluster's members and its centroid; the output
    weights are the least-squares solution ``pinv(H) @ train_Y``.

    Parameters
    ----------
    train_X : 2-D array of training samples, one row per sample.
    train_Y : target array with one row per training sample.
    K : number of hidden units (= number of K-means clusters).
    name : kernel name forwarded to ``kernelFunction``.

    Returns
    -------
    list ``[W, mu, sigma, name]`` where ``W`` holds the output weights,
    ``mu`` the K centres, ``sigma`` the K widths and ``name`` the kernel name.

    Notes
    -----
    A cluster with a single member (or no member) has no usable spread: its
    width would be 0, and the Gaussian kernel then evaluates 0/0 = NaN at the
    centre, which poisons H and W. Such widths are replaced by the mean of the
    positive widths (or 1.0 if there are none).
    """
    m = train_X.shape[0]
    # Hidden layer: one neuron per K-means cluster, centred on its centroid.
    kmeans = KMeans(n_clusters=K, max_iter=1000, random_state=0).fit(train_X)
    mu = kmeans.cluster_centers_
    labels = kmeans.predict(train_X)

    # dist[i, k] = ||X(i) - mu(k)||_1, computed one centre at a time so only an
    # (m, features) temporary is alive instead of an (m, K, features) cube.
    dist = np.empty((m, K))
    for k in range(K):
        dist[:, k] = np.linalg.norm(train_X - mu[k], 1, axis=1)

    # sigma(k) = mean distance of cluster k's members to their centroid.
    sigma = np.zeros(K)
    for k in range(K):
        members = labels == k
        if members.any():
            sigma[k] = dist[members, k].mean()

    # Replace degenerate (zero / NaN) widths so the kernels stay finite.
    degenerate = ~(sigma > 0)
    if degenerate.any():
        usable = sigma[~degenerate]
        sigma[degenerate] = usable.mean() if usable.size else 1.0

    # Hidden layer matrix H (m x K): H[i, k] = phi(||X(i) - mu(k)||).
    H = np.empty((m, K))
    for k in range(K):
        H[:, k] = kernelFunction(dist[:, k], sigma[k], name)

    # Output weights: W = pinv(H) . Y  ->  (K x m) . (m x classes) = K x classes.
    W = np.dot(np.linalg.pinv(H), train_Y)
    return [W, mu, sigma, name]

In [5]:
def test(test_X, test_Y, W, K, mu, sigma, name):
    """Score a trained RBF network on a test set and print the result.

    Builds the hidden layer matrix for ``test_X`` from the centres ``mu`` and
    widths ``sigma`` (L1 distance, kernel ``name``), multiplies it by ``W``
    and compares the arg-max of each predicted row with the arg-max of the
    matching row of ``test_Y``.

    Prints three values: number of correct predictions, number of test
    samples, and the percentage correct. Returns nothing.
    """
    m = test_X.shape[0]

    # Hidden layer activations, one row per test sample and one column per centre.
    H = np.zeros((m, K))
    for row in range(m):
        sample = test_X[row]
        for col in range(K):
            distance = np.linalg.norm(sample - mu[col], 1)
            H[row, col] = kernelFunction(distance, sigma[col], name)

    Y_predicted = H.dot(W)

    # A prediction is correct when the largest output sits at the same index
    # as the largest entry of the target row.
    count = sum(
        1
        for row in range(m)
        if np.argmax(test_Y[row]) == np.argmax(Y_predicted[row])
    )
    print(count, m, count/m * 100)

In [6]:
# Load the samples X and targets Y from data.mat via preprocessing.getData.
# Downstream code row-slices both and takes the arg-max of each Y row, so Y is
# presumably one-hot encoded -- TODO confirm against getData.
X, Y = getData('data.mat')
# Holdout evaluation: fit on the leading rows, score on the remaining rows.
def holdout(X, Y, train_percent, K, name):
    """Train on the first ``train_percent`` fraction of rows and test on the rest.

    Parameters
    ----------
    X, Y : 2-D arrays of samples and targets with matching row order.
    train_percent : fraction of rows (from the top, no shuffling) used to fit.
    K : number of hidden units passed to ``train`` and ``test``.
    name : kernel name passed to ``train``.

    The evaluation result is printed by ``test``; nothing is returned.
    """
    cut = int(train_percent * X.shape[0])
    fit_X, eval_X = X[:cut, :], X[cut:, :]
    fit_Y, eval_Y = Y[:cut, :], Y[cut:, :]
    weights, centres, widths, kernel = train(fit_X, fit_Y, K, name)
    test(eval_X, eval_Y, weights, K, centres, widths, kernel)
# 70/30 holdout run with 815 hidden units and the Gaussian kernel.
holdout(X, Y, train_percent=0.7, K=815, name="Gaussian")

546 645 84.65116279069768


In [19]:
# Reload the data and choose a small cluster count for the cells that follow.
X, Y = getData('data.mat')
K = 5

In [20]:
# Cluster the full data set into K groups (fixed seed for repeatable centroids).
kmeans = KMeans(n_clusters = K, random_state = 0).fit(X) 
# Centroid of each cluster.
mu = kmeans.cluster_centers_
# Cluster index assigned to every row of X.
labels = kmeans.predict(X)
# One width per cluster, filled in by a later cell.
sigma = np.zeros(K)

In [35]:
# sigma[k] = mean L1 distance between the members of cluster k and its centroid.
# NOTE(review): an empty cluster makes mK == 0 and 1.0/mK raises
# ZeroDivisionError; a cluster whose members all coincide with the centroid
# yields a width of 0.
for k in range(K):
    clusterK = X[(labels == k)]
    mK = len(clusterK)
    sigma[k] = (1.0/mK) * np.sum(np.linalg.norm(clusterK - mu[k],1,axis=1),axis=0)

In [36]:
# Display the per-cluster widths.
sigma

array([29.84166055, 35.15635781, 66.11795003, 57.61202748,  0.        ])

In [36]:
# NOTE(review): this call passes six positional arguments, but neither holdout
# definition visible in this notebook accepts six (one takes X, Y,
# train_percent, K, name; the other takes valueArray, X, Y, train_percent).
# Presumably it was run against an intermediate version -- confirm before
# re-running.
holdout(valueArray, X, Y, 0.7, 51, "Multiquadratic")

551 645 0.8542635658914729


In [37]:
# NOTE(review): this call passes six positional arguments, but neither holdout
# definition visible in this notebook accepts six (one takes X, Y,
# train_percent, K, name; the other takes valueArray, X, Y, train_percent).
# Presumably it was run against an intermediate version -- confirm before
# re-running.
holdout(valueArray, X, Y, 0.7, 51, "Linear")

555 645 0.8604651162790697


In [6]:
def test(test_X,test_Y,W,k,cluster_centers,sigma):
    """Score a Gaussian RBF network on a test set.

    NOTE(review): this redefines ``test`` with a different signature from the
    ``test`` defined earlier in this notebook; whichever cell ran last wins.

    Parameters
    ----------
    test_X : 2-D array of test samples, one row per sample.
    test_Y : target array; the arg-max of each row is taken as the true class.
    W : output weight matrix with one row per hidden unit.
    k : number of hidden units.
    cluster_centers : one centre per hidden unit. Each centre may carry extra
        trailing entries; only the leading ``test_X.shape[1]`` entries are
        compared with a sample.
    sigma : one width per hidden unit.

    Returns
    -------
    tuple ``(count, test_instances)``: number of correct predictions and
    number of test samples.
    """
    test_instances = test_X.shape[0]
    n_features = test_X.shape[1]

    # Hidden layer matrix: one row per sample, one column per hidden unit.
    H = np.zeros((test_instances, k))
    for i in range(test_instances):
        # Visit every hidden unit. Iterating over the number of classes
        # instead would leave columns beyond that count at zero whenever
        # k exceeds the number of classes.
        for j in range(k):
            XminusMu = np.linalg.norm(test_X[i] - cluster_centers[j][:n_features])
            H[i][j] = kernelFunction(XminusMu,sigma[j],"Gaussian")

    Y_predicted = H.dot(W)

    # A prediction is correct when predicted and true arg-max agree.
    count = 0
    for test_index in range(test_instances):
        if np.argmax(test_Y[test_index]) == np.argmax(Y_predicted[test_index]):
            count += 1
    return count,test_instances

In [7]:
# NOTE(review): getData is unpacked into three values here but into two values
# in earlier cells -- presumably a different version of preprocessing.getData;
# confirm which one is current.
valueArray,X,Y = getData('data.mat')
# Keep the first 72 rows of valueArray and standardise each column to zero mean
# and unit standard deviation. A column whose standard deviation is 0 divides
# by zero here.
valueArray = (valueArray[0:72] - np.mean(valueArray[0:72],axis=0))/np.std(valueArray[0:72],axis=0)

In [8]:
# Holdout evaluation (the printed label assumes a 70/30 split).
def holdout(valueArray,X,Y,train_percent):
    """Train on the leading ``train_percent`` of rows, test on the rest, print the result.

    NOTE(review): ``train`` is called with six positional arguments and is
    expected to return four values, whereas the ``train`` defined earlier in
    this notebook takes four arguments -- presumably this cell targets a
    different version of ``train``; confirm before re-running.

    The printed value is whatever ``test`` returns; nothing is returned here.
    """
    split = int(train_percent * X.shape[0])
    fit_X, eval_X = X[:split, :], X[split:, :]
    fit_Y, eval_Y = Y[:split, :], Y[split:, :]
    n_classes = eval_Y.shape[1]
    n_features = fit_X.shape[1]
    W, k, cluster_centers, sigma = train(
        valueArray, fit_X, fit_Y, split, n_features, n_classes
    )
    result = test(eval_X, eval_Y, W, k, cluster_centers, sigma)
    print("70-30 Holdout Method Accuracy :-", result)

In [9]:
# Run the holdout evaluation with 70% of the rows used for training.
holdout(valueArray,X,Y,0.7)

[[ 3.42465556e-01  1.30746878e+00  6.36887220e-02  1.35220808e+00
  -4.67784451e-01  7.59191414e-01  1.31914324e+00  1.72883533e+00
   9.86918749e-01  7.48056639e-01  9.40952623e-01  8.21251678e-01
   2.94205285e-01  1.89374244e+00  1.78297440e-01  2.15394812e+00
  -5.24669687e-01  1.19501901e+00  2.47486192e+00  2.13326581e+00
   1.40870156e+00  7.13831917e-01  8.79719466e-01  1.40774023e+00
   9.68498320e-01  1.41844015e+00  1.39292001e-01  2.10055757e+00
  -4.17604907e-01  7.46125881e-01  2.57798573e+00  2.11648863e+00
   7.19633357e-01 -1.72695327e-01  6.88035277e-01  1.00267626e+00
   3.11947524e-01  1.58203371e+00  5.20013972e-01  1.97271299e+00
  -8.03494989e-02  1.01086585e+00  1.63596663e+00  8.71928550e-01
   2.80159395e-01 -1.37683353e-01 -5.21844924e-02  6.88374940e-01
   8.70762994e-01 -5.60387471e-01 -5.51441698e-01 -5.71955746e-01
  -5.76477511e-01 -3.10154894e-01  1.32423150e+00 -1.22618199e+00
  -2.85633086e-01 -3.72592400e-01 -5.03643115e-01 -3.81663792e-01
   1.01740

In [557]:
# Inspect the dimensions of valueArray.
valueArray.shape

(72, 73)

In [10]:
# Manual ratio check -- presumably correct predictions over test samples from a
# holdout run; confirm the source of these counts.
347/645

0.537984496124031