In [10]:
import numpy as np
import math  
from sklearn.cluster import KMeans
from pandas import *

In [11]:
from preprocessing import getData

In [12]:
def kernelFunction(XminusMu, sigmaK, name):
    """Evaluate a radial basis function on a distance.

    Parameters
    ----------
    XminusMu : float or ndarray
        Distance(s) between a sample and an RBF centre.
    sigmaK : float or ndarray
        Spread of the centre(s); must broadcast against ``XminusMu``.
        A zero spread (e.g. a cluster that contains a single point) is
        replaced by 1e-10 so the Gaussian never divides by zero.
    name : str
        One of "Gaussian", "Multiquadratic" or "Linear".

    Returns
    -------
    float or ndarray
        exp(-r**2 / (2 * sigma**2)) for "Gaussian",
        sqrt(r**2 + sigma**2) for "Multiquadratic",
        r itself for "Linear".

    Raises
    ------
    ValueError
        If ``name`` is not a supported kernel. Returning 0.0 here would
        silently zero the whole hidden layer and produce a useless model.
    """
    # np.where treats scalars and arrays alike, so per-centre spreads can be
    # passed as a vector as well as one value at a time.
    sigmaK = np.where(np.equal(sigmaK, 0), 1e-10, sigmaK)
    if name == "Gaussian":
        return np.exp((-0.5 * (XminusMu**2)) / (sigmaK**2))
    if name == "Multiquadratic":
        return (XminusMu**2 + sigmaK**2)**0.5
    if name == "Linear":
        return XminusMu
    raise ValueError(
        "Unknown kernel %r; expected 'Gaussian', 'Multiquadratic' or 'Linear'" % (name,)
    )

In [13]:
def train(train_X, train_Y, K, name):
    """Fit an RBF network with K hidden units.

    Steps:
      1. K-means on ``train_X``; the K centroids become the RBF centres ``mu``.
      2. ``sigma[k]`` = mean L1 distance of the members of cluster k to ``mu[k]``.
      3. Hidden-layer matrix ``H[i, k] = phi(||X(i) - mu(k)||_1)`` (m x K).
      4. Output weights ``W = pinv(H) . train_Y`` (K x classes).

    Parameters
    ----------
    train_X : ndarray, shape (m, features)
        Training samples. Assumed to be a plain 2-D NumPy array.
    train_Y : ndarray, shape (m, classes)
        Training targets, one row per sample (presumably one-hot; the
        evaluation code takes the argmax of each row).
    K : int
        Number of clusters / hidden neurons.
    name : str
        Kernel name forwarded to ``kernelFunction``.

    Returns
    -------
    list
        ``[W, mu, sigma, name]``.
    """
    m = train_X.shape[0]
    # n_init is pinned to 10 (the long-standing scikit-learn default) so the
    # clustering does not change when a newer release switches the default.
    kmeans = KMeans(n_clusters=K, max_iter=1000, n_init=10, random_state=0).fit(train_X)
    mu = kmeans.cluster_centers_
    labels = kmeans.predict(train_X)

    sigma = np.zeros(K)
    H = np.zeros((m, K))
    for k in range(K):
        # L1 distance of every sample to centre k, computed once and reused
        # for both the spread and the hidden-layer column.
        dist_k = np.linalg.norm(train_X - mu[k], ord=1, axis=1)
        members = dist_k[labels == k]
        # An empty cluster keeps sigma[k] == 0 instead of dividing by zero;
        # kernelFunction substitutes a tiny positive spread for zero.
        if members.size:
            sigma[k] = members.mean()
        H[:, k] = kernelFunction(dist_k, sigma[k], name)

    # Least-squares output weights: (K x m) . (m x classes) = K x classes.
    W = np.dot(np.linalg.pinv(H), train_Y)
    return [W, mu, sigma, name]

In [25]:
def test(test_X, test_Y, W, K, mu, sigma, name):
    """Evaluate a trained RBF network and print accuracy and confusion matrix.

    Parameters
    ----------
    test_X : ndarray, shape (m, features)
        Samples to classify. Assumed to be a plain 2-D NumPy array.
    test_Y : ndarray, shape (m, classes)
        True targets; the true class of a row is the index of its maximum.
    W : ndarray, shape (K, classes)
        Output weights as returned by ``train``.
    K : int
        Number of hidden neurons (rows of ``mu``).
    mu, sigma : ndarray
        RBF centres and spreads as returned by ``train``.
    name : str
        Kernel name forwarded to ``kernelFunction``.

    Returns
    -------
    tuple
        ``(accuracy, conf_mat)``: accuracy in percent (NaN for an empty test
        set) and the confusion matrix with rows = actual class and
        columns = predicted class.
    """
    m = test_X.shape[0]
    # Hidden-layer matrix H (m x K), filled one centre at a time.
    H = np.zeros((m, K))
    for k in range(K):
        dist_k = np.linalg.norm(test_X - mu[k], ord=1, axis=1)
        H[:, k] = kernelFunction(dist_k, sigma[k], name)
    Y_predicted = H.dot(W)

    # The class of a row is the index of its largest entry.
    actual = np.argmax(test_Y, axis=1)
    predicted = np.argmax(Y_predicted, axis=1)

    # One row/column per class rather than folding every class >= 1 together;
    # never smaller than 2 x 2 so the two-class layout is unchanged.
    n_classes = max(2, test_Y.shape[1], Y_predicted.shape[1])
    conf_mat = np.zeros((n_classes, n_classes), dtype=int)
    np.add.at(conf_mat, (actual, predicted), 1)

    count = int(np.trace(conf_mat))
    # Guard against an empty test set instead of raising ZeroDivisionError.
    accuracy = count / m * 100 if m else float("nan")

    print("Accuracy:-")
    print(accuracy)
    print("Confusion Matrix:-")
    print(DataFrame(conf_mat))
    return accuracy, conf_mat

In [19]:
# Load the sample matrix X and the target matrix Y used by every experiment below.
# NOTE(review): getData lives in the local `preprocessing` module; downstream code
# slices both as 2-D arrays and takes the argmax of each Y row, so Y is presumably
# one-hot encoded - confirm against preprocessing.getData.
X, Y = getData('data.mat')
# Holdout method: train on the leading rows, evaluate on the remaining rows.
def holdout(X, Y, train_percent, K, name):
    """Train on the first ``train_percent`` share of the rows and test on the rest.

    The split is positional (no shuffling): rows before the cut-off index go
    to training, rows from the cut-off onwards go to evaluation.

    Parameters
    ----------
    X, Y : 2-D arrays with one row per sample (features and targets).
    train_percent : float
        Fraction of the rows used for training, e.g. 0.7.
    K : int
        Number of RBF centres passed to ``train`` and ``test``.
    name : str
        Kernel name passed to ``train``.

    Returns
    -------
    Whatever ``test`` returns for the held-out rows.
    """
    cut = int(train_percent * X.shape[0])
    train_X, test_X = X[:cut, :], X[cut:, :]
    train_Y, test_Y = Y[:cut, :], Y[cut:, :]
    W, mu, sigma, name = train(train_X, train_Y, K, name)
    return test(test_X, test_Y, W, K, mu, sigma, name)

In [26]:
# Run the same 70/30 holdout experiment (350 RBF centres) once per kernel.
print("70-30 Holdout Method :-")
print()
for heading, kernel in (
    ("Gaussian Function :-", "Gaussian"),
    ("Multiquadratic Function :-", "Multiquadratic"),
    ("Linear Function :-", "Linear"),
):
    print(heading)
    holdout(X, Y, 0.7, 350, kernel)
    print()

70-30 Holdout Method :-

Gaussian Function :-
Accuracy:-
93.33333333333333
Confusion Matrix:-
     0    1
0  287   22
1   21  315

Multiquadratic Function :-
Accuracy:-
91.31782945736434
Confusion Matrix:-
     0    1
0  282   27
1   29  307

Linear Function :-
Accuracy:-
91.62790697674419
Confusion Matrix:-
     0    1
0  286   23
1   31  305



In [41]:
#5 fold cross validation
from sklearn.model_selection import KFold
def k_fold(k, name, n_splits=5):
    """Cross-validate the RBF network on the module-level ``X`` / ``Y``.

    Parameters
    ----------
    k : int
        Number of RBF centres (hidden neurons) passed to ``train`` and
        ``test``. This is NOT the number of folds.
    name : str
        Kernel name passed to ``train``.
    n_splits : int, optional
        Number of cross-validation folds (default 5, the value that was
        previously hard-coded).

    For every fold the model is trained on the training indices and
    ``test`` prints the accuracy and confusion matrix for the held-out
    indices. Nothing is returned.
    """
    kf = KFold(n_splits=n_splits)
    for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
        train_X, test_X = X[train_index], X[test_index]
        train_Y, test_Y = Y[train_index], Y[test_index]
        W, mu, sigma, name = train(train_X, train_Y, k, name)
        print("Fold %d :- "%fold)
        print()
        test(test_X, test_Y, W, k, mu, sigma, name)

In [42]:
# NOTE(review): the first argument is the number of RBF centres handed to train(),
# not the number of folds (the fold count is set inside k_fold). This run therefore
# uses only 5 hidden units, whereas the holdout cell above uses 350 - confirm that
# this is intended before comparing the accuracies.
k_fold(5, "Gaussian")

Fold 1 :- 

Accuracy:-
62.093023255813954
Confusion Matrix:-
     0    1
0  154   60
1  103  113
Fold 2 :- 

Accuracy:-
60.46511627906976
Confusion Matrix:-
     0    1
0  144   61
1  109  116
Fold 3 :- 

Accuracy:-
63.72093023255814
Confusion Matrix:-
     0    1
0  162   74
1   82  112
Fold 4 :- 

Accuracy:-
64.1025641025641
Confusion Matrix:-
     0    1
0  152   64
1   90  123
Fold 5 :- 

Accuracy:-
63.4032634032634
Confusion Matrix:-
     0    1
0  144   60
1   97  128
