In [3]:
import math
from scipy.spatial import distance
import numpy as np

In [4]:
def probclearn(X, y):
    """Fit a two-class Gaussian classifier with one spherical variance per class.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Training samples, one sample per row.
    y : array-like of shape (n,) or (n, 1)
        Class labels. Entries equal to 1 form the positive class; every
        other value is counted as the negative class.

    Returns
    -------
    q : float
        Proportion of positive samples, ``k_p / n``.
    mu_positive : numpy.ndarray of shape (d, 1)
        Mean vector of the positive class.
    mu_negative : numpy.ndarray of shape (d, 1)
        Mean vector of the negative class.
    sigma2_positive : float
        Sum of squared deviations of the positive samples from their mean,
        divided by ``d * k_p``.
    sigma2_negative : float
        Same quantity for the negative class, divided by ``d * k_n``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no feature columns, ``y`` does not hold
        exactly one label per row of ``X``, or either class has no samples
        (the class mean and variance would require a division by zero).
    """
    X = np.asarray(X, dtype=float)
    # Unpacking fails with ValueError when X is not two-dimensional.
    n, d = X.shape
    if d == 0:
        raise ValueError("X must have at least one feature column")

    # Accept both flat and column-vector label layouts.
    labels = np.asarray(y).reshape(-1)
    if labels.shape[0] != n:
        raise ValueError(
            "y must contain exactly one label per row of X "
            "(got %d labels for %d samples)" % (labels.shape[0], n)
        )

    is_positive = labels == 1
    k_p = int(np.count_nonzero(is_positive))
    k_n = n - k_p
    if k_p == 0 or k_n == 0:
        raise ValueError(
            "both classes need at least one sample "
            "(got %d positive and %d negative)" % (k_p, k_n)
        )

    # Proportion of positive samples
    q = k_p / n

    X_positive = X[is_positive]
    X_negative = X[~is_positive]

    # Mean vector of each class
    mu_positive = X_positive.mean(axis=0)
    mu_negative = X_negative.mean(axis=0)

    # Sum the squared deviations directly: taking a Euclidean norm and
    # squaring it again costs a sqrt and a little precision for nothing.
    sigma2_positive = float(np.sum((X_positive - mu_positive) ** 2)) / (d * k_p)
    sigma2_negative = float(np.sum((X_negative - mu_negative) ** 2)) / (d * k_n)

    return q, mu_positive.reshape(-1, 1), mu_negative.reshape(-1, 1), sigma2_positive, sigma2_negative

In [6]:
def probcpredict(q, mu_positive, mu_negative, sigma2_positive, sigma2_negative, z):
    """Classify one sample with the two-class spherical Gaussian model.

    The decision value is

        log(q / (1 - q))
        - (d / 2) * log(sigma2_positive / sigma2_negative)
        - ||z - mu_positive||^2 / (2 * sigma2_positive)
        + ||z - mu_negative||^2 / (2 * sigma2_negative)

    and the sample is labelled positive when that value is strictly
    greater than zero.

    Parameters
    ----------
    q : float
        Proportion of positive training samples; must lie strictly between
        0 and 1 for the log-odds term to be defined.
    mu_positive, mu_negative : array-like holding d values
        Class mean vectors, in any layout that holds exactly d values,
        e.g. ``(d,)`` or ``(d, 1)``.
    sigma2_positive, sigma2_negative : float
        Class variances; must be positive for the log and divisions to be
        defined.
    z : array-like holding d values
        Sample to classify, e.g. shape ``(d,)`` or ``(d, 1)``.

    Returns
    -------
    float
        ``1.0`` for the positive class, ``-1.0`` otherwise.

    Raises
    ------
    ValueError
        If a mean vector does not hold the same number of values as ``z``.
    """
    # Work on flat length-d vectors. Subtracting a (d, 1) column from a 1-D
    # z would broadcast to a d-by-d matrix and silently yield a wrong norm.
    z_vec = np.asarray(z, dtype=float).reshape(-1)
    d = z_vec.size
    diff_positive = z_vec - np.asarray(mu_positive, dtype=float).reshape(d)
    diff_negative = z_vec - np.asarray(mu_negative, dtype=float).reshape(d)

    x_1 = math.log(q / (1 - q))
    x_2 = (d / 2) * math.log(sigma2_positive / sigma2_negative)
    # The dot product gives the squared distance without a sqrt round trip.
    x_3 = (1 / (2 * sigma2_positive)) * float(np.dot(diff_positive, diff_positive))
    x_4 = (1 / (2 * sigma2_negative)) * float(np.dot(diff_negative, diff_negative))

    if (x_1 - x_2 - x_3 + x_4) > 0:
        return 1.0
    return -1.0

In [23]:
def nearestneighbor_classifier(X, y, z):
    """Label a sample with the label of its nearest training sample.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Training samples, one sample per row.
    y : array-like of shape (n,) or (n, 1)
        Training labels, one per row of ``X``.
    z : array-like holding d values
        Sample to classify, e.g. shape ``(d,)`` or ``(d, 1)``.

    Returns
    -------
    scalar
        The label stored in ``y`` for the training sample with the smallest
        Euclidean distance to ``z``. When several samples are equally
        close, the one with the lowest row index wins.

    Raises
    ------
    ValueError
        If ``X`` contains no samples.
    """
    X = np.asarray(X, dtype=float)
    if X.size == 0:
        raise ValueError("X must contain at least one training sample")

    # One row per sample, compared against a flat query vector.
    samples = X.reshape(X.shape[0], -1)
    query = np.asarray(z, dtype=float).reshape(-1)

    # np.linalg.norm is referenced explicitly: no bare `norm` name is
    # imported in this notebook, so the unqualified call raised NameError
    # on a fresh kernel.
    distances = np.linalg.norm(samples - query, axis=1)

    # argmin returns the first minimum, matching a strict `<` scan.
    c = int(np.argmin(distances))

    label = np.asarray(y)[c]
    # Column-vector labels yield a length-1 array; unwrap it to a scalar.
    if np.ndim(label) > 0:
        return label.flat[0]
    return label

## Test Case 1: Testing with 2-D data

In [27]:
# Five 2-D training samples, one per row; the first three are labelled +1.
X = np.array([[-3, 2],
              [-2, 1.5],
              [-1, 1],
              [0, 0.5],
              [1, 0]])
y = np.array([[1], [1], [1], [-1], [-1]])

q, mu_pos, mu_neg, sigma2_pos, sigma2_neg = probclearn(X, y)
# Query point as a (d, 1) column vector.
z = np.array([[-2], [2]])

prob_label = probcpredict(q, mu_pos, mu_neg, sigma2_pos, sigma2_neg, z)
print(prob_label)

nn_label = nearestneighbor_classifier(X, y, z)
print(nn_label)

# Pin the recorded results so a regression fails loudly instead of only
# printing a different label.
assert prob_label == 1.0, "probability classifier should label z as +1"
assert nn_label == 1, "nearest-neighbor classifier should label z as +1"

1.0
1


## Test Case 2: Testing with 3-D data

In [26]:
# Five 3-D training samples, one per row; the first two are labelled +1.
X = np.array([[-2, 2, 0],
              [-3, -1.5, -2],
              [-1, 1, 4],
              [1, -0.5, 5],
              [2, 0, -2]])
y = np.array([[1], [1], [-1], [-1], [-1]])

q, mu_pos, mu_neg, sigma2_pos, sigma2_neg = probclearn(X, y)
# Query point as a (d, 1) column vector.
z = np.array([[-2], [2], [10]])

prob_label = probcpredict(q, mu_pos, mu_neg, sigma2_pos, sigma2_neg, z)
print(prob_label)

nn_label = nearestneighbor_classifier(X, y, z)
print(nn_label)

# Pin the recorded results so a regression fails loudly instead of only
# printing a different label.
assert prob_label == -1.0, "probability classifier should label z as -1"
assert nn_label == -1, "nearest-neighbor classifier should label z as -1"

-1.0
-1


## Test Case 3: Testing with 7-D data

In [29]:
# Five 7-D training samples, one per row; the first three are labelled +1.
X = np.array([[-2, 2, 0, -2, 2, 0, 4],
              [-3, -1.5, -2, 6, 5, 1, 4],
              [-1, 1, 4, 0, 5, -4, 5],
              [1, -0.5, 5, -9, -9, 0, 0],
              [2, 0, -2, -4.5, 3, 3, 1]])
y = np.array([[1], [1], [1], [-1], [-1]])

q, mu_pos, mu_neg, sigma2_pos, sigma2_neg = probclearn(X, y)
# Query point as a (d, 1) column vector.
z = np.array([[-2], [2], [0], [1], [2], [0.5], [0]])

prob_label = probcpredict(q, mu_pos, mu_neg, sigma2_pos, sigma2_neg, z)
print(prob_label)

nn_label = nearestneighbor_classifier(X, y, z)
print(nn_label)

# Pin the recorded results so a regression fails loudly instead of only
# printing a different label.
assert prob_label == 1.0, "probability classifier should label z as +1"
assert nn_label == 1, "nearest-neighbor classifier should label z as +1"

1.0
1
