In [1]:
import numpy as np

In [13]:
# SELECTS THE TRAINING EXAMPLES (COLUMNS OF X) AND THE LABELS THAT BELONG TO CLASS k.

def preEstimation(X,y,k):
    # Input:  Matrix of training examples X, one example per column (n x m).
    #         Vector of labels y with m entries (any shape that flattens to m).
    #         Class k.
    # Output: Matrix with the training examples (columns of X) whose label equals k.
    #         Row vector (1 x m_k) with the labels that are equal to k.

    X = np.asanyarray(X)
    labels = np.ravel(y)
    if labels.shape[0] != X.shape[1]:
        raise ValueError("y must have one label per column of X")

    # Select the columns with a boolean mask instead of stacking y under X:
    # stacking relied on the deprecated np.row_stack alias, copied the whole
    # matrix and forced X and y to a common dtype.
    in_class = labels == k

    X_class = X[:,in_class]
    y_class = labels[in_class].reshape(1,-1)

    return X_class, y_class

In [15]:
# COMPUTES THE MEAN OF EACH COMPONENT OF x FOR THE WHOLE SET OF SIZE m.

def meanEstim(X):
    # Input:  Matrix of training examples X of certain class y, one example per column.
    # Output: Column vector with the mean of every row (component) of X.

    # keepdims keeps the result as a column so it broadcasts against X.
    row_totals = np.sum(X,axis=1,keepdims=True)
    n_examples = X.shape[1]

    return row_totals / n_examples

In [97]:
# CALCULATES THE DIAGONAL COVARIANCE MATRIX (THE VARIANCE OF EACH COMPONENT OF x) FOR THE WHOLE SET OF SIZE m.

def varEstim(X):
    # Input:  Matrix of training examples X of certain class y, one example per column (n x m).
    # Output: Diagonal covariance matrix (n x n) of the training examples X: the unbiased
    #         (divided by m-1) variance of each component on the diagonal, zeros elsewhere.
    #
    # Only the diagonal is returned, so the per-component variances are computed
    # directly instead of accumulating the full n x n scatter matrix one example
    # at a time and then throwing the off-diagonal entries away.
    # With fewer than two examples the unbiased variance is undefined and the
    # diagonal is NaN (NumPy emits a RuntimeWarning).

    variances = np.var(X,axis=1,ddof=1)

    V = np.diag(variances)

    return V

In [98]:
# COMPUTES THE PRIOR PROBABILITY OF y BEING EQUAL TO k (FRACTION OF THE m TRAINING EXAMPLES LABELLED k) ACCORDING TO A BERNOULLI DISTRIBUTION.

def bernouEstim(y,m):
    # Input:  Row vector of labels y; its number of columns is used as the class count.
    #         Size of the training set.
    # Output: Ratio between the number of columns of y and m, used as P(y=k).

    class_count = y.shape[1]

    return class_count / m

In [99]:
# ESTIMATES THE MEAN VECTOR, THE COVARIANCE MATRIX AND P(y) FOR EACH CLASS k.

def NaiveBayesEstimation(X,y,K):
    # Input:  Matrix of training examples X.
    #         Vector of labels y.
    #         Number of classes K; the classes visited are 0, 1, ..., K-1.
    # Output: List with one dictionary per class holding the mean vector ("Mean"),
    #         the covariance matrix ("Variance") and P(y) ("Bernoulli") of that class.

    per_class = []
    total_examples = X.shape[1]

    for label in range(K):
        # Keep only the examples (and labels) of the current class.
        X_k, y_k = preEstimation(X,y,label)

        per_class.append({
            "Mean": meanEstim(X_k),
            "Variance": varEstim(X_k),
            "Bernoulli": bernouEstim(y_k,total_examples),
        })

    return per_class

In [94]:
def GaussianLikelihood(X):
    # Input:  Matrix of examples X, one example per column (n x m).
    # Output: Matrix (n x m) whose entry (j, i) is the univariate Gaussian density of
    #         component j of example i, using the mean and variance of component j
    #         estimated from X itself.

    u = meanEstim(X)
    # varEstim returns an n x n diagonal matrix. Keep only the per-component
    # variances, as a column vector, so they broadcast against the n x m data:
    # dividing by the full matrix divides by its zero off-diagonal entries and
    # does not broadcast unless m == n or m == 1.
    v = np.diagonal(varEstim(X)).reshape(-1,1)

    deviation = X - u
    return (1/np.sqrt(2*np.pi*v))*np.exp(-(np.multiply(deviation,deviation)/(2*v)))

In [121]:
# THE FUNCTION CLASSIFIES THE TEST/VALIDATION EXAMPLES.

def classificatorNaiveBayes(X,estimations,K):
    # Input:  Matrix of test examples X, one example per column (n x m).
    #         List with the estimations of the mean vector, covariance matrix and P(y) for each class k.
    #         Number of classes k.
    # Output: Row vector (1 x m) with the predicted class index of every example.
    #
    # For every class the score of an example x is
    #     -x'S^-1x/2 + u'S^-1x - u'S^-1u/2 + log P(y=k) - log|det S|/2
    # and each example is assigned to the class with the highest score.

    costs = []

    for i in range(K):
        u = estimations[i]['Mean']
        S = estimations[i]['Variance']
        o = estimations[i]['Bernoulli']

        # Invert the covariance once per class and reuse it.
        S_inv = np.linalg.inv(S)
        S_inv_X = np.dot(S_inv,X)

        # Quadratic term x'S^-1x of every column, computed column-wise. Building the
        # full m x m product X'S^-1X only to read its diagonal costs O(m^2) memory.
        A = - np.sum(np.multiply(X,S_inv_X),axis=0,keepdims=True)/2
        B = np.dot(u.T,S_inv_X) - np.dot(np.dot(u.T,S_inv),u)/2
        # slogdet gives log|det S| directly. det(S) itself under/overflows with many
        # features, which turned the log into +/-inf and made every class score equal.
        C = np.log(o) - np.linalg.slogdet(S)[1]/2

        cost = A + B + C
        costs.append(cost)

    costs = np.array(costs)
    y_predict = np.argmax(costs,axis=0)

    return y_predict

In [122]:
# THE FUNCTION GIVES DIFFERENT EVALUATION METRICS.

def evalModelNaiveBayes(y_predicted,y_gt):
    # Input:  Vector of predicted binary labels (0/1).
    #         Vector of ground-truth binary labels (0/1) with the same number of entries.
    # Output: Precision of the results (percentage, rounded to 2 decimals).
    #         Recall of the results (percentage, rounded to 2 decimals).
    #         F1 of the results (percentage, rounded to 2 decimals).
    #         Accuracy of the results (percentage, rounded to 2 decimals).
    #
    # A metric whose denominator is zero (e.g. precision when nothing was predicted
    # positive) is reported as 0.0 instead of NaN.
    # Raises ValueError when the two vectors do not hold the same number of labels.

    # Flatten both vectors so a (1, m) prediction cannot silently broadcast against
    # an (m, 1) label vector into an m x m comparison.
    predicted = np.ravel(y_predicted)
    actual = np.ravel(y_gt)
    if predicted.shape != actual.shape:
        raise ValueError("y_predicted and y_gt must have the same number of labels")

    # Compare against 0/1 directly rather than through products and differences,
    # which wrap around for unsigned labels and are not defined for boolean arrays.
    predicted_pos = predicted == 1
    predicted_neg = predicted == 0
    actual_pos = actual == 1
    actual_neg = actual == 0

    TP = np.sum(predicted_pos & actual_pos)
    FP = np.sum(predicted_pos & actual_neg)
    TN = np.sum(predicted_neg & actual_neg)
    FN = np.sum(predicted_neg & actual_pos)

    def percentage(numerator,denominator):
        # Ratio as a rounded percentage; 0.0 when the ratio is undefined.
        if denominator == 0:
            return np.round(0.0,decimals=2)
        return np.round((numerator/denominator)*100,decimals=2)

    Precision = percentage(TP,TP+FP)
    Recall = percentage(TP,TP+FN)
    # F1 from the raw counts: the harmonic mean of precision and recall, but not
    # distorted by their rounding and well defined when either of them is zero.
    F1 = percentage(2*TP,2*TP+FP+FN)
    Accuracy = percentage(TP+TN,TP+TN+FP+FN)

    return Precision, Recall, F1, Accuracy