## Do NOT loop over the number of samples N; loop ONLY over the number of classes C

In [1]:
import numpy as np
from sklearn.datasets import load_iris, load_digits, load_digits
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.preprocessing import Binarizer

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    The denominator |x| + |y| is floored at 1e-8 so that entries where both
    inputs are (near) zero do not cause a division by zero.
    """
    gap = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(gap / scale)

# Gaussian Discriminant Analysis

In [2]:
# Load the Iris dataset; the whole dataset is used as the training set (no split).
data = load_iris()
X_train, y_train = data.data, data.target

In [3]:
def compute_priors(X, y):
    """
    Empirical prior probability of each class (class frequency in y).

    Inputs:
    - X: array of shape (N, D); not used, kept for a uniform signature
    - y: array of shape (N,) holding integer labels in 0..C-1

    Returns:
    - priors : array of shape (C,) with C = max(y) + 1; labels that never
      occur in y keep a prior of 0
    """
    n_classes = np.max(y) + 1
    n_samples = len(y)
    priors = np.zeros(n_classes)
    # Loop over the classes that are present, never over the samples.
    for label in np.unique(y):
        priors[label] = np.count_nonzero(y == label) / n_samples

    return priors

In [4]:
# Reference model: scikit-learn's QDA fitted on the same data.
sk_model = QuadraticDiscriminantAnalysis()
sk_model.fit(X_train, y_train)

# Compare our class priors with the ones stored on the fitted reference model.
priors = compute_priors(X_train, y_train)
error = rel_error(sk_model.priors_, priors)
print(error)
assert  error < 1e-12

0.0


In [5]:
# Scratch check: np.mean with axis=0 averages over the rows (one value per column).
a = np.array([[1, 1, 3], [2, 1, 3]])
print(a)
np.mean(a, axis=0)

[[1 1 3]
 [2 1 3]]


array([1.5, 1. , 3. ])

In [6]:
# Scratch check: select the first row of a.
a[0, :]

array([1, 1, 3])

In [7]:
def compute_means(X, y):
    """
    Per-class feature means, looping over the classes only (never over the samples).

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding integer labels in 0..C-1

    Returns:
    - means : array of shape (C, D) with C = max(y) + 1; rows of labels that
      never occur in y stay at zero
    """
    _, n_features = X.shape
    n_classes = np.max(y) + 1
    means = np.zeros((n_classes, n_features))
    for label in np.unique(y):
        # A boolean mask selects the rows of this class.
        members = X[y == label]
        means[label] = members.mean(axis=0)

    return means

In [8]:
# Reference model: scikit-learn's QDA fitted on the same data.
sk_model = QuadraticDiscriminantAnalysis()
sk_model.fit(X_train, y_train)

# Compare our per-class means with the ones stored on the fitted reference model.
means = compute_means(X_train, y_train)
error = rel_error(sk_model.means_, means)
print(error)
assert  error < 1e-12

0.0


### Covariance formula (unbiased sample estimate):
$$ \mathrm{Cov}(x, y) = \dfrac{\sum_{i=1}^{N}(x_i - \bar{x})(y_i - \bar{y})}{N-1} $$

In [9]:
# Scratch check: multiplying a (5, 3) array of ones by a length-3 vector repeats the vector on every row.
np.ones((5, 3)) * np.array([1, 2, 6])

array([[1., 2., 6.],
       [1., 2., 6.],
       [1., 2., 6.],
       [1., 2., 6.],
       [1., 2., 6.]])

In [10]:
def compute_sigmas_gda(X, y, means):
    """
    Per-class covariance estimate (denominator n_c - 1), looping over the
    classes only (never over the samples). Does not use np.cov.

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding integer labels in 0..C-1
    - means: array of shape (C, D)

    Returns:
    - covariances : array of shape (C, D, D) with C = max(y) + 1; entries of
      labels that never occur in y stay at zero. A class with a single sample
      divides by zero (n_c - 1 == 0) and yields NaN.
    """
    _, D = X.shape
    C = np.max(y) + 1
    covariances = np.zeros((C, D, D))

    for label in np.unique(y):
        # Broadcasting subtracts the class mean from every row; the centred
        # matrix is built once and reused on both sides of the product.
        centered = X[y == label] - means[label]
        covariances[label] = (centered.T @ centered) / (len(centered) - 1)

    return covariances

In [11]:
# Reference model: scikit-learn's QDA, asked to keep its per-class covariances.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# Our covariances are computed around scikit-learn's class means, then compared.
covariances = compute_sigmas_gda(X_train, y_train, sk_model.means_)
error = rel_error(np.asarray(sk_model.covariance_), covariances)
print(error)
assert  error < 1e-12

9.105265823471216e-16


In [12]:
def compute_sigma_lda(X, y, means):
    """
    Pooled (shared) covariance estimate for LDA, looping over the classes only
    (never over the samples). Does not use np.cov.

    The within-class scatter matrices are summed over all classes and the
    total is divided by the number of samples N.

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding integer labels in 0..C-1
    - means: array of shape (C, D)

    Returns:
    - covariance : array of shape (D, D)
    """
    N, D = X.shape
    covariance = np.zeros((D, D))
    for label in np.unique(y):
        # Broadcasting subtracts the class mean from every row; the centred
        # matrix is built once and reused on both sides of the product.
        centered = X[y == label] - means[label]
        covariance += centered.T @ centered

    return covariance / N

In [13]:
# Reference model: scikit-learn's LDA, asked to keep its shared covariance.
sk_model = LinearDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# Our pooled covariance is computed around scikit-learn's class means, then compared.
covariances = compute_sigma_lda(X_train, y_train, sk_model.means_)
error = rel_error(np.asarray(sk_model.covariance_), covariances)
print(error)
assert  error < 1e-12

3.058735273151232e-16


In [155]:
def compute_log_posterior_lda(X, C, priors, means, covariance):
    """
    LDA discriminant score of every class for every observation, computed
    without any loop over the samples.

    For class c the score is
        x^T S^{-1} mu_c - mu_c^T S^{-1} mu_c / 2 + log(prior_c),
    with S the shared covariance. Terms that do not depend on the class are
    not included, so the scores equal the log posterior only up to a
    per-sample additive constant (enough for argmax and for a softmax).

    Does not use scipy or numpy multivariate Gaussian helpers.

    Inputs:
    - X: array of shape (N, D)
    - C: number of classes
    - priors : array of shape (C,)
    - means : array of shape (C, D)
    - covariance : array of shape (D, D)

    Returns:
    - log_posterior : array of shape (N, C)
    """
    priors = np.asarray(priors)[:C]
    means = np.asarray(means)[:C]

    # W[c] = S^{-1} mu_c for all classes at once; solving the linear system
    # avoids forming the explicit inverse (and redoing it for every class).
    W = np.linalg.solve(covariance, means.T).T
    # b[c] = log(prior_c) - mu_c^T S^{-1} mu_c / 2
    b = np.log(priors) - 0.5 * np.sum(W * means, axis=1)

    # b broadcasts over the N rows.
    return X @ W.T + b

In [156]:
# NO TEST FOR LOG-POSTERIOR LDA. It is covered by the combined LDA test further below.

In [157]:
def compute_log_posterior_gda(X, C, priors, means, covariances):
    """
    Unnormalised log posterior of every class for every observation in a
    Gaussian model with one covariance per class. Loops over the classes
    only (never over the samples).

    For class c the value is
        log(prior_c) - log|S_c| / 2 - (x - mu_c)^T S_c^{-1} (x - mu_c) / 2.
    The -D/2 * log(2*pi) term, identical for every class, is not included.

    Does not use scipy or numpy multivariate Gaussian helpers.

    Inputs:
    - X: array of shape (N, D)
    - C: number of classes
    - priors : array of shape (C,)
    - means : array of shape (C, D)
    - covariances : array of shape (C, D, D) (a sequence of C (D, D) arrays also works)

    Returns:
    - log_posterior : array of shape (N, C)
    """
    N, _ = X.shape
    log_posterior = np.zeros((N, C))
    for c in range(C):
        cov_c = np.asarray(covariances[c])
        diff = X - means[c]
        # slogdet returns log|det| directly, so tiny or huge determinants do
        # not underflow/overflow before the logarithm is taken.
        _, logdet = np.linalg.slogdet(cov_c)
        # Row-wise quadratic form (x - mu)^T S^{-1} (x - mu): only the N
        # needed values are computed, not an N x N matrix whose diagonal is
        # then extracted.
        maha = np.sum(diff * np.linalg.solve(cov_c, diff.T).T, axis=1)
        log_posterior[:, c] = np.log(priors[c]) - 0.5 * logdet - 0.5 * maha

    return log_posterior

In [158]:
# Reference model: scikit-learn's QDA, asked to keep its per-class covariances.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# C is defined at notebook (module) level here.
C = (np.max(y_train) + 1)
log_posterior = compute_log_posterior_gda(X_train, C, sk_model.priors_, sk_model.means_, sk_model.covariance_)
# NOTE(review): _decision_function is an underscore-prefixed scikit-learn method,
# presumably not part of its public API - confirm it exists in the installed version.
error = rel_error(np.asarray(sk_model._decision_function(X_train)), log_posterior)
print(error)
assert  error < 1e-12

9.654253389730232e-14


In [159]:
class ProbClassifier():
    """
    Base class for classifiers defined through per-class log posteriors.

    Subclasses implement fit and compute_log_posterior; predict and
    predict_proba are derived from compute_log_posterior.
    """
    def fit(self, X, y):
        """Estimate the model parameters; the base implementation does nothing."""
        pass

    def compute_log_posterior(self, X):
        """
        Return an (N, C) array of log posteriors, possibly shifted by a
        per-row constant. Must be overridden by subclasses.
        """
        raise NotImplementedError("subclasses must implement compute_log_posterior")

    def predict(self, X):
        """Return, for each row of X, the index of the class with the largest log posterior."""
        log_post = self.compute_log_posterior(X)
        return np.argmax(log_post, axis=1)

    def predict_proba(self, X):
        """
        Return an (N, C) array of class probabilities (each row sums to 1).

        The log posteriors are normalised with a softmax over the classes.
        """
        log_post = self.compute_log_posterior(X)
        # Subtract the row maximum before exponentiating: the result is
        # mathematically unchanged, but very negative log posteriors no longer
        # underflow to 0 in every column (which produced 0/0 = NaN).
        shifted = log_post - np.max(log_post, axis=1, keepdims=True)
        unnormalised = np.exp(shifted)
        return unnormalised / np.sum(unnormalised, axis=1, keepdims=True)

In [160]:
class LDA(ProbClassifier):
    """Classifier with Gaussian classes that share a single covariance matrix."""
    def __init__(self):
        # All parameters are populated by fit.
        self.priors = None  # class priors, shape (C,)
        self.means = None   # class means, shape (C, D)
        self.cov = None     # shared covariance, shape (D, D)
        self.C = None       # number of classes

    def fit(self, X, y):
        """
        Estimate priors, class means and the shared covariance.

        Inputs:
        - X: array of shape (N, D)
        - y: array of shape (N,) holding integer labels in 0..C-1
        """
        self.C = (np.max(y) + 1)
        self.priors = compute_priors(X, y)
        self.means = compute_means(X, y)
        self.cov = compute_sigma_lda(X, y, self.means)

    def compute_log_posterior(self, X):
        """Return the (N, C) array produced by compute_log_posterior_lda for X."""
        # Use the class count learned in fit rather than a module-level C,
        # which may be undefined or belong to another dataset.
        log_posterior = compute_log_posterior_lda(X, self.C, self.priors, self.means, self.cov)
        return log_posterior

In [161]:
# Reference predictions from scikit-learn's LDA on the training data.
sk_model = LinearDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our LDA on the same data.
lda = LDA()
lda.fit(X_train, y_train)
pred = lda.predict(X_train)

# Every prediction must match the reference.
assert (sk_pred == pred).all()
print("Accuracy scikit-learn : ", accuracy_score(y_train, sk_pred))
print("Your Accuracy : ", accuracy_score(y_train, pred))

Accuracy scikit-learn :  0.98
Your Accuracy :  0.98


In [124]:
class QDA(ProbClassifier):
    """Classifier with Gaussian classes, each with its own covariance matrix."""
    def __init__(self):
        # All parameters are populated by fit.
        self.priors = None  # class priors, shape (C,)
        self.means = None   # class means, shape (C, D)
        self.cov = None     # per-class covariances, shape (C, D, D)
        self.C = None       # number of classes

    def fit(self, X, y):
        """
        Estimate priors, class means and per-class covariances.

        Inputs:
        - X: array of shape (N, D)
        - y: array of shape (N,) holding integer labels in 0..C-1
        """
        self.C = (np.max(y) + 1)
        self.priors = compute_priors(X, y)
        self.means = compute_means(X, y)
        self.cov = compute_sigmas_gda(X, y, self.means)

    def compute_log_posterior(self, X):
        """Return the (N, C) array produced by compute_log_posterior_gda for X."""
        # Use the class count learned in fit rather than a module-level C,
        # which may be undefined or belong to another dataset.
        log_posterior = compute_log_posterior_gda(X, self.C, self.priors, self.means, self.cov)
        return log_posterior

In [125]:
# Reference predictions from scikit-learn's QDA on the training data.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our QDA on the same data.
qda = QDA()
qda.fit(X_train, y_train)
pred = qda.predict(X_train)

# Every prediction must match the reference.
assert (sk_pred == pred).all()
print("Accuracy scikit-learn : ", accuracy_score(y_train, sk_pred))
print("Your Accuracy : ", accuracy_score(y_train, pred))

Accuracy scikit-learn :  0.98
Your Accuracy :  0.98


In [126]:
# Reference class probabilities from scikit-learn's QDA on the training data.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict_proba(X_train)

# Class probabilities from our QDA on the same data.
qda = QDA()
qda.fit(X_train, y_train)
pred = qda.predict_proba(X_train)

# The two probability arrays must agree to within a relative error of 1e-12.
error = rel_error(pred, sk_pred)
print(error)
assert error < 1e-12

1.6651042200099916e-14


# Naive Bayes Classifiers

## Bernoulli Naive Bayes

In [147]:
# Load the digits dataset and binarize its features with Binarizer's default settings.
data = load_digits()
X_train2, y_train2 = data.data, data.target
X_train2_transf = Binarizer().fit_transform(X_train2)

In [211]:
class BernouilliNaiveBayes(ProbClassifier):
    """
    Naive Bayes classifier for binary (0/1) features.

    Each feature is modelled as an independent Bernoulli variable per class.
    No smoothing is applied to the estimated frequencies; a small constant is
    added inside the logarithms instead.
    """
    def __init__(self):
        # All parameters are populated by fit.
        self.priors = None  # class priors, shape (C,)
        self.C = None       # number of classes
        self.theta = None   # per-class feature frequencies, shape (D, C)

    def fit(self, X, y):
        """
        Estimate the class priors and the parameter theta, where
        theta[d, c] is the fraction of class-c samples with feature d set.
        Loops over the classes only (never over the samples); does not use
        scipy or numpy density helpers.

        Inputs:
        - X: array of shape (N, D) with 0/1 entries
        - y: array of shape (N,) holding integer labels in 0..C-1
        """
        N, D = X.shape
        self.C = (np.max(y) + 1)
        self.theta = np.zeros((D, self.C))
        self.priors = np.zeros(self.C)
        # Only labels present in y are visited: an absent label keeps prior 0
        # and theta 0 instead of producing 0/0 = NaN parameters.
        for classe in np.unique(y):
            X_classe = X[y == classe]
            N_c = len(X_classe)
            self.priors[classe] = N_c / len(y)
            self.theta[:, classe] = np.sum(X_classe, axis=0) / N_c

    def compute_log_posterior(self, X):
        """
        Return the (N, C) array of unnormalised log posteriors
            log(prior_c) + sum_d [x_d log(theta_dc) + (1 - x_d) log(1 - theta_dc)].

        Inputs:
        - X: array of shape (N, D) with 0/1 entries
        """
        eps = 1e-10  # keeps the logarithms finite when theta is exactly 0 or 1
        log_theta = np.log(self.theta + eps)          # (D, C)
        log_not_theta = np.log(1 - self.theta + eps)  # (D, C)
        # A class with prior 0 gets -inf on purpose, so it is never selected.
        with np.errstate(divide="ignore"):
            log_prior = np.log(self.priors)
        # The sums over features for all classes are two matrix products.
        log_post = log_prior + X @ log_theta + (1 - X) @ log_not_theta

        return log_post

In [212]:
# Reference predictions from scikit-learn's BernoulliNB on the binarized digits.
sk_model = BernoulliNB()
sk_model.fit(X_train2_transf, y_train2)
sk_pred = sk_model.predict(X_train2_transf)

# Predictions from our Bernoulli naive Bayes on the same data.
model = BernouilliNaiveBayes()
model.fit(X_train2_transf, y_train2)
pred = model.predict(X_train2_transf)

sk_acc = accuracy_score(y_train2, sk_pred)
model_acc = accuracy_score(y_train2, pred)
print("Accuracy scikit-learn : ", sk_acc)
print("Your Accuracy : ", model_acc)
# One-sided check: fails only when our accuracy is 0.01 or more below the reference.
assert sk_acc - model_acc < 0.01

Accuracy scikit-learn :  0.8636616583194212
Your Accuracy :  0.8742348358375069


## Gaussian Naive Bayes

In [274]:
class GaussianNaiveBayes(ProbClassifier):
    """
    Naive Bayes classifier for continuous features.

    Each feature is modelled as an independent univariate Gaussian per class.
    No variance smoothing is applied, so a feature that is constant within a
    class gets a zero variance and produces non-finite scores.
    """
    def __init__(self):
        # All parameters are populated by fit.
        self.priors = None  # class priors, shape (C,)
        self.C = None       # number of classes
        self.mu = None      # per-class feature means, shape (D, C)
        self.sigma = None   # per-class feature VARIANCES (not std devs), shape (D, C)

    def fit(self, X, y):
        """
        Estimate the class priors and the parameters mu and sigma, where
        sigma holds the per-feature variance (mean squared deviation,
        denominator N_c). Loops over the classes only (never over the
        samples); does not use scipy or numpy density helpers.

        Inputs:
        - X: array of shape (N, D)
        - y: array of shape (N,) holding integer labels in 0..C-1
        """
        N, D = X.shape
        self.C = (np.max(y) + 1)
        self.sigma = np.zeros((D, self.C))
        self.mu = np.zeros((D, self.C))
        self.priors = np.zeros(self.C)
        # Iterate over the labels of THIS dataset (not a module-level C);
        # a label absent from y keeps prior 0.
        for classe in np.unique(y):
            X_classe = X[y == classe]
            N_c = len(X_classe)
            self.priors[classe] = N_c / len(y)
            self.mu[:, classe] = np.sum(X_classe, axis=0) / N_c
            self.sigma[:, classe] = np.sum(np.square(X_classe - self.mu[:, classe]), axis=0) / N_c

    def compute_log_posterior(self, X):
        """
        Return the (N, C) array of unnormalised log posteriors
            log(prior_c) + sum_d log N(x_d; mu_dc, sigma_dc).

        Inputs:
        - X: array of shape (N, D)
        """
        N, D = X.shape
        # Classes with prior 0 stay at -inf, so they are never selected.
        log_post = np.full((N, self.C), -np.inf)
        for classe in np.flatnonzero(self.priors > 0):
            mu = self.mu[:, classe]
            var = self.sigma[:, classe]
            # Log-density written out directly: taking log(exp(.)) of the
            # density underflows to -inf for points far from the mean.
            log_density = -0.5 * (np.log(2 * np.pi * var) + np.square(X - mu) / var)
            log_post[:, classe] = np.log(self.priors[classe]) + np.sum(log_density, axis=1)

        return log_post
        

In [275]:
# Scratch check: print numpy's value of pi.
print(np.pi)

3.141592653589793


In [276]:
# Reference predictions from scikit-learn's GaussianNB on the Iris training data.
sk_model = GaussianNB()
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our Gaussian naive Bayes on the same data.
model = GaussianNaiveBayes()
model.fit(X_train, y_train)
pred = model.predict(X_train)

sk_acc = accuracy_score(y_train, sk_pred)
model_acc = accuracy_score(y_train, pred)
print("Accuracy scikit-learn : ", sk_acc)
print("Your Accuracy : ", model_acc)
# One-sided check: fails only when our accuracy is 0.01 or more below the reference.
assert sk_acc - model_acc < 0.01

Accuracy scikit-learn :  0.96
Your Accuracy :  0.96
