In [41]:
from abc import ABC, abstractmethod
import numpy as np
from scipy.stats import multivariate_normal

class Model(ABC):
    """Abstract interface for the models in this file.

    Concrete subclasses must override both ``train`` and ``predict``;
    the class cannot be instantiated until they do.
    """

    @abstractmethod
    def train(self, train_inputs, train_labels):
        """Train the model from ``train_inputs`` and ``train_labels``.

        The base implementation does nothing and returns ``None``.
        """

    @abstractmethod
    def predict(self, test_inputs):
        """Produce the model's output for ``test_inputs``.

        The base implementation does nothing and returns ``None``.
        """
        
class DiagonalGaussian(Model):
    """Gaussian density model whose covariance matrix is diagonal.

    ``train`` estimates a per-feature mean and (biased, divide-by-N)
    variance; ``predict`` returns the log-density of each test row under
    that Gaussian.

    Attributes set by ``train``:
        mean: per-feature mean, shape ``(d,)``.
        covariance: diagonal covariance matrix, shape ``(d, d)``.
        inv_covariance: inverse of ``covariance``, shape ``(d, d)``.
        first_half_pdf: the density's normalising constant
            ``1 / sqrt((2*pi)**d * det(covariance))``.
    """

    def train(self, train_inputs, train_labels):
        """Estimate the mean and diagonal covariance from ``train_inputs``.

        Args:
            train_inputs: array-like of shape ``(n_samples, n_features)``.
            train_labels: accepted for interface compatibility; not used.

        Raises:
            ValueError: if ``train_inputs`` is not 2-D or has no rows.
            numpy.linalg.LinAlgError: if any feature has zero (or
                undefined) variance, i.e. the covariance is singular.
        """
        train_inputs = np.asarray(train_inputs, dtype=float)
        if train_inputs.ndim != 2 or train_inputs.shape[0] == 0:
            raise ValueError(
                "train_inputs must be a non-empty 2-D array of shape "
                "(n_samples, n_features)"
            )

        self.mean = np.mean(train_inputs, axis=0)
        # Per-feature biased variance (ddof=0) is the diagonal of the biased
        # covariance matrix; computing it directly also works when there is
        # only a single feature.
        variances = np.var(train_inputs, axis=0)
        if not np.all(variances > 0):
            raise np.linalg.LinAlgError(
                "Singular covariance: every feature needs non-zero variance"
            )

        n_features = train_inputs.shape[1]
        self._variances = variances
        # log of the normalising constant; working in log-space keeps the
        # log-density finite even when the density itself would underflow.
        self._log_norm = -0.5 * (
            n_features * np.log(2.0 * np.pi) + np.sum(np.log(variances))
        )

        # Public attributes kept for backward compatibility. A diagonal
        # matrix is inverted by taking reciprocals of its diagonal.
        self.covariance = np.diag(variances)
        self.inv_covariance = np.diag(1.0 / variances)
        with np.errstate(over="ignore", under="ignore"):
            self.first_half_pdf = np.exp(self._log_norm)

    # NOTE: the dunder-style name is kept only so existing callers keep
    # working; names of this form are conventionally reserved for Python.
    def __logpdf__(self, x):
        """Return the log-density of ``x`` under the fitted Gaussian.

        ``x`` may be a single sample of shape ``(d,)`` (returns a scalar) or
        a batch of shape ``(n, d)`` (returns shape ``(n,)``).
        """
        diff = np.asarray(x, dtype=float) - self.mean
        # With a diagonal covariance the Mahalanobis term is a weighted sum
        # of squared differences; no exp/log round trip is needed.
        mahalanobis = np.sum(diff * diff / self._variances, axis=-1)
        return self._log_norm - 0.5 * mahalanobis

    def predict(self, test_inputs):
        """Return the log-density of every row of ``test_inputs``.

        Args:
            test_inputs: array-like of shape ``(n_samples, n_features)``;
                a single 1-D sample is treated as one row.

        Returns:
            1-D ``numpy.ndarray`` of length ``n_samples``.
        """
        samples = np.atleast_2d(np.asarray(test_inputs, dtype=float))
        return self.__logpdf__(samples)

# Load the iris data and take the first 50 rows: columns 0-3 are used as
# the input features and column 4 as the label.
data = np.loadtxt('iris.txt')
train_inputs, train_labels = data[:50, :4], data[:50, 4]

# Fit the diagonal Gaussian, then score the same rows it was fitted on.
# The final bare expression is what the notebook echoes as the cell output.
dg = DiagonalGaussian()
dg.train(train_inputs, train_labels)
dg.predict(train_inputs)

array([ 2.16127041,  1.51938483,  1.26793987,  1.19769284,  2.11076801,
       -1.09803387,  1.49955902,  2.25363001, -0.28190945,  0.94639246,
        1.35641798,  1.78178283,  0.50923948, -3.51508248, -2.63076825,
       -4.06814008, -0.58375588,  2.12451922, -1.18311858,  1.69216249,
        0.68251108,  0.96529864, -2.11176223, -1.63831061, -1.14148148,
        1.30823645,  0.96360787,  2.08360975,  2.05863208,  1.38974255,
        1.40256456,  0.62404938, -0.38346607, -0.90230527,  1.82842113,
        0.93500747,  0.81652604,  1.18274874, -0.32140106,  2.21749453,
        1.78171302, -3.76898949,  0.14445509, -3.71941704, -2.48437276,
        1.35451695,  1.4311738 ,  1.35451269,  1.63893169,  2.15763768])