In [51]:
from abc import ABC, abstractmethod
import numpy as np
from scipy.stats import multivariate_normal

class Model(ABC):
    @abstractmethod
    def train(self, train_inputs, train_labels):
        pass
    
    @abstractmethod
    def predict(self, test_inputs):
        pass
        
class DiagonalGaussian(Model):
    def train(self, train_inputs, train_labels):
        self.mean = np.mean(train_inputs, axis=0)
        self.covariance = np.diag(np.diag(np.cov(train_inputs.T, bias=True)))
        self.inv_covariance = np.linalg.inv(self.covariance)
        self.first_half_pdf = 1 / (np.sqrt((2 * np.pi) ** train_inputs.shape[1] * np.linalg.det(self.covariance)))
    
    def __logpdf__(self, x):
        diff = x - self.mean
        second_half_pdf = np.exp(-0.5 * np.dot(np.dot(diff, self.inv_covariance), diff))
        return np.log(self.first_half_pdf * second_half_pdf)
    
    def predict(self, test_inputs):
        predictions = np.zeros((test_inputs.shape[0]))
        for i, x in enumerate(test_inputs):
            predictions[i] = self.__logpdf__(x)
            
        return predictions

dg = DiagonalGaussian()
data = np.loadtxt('iris.txt')
train_inputs = data[:40, :4]
train_labels = data[:40, 4]

dg.train(train_inputs, train_labels)


class ParzenIsotropicGaussian(Model):
    def __init__(self, sigma = 0.5):
        self.sigma = sigma
        
    def train(self, train_inputs, train_labels):
        self.train_inputs = train_inputs
        self.train_labels = train_labels
        
        self.covariance = np.identity(train_inputs.shape[1]) * self.sigma
        self.inv_covariance = np.linalg.inv(self.covariance)
        self.first_half_pdf = 1 / (np.sqrt((2 * np.pi) ** train_inputs.shape[1] * np.linalg.det(self.covariance)))
    
    def __logpdf__(self, x, mean):
        diff = x - mean
        second_half_pdf = np.exp(-0.5 * np.dot(np.dot(diff, self.inv_covariance), diff))
        return np.log(self.first_half_pdf * second_half_pdf)

    def predict(self, test_inputs):
        predictions = np.zeros((test_inputs.shape[0]))
        for i, x in enumerate(test_inputs):
            for X in train_inputs:
                predictions[i] += self.__logpdf__(x, X)
                
        predictions /= train_inputs.shape[0]
        return predictions

parzen = ParzenIsotropicGaussian()

parzen.train(train_inputs, train_labels)
dg.predict(data[40:50, :4])
parzen.predict(data[40:50, :4])

array([-2.61595977, -4.22945977, -3.07945977, -2.73895977, -2.92795977,
       -2.85145977, -2.72795977, -2.84245977, -2.71545977, -2.61195977])