In [None]:
from dsio.anomaly_detectors import AnomalyMixin
import numpy as np

class OnlineEM(AnomalyMixin):
    """
    Batch-wise (online) EM-style estimator for a mixture of ``m`` components.

    Each component ``i`` has a weight ``gammas[i]`` and a rate ``lambdas[i]``.
    Every call to :meth:`train_batch` blends the current parameters with the
    estimates obtained from one batch, using a step size of ``k ** -0.6``
    where ``k`` is the number of batches processed so far.

    NOTE(review): ``poisson(x, lambda_i)`` and ``log`` are not defined in this
    cell; they are presumably the Poisson pmf and the natural logarithm,
    defined elsewhere in the notebook -- confirm before reusing this class.
    """

    def __init__(self, gammas, lambdas, segment_length):
        """
        :param gammas: initial weight of each component (one value per component)
        :param lambdas: initial rate of each component (same length as ``gammas``)
        :param segment_length: number of data points taken for each batch
        """
        # gammas and lambdas are the initialization
        self.gammas = np.array(gammas)
        self.lambdas = np.array(lambdas)
        self.segment_length = segment_length

        assert len(gammas) == len(lambdas), \
            "gammas and lambdas must have the same length"

        # number of poisson mixtures
        self.m = len(gammas)

        # One history list per component; train_batch appends the updated
        # value of gamma_i / lambda_i after every processed batch.
        self.gammas_over_time = [[] for _ in gammas]
        self.lambdas_over_time = [[] for _ in lambdas]
        # One entry per processed batch (see calculate_likelihood).
        self.likelihood = []

        # number of current iteration
        self.iteration_k = 0

    def _weighted_densities(self, x):
        """
        :param x: a single data point
        :return: an (m,) array whose i-th entry is gammas[i] * poisson(x, lambdas[i])
        """
        return self.gammas * np.array([poisson(x, lambda_i) for lambda_i in self.lambdas])

    def calculate_participation(self, data):
        """
        :param data: n array of the data to train
        :return: an (n, m) array of the participation of each data point to each poisson distribution
                m is the number of distributions
        """
        f = np.zeros(shape=(len(data), self.m))
        for i, x in enumerate(data):
            # Evaluate the weighted component terms once per point, then
            # normalise them so that each row of f sums to one.
            weighted = self._weighted_densities(x)
            f[i] = weighted / np.sum(weighted)

        return f

    def calculate_likelihood(self, data):
        """
        :param data: the data points to evaluate under the current parameters
        :return: the sum over the data points of log(sum_i gammas[i] * poisson(x, lambdas[i]))
        """
        # naive implementation for likelihood calculation
        new_likelihood = 0
        for x in data:
            new_likelihood = new_likelihood + log(np.sum(self._weighted_densities(x)))
        return new_likelihood

    def train_batch(self, data):
        """
        :param data: the batch data
        updates gammas, lambdas and likelihood
        """

        self.iteration_k += 1
        n = len(data)

        f = self.calculate_participation(data)

        # Total participation received by each component over the batch.
        participation_totals = f.sum(axis=0)

        # The step size shrinks with the iteration count; the first batch
        # (k == 1) has a factor of 1 and fully replaces the initial values.
        update_factor = 1 / (pow(self.iteration_k, 0.6))
        self.gammas = (1 - update_factor) * self.gammas + update_factor * (participation_totals / n)

        # Participation-weighted sum of the data points, per component.
        # Accumulated in data order, starting from a zero vector.
        weighted_sum = sum((x * f_row for x, f_row in zip(data, f)), np.zeros(self.m))
        self.lambdas = (1 - update_factor) * self.lambdas + update_factor * (weighted_sum / participation_totals)

        # append last value of gammas and lambdas
        for history, gamma_i in zip(self.gammas_over_time, self.gammas):
            history.append(gamma_i)

        for history, lambda_i in zip(self.lambdas_over_time, self.lambdas):
            history.append(lambda_i)

        # The likelihood is evaluated with the freshly updated parameters.
        self.likelihood.append(self.calculate_likelihood(data))

    def get_new_batch(self, data, pos):
        """
        :param data: the whole dataset
        :param pos: the starting position of the batch in data
        :return: a tuple (batch, next_pos); the batch holds segment_length points
                 and wraps around to the start of data when it runs past the end
        """
        n = len(data)

        assert self.segment_length <= n, \
            "segment_length must not exceed the number of data points"

        end = pos + self.segment_length
        if end <= n:
            return data[pos: end], end

        # The batch runs past the end of the data: take the tail, then
        # continue from the beginning for the remaining points.
        overflow = end - n
        return np.concatenate((data[pos: n], data[0: overflow]), axis=0), overflow

    def train(self, data, batch_number=100):
        """
        :param data: the whole dataset to train from
        :param batch_number: the number of batches to train on; each batch holds
                             self.segment_length consecutive points of data
        """

        # the starting position of the current batch in the data
        pos = 0
        for _ in range(batch_number):
            batch, pos = self.get_new_batch(data, pos)

            self.train_batch(batch)

    def get_gammas(self):
        """
        :return: a list with one list per component holding the value of gamma_i after each batch
        """
        return self.gammas_over_time

    def get_lambdas(self):
        """
        :return: a list with one list per component holding the value of lambda_i after each batch
        """
        return self.lambdas_over_time

    def get_likelihood(self):
        """
        :return: the list of likelihood values recorded after each batch
        """
        return self.likelihood
    