# Streaming

In [None]:
# init
from .one_sparse_recovery import *
from .misra_gries import *

## Implementation of the Misra-Gries Algorithm

In [None]:

"""
Implementation of the Misra-Gries algorithm.
Given a list of items and a value k, it returns every item in the list
that appears more than n/k times, where n is the length of the list.
An item that appears exactly n/k times may or may not be reported.

By default, k is set to 2, solving the majority problem.

For the majority problem, the first pass of this algorithm only guarantees that
if there is an element that appears more than n/2 times, it will be output. If
there is no such element, an arbitrary element may be left over by that pass.
Therefore, we need to iterate through the list again at the end. But since we
have filtered out the suspects, the memory complexity is significantly lower
than it would be to create a counter for every element in the list.
"""

In [None]:
"""
For example:
Input misras_gries([1,4,4,4,5,4,4])
Output {'4':5}
Input misras_gries([0,0,0,1,1,1,1])
Output {'1':4}
Input misras_gries([0,0,0,0,1,1,1,2,2],3)
Output {'0':4,'1':3}
Input misras_gries([0,0,0,1,1,1])
Output None
"""

In [None]:
def misras_gries(array,k=2):
    """Find the frequent items of a list with the Misra-Gries algorithm.

    The first pass keeps at most ``k - 1`` counters and leaves a small set
    of candidate ("suspect") items. The second pass counts the candidates
    exactly and keeps those occurring at least ``len(array) / k`` times.

    Items are tallied by their ``str()`` form, so two items with the same
    string form are treated as the same item. Any item type is accepted,
    not only integers.

    Keyword arguments:
    array -- list of items; it is traversed twice and ``len()`` is taken,
             so it must be a sized, re-iterable sequence
    k -- value of k (default 2); with k < 2 no counter can be kept and the
         result is always None

    Returns a dict mapping the string form of each confirmed item to its
    exact number of occurrences, or None when no item qualifies.
    """
    # Pass 1: candidate selection with a bounded number of counters.
    counters = {}
    for item in array:
        key = str(item)
        if key in counters:
            counters[key] += 1
        elif len(counters) < k - 1:
            counters[key] = 1
        else:
            # No free counter: the new item cancels one occurrence of every
            # tracked item. Iterate over a snapshot because entries that
            # reach zero are removed during the loop.
            for tracked in list(counters):
                counters[tracked] -= 1
                if counters[tracked] == 0:
                    del counters[tracked]

    if not counters:
        return None

    # Pass 2: exact counts for the surviving candidates only, gathered in a
    # single traversal and keyed the same way as in pass 1.
    exact = dict.fromkeys(counters, 0)
    for item in array:
        key = str(item)
        if key in exact:
            exact[key] += 1

    threshold = len(array) / k
    frequencies = {
        key: count for key, count in exact.items() if count >= threshold
    }
    return frequencies or None


def _count_frequency(array,element):
    return array.count(element)

## Implementation of the Non-Negative 1-Sparse Recovery Algorithm

In [None]:
"""
Non-negative 1-sparse recovery problem.
This algorithm assumes we have a non negative dynamic stream.

Given a stream of tuples, where each tuple contains a number and a sign (+/-), it checks whether
the stream is 1-sparse, meaning that the elements in the stream cancel each other out in such
a way that there is only one unique number left at the end.
"""

In [None]:
"""
Examples:
#1
Input:  [(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')]
Output: 4
Comment: Since 2 and 3 get removed.
#2
Input:  [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')]
Output: 2
Comment: No other numbers present
#3
Input:  [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')]
Output: None
Comment: Not 1-sparse
"""

In [None]:
def one_sparse(array):
    """1-sparse algorithm

    Keyword arguments:
    array -- stream of tuples
    """
    sum_signs = 0
    bitsum = [0]*32
    sum_values = 0
    for val,sign in array:
        if sign == "+":
            sum_signs += 1
            sum_values += val
        else:
            sum_signs -= 1
            sum_values -= val

        _get_bit_sum(bitsum,val,sign)

    if sum_signs > 0 and _check_every_number_in_bitsum(bitsum,sum_signs):
        return int(sum_values/sum_signs)
    else:
        return None

#Helper function to check that every entry in the list is either 0 or  the same as the
#sum of signs
def _check_every_number_in_bitsum(bitsum,sum_signs):
    for val in bitsum:
        if val != 0 and val != sum_signs :
            return False
    return True

# Adds bit representation value to bitsum array
def _get_bit_sum(bitsum,val,sign):
    i = 0
    if sign == "+":
        while val:
            bitsum[i] += val & 1
            i +=1
            val >>=1
    else :
        while val:
            bitsum[i] -= val & 1
            i +=1
            val >>=1