Algorithm 1(eliminate points-m): 
    Input: p1,p2,... , pn' (in order) where n' is the number of points in the stream. 
    Output: Skyline points S' 
    1: Let x = 24m. 
    2: Pass 1: For j : 1, 2, ..., x, let p'j be a point picked uniformly at random from the stream. 
    Let S be the set of such points.
    3: Pass 2: 
    4: for i = 1, ..., n' do 
    5: for any p'j, if pi dominates p'j then p'j:=pi
    6: end for 
    7: Let S'={p'1,p'2,...,p'x}.
    8: Pass 3: 
            Delete from stream all points in S' and all points dominated by any point in S'.
    9: return S' 

In [1]:
from random import randint
# Each class is just to illustrate that each is one of the main steps proposed in the paper
# otherwise these would have all been put in one single class 
# the first class is made in order to ensure a proper stream creation, this class won't be used if 
# some real life data can directly be injected in the following functions 
class stream_creation:
    def __init__(self,_dimensions, stream_size):
        self._stream_size = n # What the meaning of this n 
        self._dimensions = dimensions

    def stream_creation(_dimensions, _stream_size):
        stream = ()
        for point in range(_stream_size):
            coordinates = ()
            for coordinate in range(_dimensions):
                coordinates = coordinates + (randint(1,100),)
            stream = stream +(coordinates,)
        return(stream)

# this is the first algorithm
# it is important to note that a function from this class has been eliminated 
# that function being the delete from stream points in S function 
# the reason for this being that the computing time was five time quicker 
# the reason for the speed increase being that that function required to go over each point of the stream 
# and compare each of these points in the stream to the points in the reservoir 
# but in the previous function, where dominance from random points of stream on reservoir is done,
# if the point is deleted from the stream just after being added to the reservoir it has a double advantage 
# the first advantage being that the deleted function had to look all points from reservoir in stream 
# but we know that the locations of these points are known during the domination from random streampoints 
# hence instead of looking it up we delete it after having put it in the reservoir 
# the second advantage being that points in the reservoir that are dominated are also eliminated afterwards 
# hence points that the reservoir would dominate and eliminate anyways in the following function 
# are already eliminated without needing to look it up another time 
class eliminate_points_algorithm:
    # this is the activation function 
    def __init__(self,amount_skylinepoints, stream): 
        self._amount_skylinepoints = amount_skylinepoints
        self._stream = stream 
    # as the stream has to be transformed it has to be transformed from a tuple into a list 
    def transform_stream_tuple_in_list(_stream):
        stream_list= [] 
        for component in _stream:
            stream_list.append(component)
        return(stream_list)
    # the following function creates the reservoir randomly, the first part takes the first 24*skylinepoints from stream
    # the second part takes randomly some points and puts them in the reservoir instead of points already in there 
    def reservoir_sampling(stream,_amount_skylinepoints):
        S = []
        if len(stream)>24*_amount_skylinepoints:
            size_sample = 24*_amount_skylinepoints
            for initial_sample in range(size_sample):
                S.append(stream[initial_sample])
            
            for address_candidate_sample, candidate_sample in enumerate(stream): 
                chance = randint(0,address_candidate_sample)
                if chance < size_sample:
                    S[chance] = candidate_sample
        else:
            for initial_sample in range(len(stream)):
                S.append(stream[initial_sample])
        return(S) 

    # this is the random domination function, each point from the stream is taken and compared to a random point 
    # from the reservoir, if it dominates that reservoir point it replaces it, otherwise it doesn't 
    # in both cases it is eliminated from the stream afterwards as either it is in the stream either it is equal or dominated by the reservoir 
    # if equal there is already such a point in reservoir hence it is not needed anymore as its coordinates are already in the reservoir 
    def random_domination_from_stream_on_reservoir(stream, S):  
        for candidate in stream:
            chance = randint(0,len(S)-1)
            points = 0  
            if candidate[0]>= S[chance][0] and candidate[1]>=S[chance][1] and(candidate[0]> S[chance][0] or candidate[1]>S[chance][1]):
                S[chance] = candidate
                stream.remove(candidate)
        return(S,stream)
    
    # here the remaining points in the stream that are dominated by any point from the reservoir are eliminated 
    def elimination_dominated_by_S_components_from_stream(stream,S):            
        for victim in stream:
            for candidate in S:
                points = 0
                if victim in stream:
                    if candidate[0]>= victim[0] and candidate[1]>=victim[1] and(candidate[0]> victim[0] or candidate[1]>victim[1]):
                        stream.remove(victim)
        return(stream)



Algorithm 2 (Streaming RAND): 
    1: Let n be the number of points in the input stream. 
    Let m' = 1. 
    2: while the input stream is not empty do: 
    3: let n' be the current number of points in the stream 
    4: Call eliminate points (m'log(nlogn))
    5: If more than n'/2 points are left in the stream, m' = 2 m'
    6: end while 
    Remark: In case the stream cannot be changed, we do not have to actually delete points from stream. 
    We only keep the skyline points found so far and consider only points in the stream that is not dominated by any found skyline points. 

In [41]:
# the second class deals with an issue that wasn't approached by the algorithm from the first class
# it has to find the right amount of skylinepoints 
# this is done via increasing progressively the amount of skylinepoints until there is nothing left in the stream 
class streaming_RAND:
    def __init__(self, stream):
        self._stream = stream 
        
    def find_skylinepoints(stream):
        # the remaining_stream is what takes the stream, or what is left from it as it went through the previous round
        # the reason for this being that the points of the already made reservoir and dominated in the reservoir are 
        # already fine, the only issue being that not enough points could be taken 
        # hence it makes no sense to restart the algorithm from zero each time and compute all stream points over again 
        remaining_stream = eliminate_points_algorithm.transform_stream_tuple_in_list(stream)
        # the total_skylinepoints keep track of the total increase in skylinepoints that will be in the total reservoir 
        # this is necessary as through the calls of the eliminate_function algorithm we already have found a pool
        # hence if we did just took the skylinepoints and doubled it instead of doing it with the total skylinepoints 
        # the real total of skyline points that we'd get in the end would increase exponentially instead of doubling or 
        # +1 in case the size of the stream less than halves 
        skylinepoints = 1
        total_skylinepoints = 1
        previous_skylinepoints = 0
        # the total pool is where all the points from the reservoirs obtained from the remove point algorithms are added up
        # this is another consequence of choosing to only work on the remaining stream 
        total_pool = []
        # the length of the previous stream must be known in order to know if we have to, or not, double the 
        # number of skylinepoints or simply add one skylinepoint 
        length_previous_stream = len([])
        # a price to pay with the fusion of the two functions in the eliminate_function algorithm is that 
        # at the very end there are still few remaining dominating points that remain 
        # the algorithm couldn't deal with those points despite being dominant hence for all points the points are 
        # going through the reservoir procedure and when there are only 24 points left (or 1 skylinepoint) 
        # then all are added to the final pool as these points have gone through all the reservoir dominating runs 
        # these points could only be dominating 
        while len(remaining_stream) > 24:  
            # in the following five lines the eliminate_points_algorithm is simply used with whatever amount of 
            # skylinepoints we decide to work on 
            non_dominated_reservoir = eliminate_points_algorithm.reservoir_sampling(remaining_stream,skylinepoints)
            dominated_reservoir_and_stream_without_reservoir = eliminate_points_algorithm.random_domination_from_stream_on_reservoir(remaining_stream,non_dominated_reservoir)
            dominated_reservoir = dominated_reservoir_and_stream_without_reservoir[0]
            stream_list_with_no_reservoir_points = dominated_reservoir_and_stream_without_reservoir[1]
            dominated_stream = eliminate_points_algorithm.elimination_dominated_by_S_components_from_stream(stream_list_with_no_reservoir_points,dominated_reservoir)
            # this if is used if the amount of streaming points has at least halved 
            if length_previous_stream >= 2*len(dominated_stream):
                total_skylinepoints = total_skylinepoints*2
            # this else is used if the amount of streaming points has not at least halved 
            else:
                total_skylinepoints += 1
            # the previous_skylinepoints, total_skylinepoints are updated and these define the amount of skylinepoints 
            # that have to be run 
            previous_skylinepoints += skylinepoints 
            skylinepoints = total_skylinepoints-previous_skylinepoints
            remaining_stream = dominated_stream
            # the length of the dominated stream is updated 
            length_previous_stream = len(dominated_stream)
            # the reservoir going out of the eliminate_points_algorithm is put in the total_pool before it gets 
            # destroyed by the next iteration 
            for component in dominated_reservoir:
                total_pool.append(dominated_reservoir)
            print(total_skylinepoints//2, len(remaining_stream),len(dominated_reservoir),len(total_pool))
            
        # this bit of code is dealing with the last 24 or less points remaining in the stream that couldn't be 
        # dominated by the multiple reservoirs, given they made it to this point they can be assumed to be 
        # dominating, otherwise they'd have been eliminated with almost certainty (same idea as with the points in reservoir)
        i = 0
        while i <len(remaining_stream):
            total_pool.append(remaining_stream[i])
            remaining_stream.remove(remaining_stream[i])
        print((total_skylinepoints//2)+1,len(remaining_stream),len(dominated_reservoir),len(total_pool))
            
            

In [42]:
stream = stream_creation.stream_creation(2,100)
print(streaming_RAND.find_skylinepoints(stream)) 

1 43 24 24
2 17 24 48
3 0 24 65
None


In [24]:
stream = stream_creation.stream_creation(2,10000)
print(streaming_RAND.find_skylinepoints(stream)) 

1 3556 24 24
2 1456 24 48
4 630 48 96
8 268 96 192
16 109 192 384
32 47 109 493
64 19 47 540
64 0 47 559
None


In [32]:
# this system uses a fixed windows whose % is defined in advance 
# it is basically the same algorithm as the streaming_RAND algorithm, at the exception that the amount of skylinepoints 
# to be run each time is already defined 
# the idea is to see how efficient it is 
class fixed_window:
    def __init__(self, stream):
        self._stream = stream 

    def find_skylinepoints(stream,percentage):
        remaining_stream = eliminate_points_algorithm.transform_stream_tuple_in_list(stream)
        total_pool = []
        length_previous_stream = len([])
        window = int((percentage*len(stream))//24)
        if window == 0:
            window = 1
        while len(remaining_stream) > 20:  
            non_dominated_reservoir = eliminate_points_algorithm.reservoir_sampling(remaining_stream,window)
            dominated_reservoir_and_stream_without_reservoir = eliminate_points_algorithm.random_domination_from_stream_on_reservoir(remaining_stream,non_dominated_reservoir)
            dominated_reservoir = dominated_reservoir_and_stream_without_reservoir[0]
            stream_list_with_no_reservoir_points = dominated_reservoir_and_stream_without_reservoir[1]
            dominated_stream = eliminate_points_algorithm.elimination_dominated_by_S_components_from_stream(stream_list_with_no_reservoir_points,dominated_reservoir)
            remaining_stream = dominated_stream
            length_previous_stream = len(dominated_stream)
            for component in dominated_reservoir:
                total_pool.append(dominated_reservoir)
            print(len(remaining_stream),len(dominated_reservoir),len(total_pool))

        i = 0
        while i <len(remaining_stream):
            total_pool.append(remaining_stream[i])
            remaining_stream.remove(remaining_stream[i])
        print(len(remaining_stream),len(dominated_reservoir),len(total_pool))

In [25]:
# with 0.001 it is rather quickly, but few points are left at the end as only 2% remains consistently 
# while it was more like 5% with the streaming_RAND algorithm the speed was a bit better to the streaming_RAND 
stream = stream_creation.stream_creation(2,10000)
percentage = 0.001
print(fixed_window.find_skylinepoints(stream,percentage)) 

3582 24 24
1434 24 48
625 24 72
284 24 96
127 24 120
56 24 144
25 24 168
12 24 192
0 24 204
None


In [26]:
# more similar to the streaming_RAND in computing time and results, 1% less efficient on a consistent base 
stream = stream_creation.stream_creation(2,10000)
percentage = 0.01
print(fixed_window.find_skylinepoints(stream,percentage)) 

3494 96 96
1387 96 192
587 96 288
251 96 384
103 96 480
44 96 576
17 44 620
0 44 637
None


In [27]:
# very slow and very bad results, only to show that it is important to define how many skylinepoints are needed 
# is an important question that has to be balanced with the speed and total dominating points obtained 
# an important point to remember from this being that a fixed window is only good when we have an idea about 
# how many skyline points we have or need 
# but if we do not have that information it would be better to run one streaming_RAND algorithm or multiple fixed_window algorithms 
stream = stream_creation.stream_creation(2,10000)
percentage = 0.1
print(fixed_window.find_skylinepoints(stream,percentage))

3305 984 984
1198 984 1968
450 984 2952
180 450 3402
77 180 3582
32 77 3659
14 32 3691
0 32 3705
None
