In [1]:
from rtree import index
from collections import deque
import math

In [36]:
def _get_cost(bounds, nPoints, fanout = 2):
    h = math.log( (nPoints+1) /fanout, fanout) + 1
    DA  = h + math.sqrt(nPoints)*2/(math.sqrt(fanout) - 1) + nPoints/(fanout - 1) + 1
    return DA * nPoints
def cost_base_partition(rtree, maxCost, eps):
    """Partition the index's bounding box into regions whose cost is at most ``maxCost``.

    Regions are processed breadth-first: a region whose estimated cost
    (``_get_cost`` of its point count) exceeds ``maxCost`` is replaced by the
    two sub-regions returned by ``_cost_base_split``; any other region is
    accepted as a final partition.

    Args:
        rtree: spatial index exposing ``bounds`` and ``count(box)``.
        maxCost: largest cost a final partition may have.
        eps: passed through to ``_cost_base_split``.

    Returns:
        List of accepted boxes, in the order they were accepted.
    """
    mbr = rtree.bounds
    partition_list = []
    queue = deque()
    queue.append(mbr)
    while len(queue):
        br = queue.popleft()
        nPoints = rtree.count(br)
        # Use the helper actually defined in this notebook; the previous
        # spelling `get_cost` is not defined in the visible cells and raises
        # NameError on a fresh kernel.
        if _get_cost(br, nPoints) > maxCost:
            (subbr1, subbr2) = _cost_base_split(rtree, br, eps)
            queue.append(subbr1)
            queue.append(subbr2)
        else:
            partition_list.append(br)
    return partition_list

def _cost_base_split(rtree, bounds, eps):
    (xmin, ymin, xmax, ymax) = bounds
    #vertical split  
    ymin_diff = float('inf')    
    ysplit = ymin + (ymax - ymin)/2
    ybest_split = ((xmin, ymin, xmax, ysplit), (xmin, ysplit, xmax, ymax)) 
    while( ysplit + eps * 2<= ymax):      
        lowerbr = (xmin, ymin, xmax, ysplit)
        lowercost = _get_cost(lowerbr, rtree.count(lowerbr))
        
        upperbr = (xmin, ysplit, xmax, ymax)
        uppercost = _get_cost(upperbr, rtree.count(upperbr))
        costdiff = abs(uppercost - lowercost)
        if costdiff < ymin_diff:
            ymin_diff = costdiff
            ybest_split = (lowerbr, upperbr)
            if uppercost < lowercost:
                ysplit = ymin + (ysplit - ymin)/2
            else:
                ysplit = ysplit + (ymax - ysplit)/2
        else:
            break
    
    #horizontal split
    xmin_diff = float('inf')    
    xsplit = xmin + (xmax - xmin)/2
    xbest_split = ((xmin, ymin, xsplit, ysplit), (xsplit, ymin, xmax, ymax))
    while( xsplit + eps * 2<= xmax):   
        lowerbr = (xmin, ymin, xsplit, ymax)
        lowercost = _get_cost(lowerbr, rtree.count(lowerbr))
        
        upperbr = (xsplit, ymin, xmax, ymax)
        uppercost = _get_cost(upperbr, rtree.count(upperbr))
        costdiff = abs(uppercost - lowercost)
        if costdiff < xmin_diff:
            xmin_diff = costdiff
            xbest_split = (lowerbr, upperbr)
            if uppercost < lowercost:
                xsplit = xmin + (xsplit - xmin)/2
            else:
                xsplit = xsplit + (xmax - xsplit)/2        
        else:
            break
    
    #compare ysplit and xsplit
    if xmin_diff < ymin_diff:
        return xbest_split
    else:
        return ybest_split

In [41]:
def reduced_boundary_partition(rtree, maxPoints, eps):
    """Partition the index's bounding box into regions holding at most ``maxPoints`` points.

    Boxes are handled in FIFO order. A box containing more than ``maxPoints``
    points is replaced by the pair returned from ``_reduced_boundary_split``;
    every other box is emitted as a final partition.

    Args:
        rtree: spatial index exposing ``bounds`` and ``count(box)``.
        maxPoints: largest point count a final partition may have.
        eps: passed through to ``_reduced_boundary_split``.

    Returns:
        List of accepted boxes, in acceptance order.
    """
    accepted = []
    pending = deque([rtree.bounds])
    while pending:
        box = pending.popleft()
        if rtree.count(box) > maxPoints:
            # Too crowded: queue both halves for another look.
            first_half, second_half = _reduced_boundary_split(rtree, box, eps)
            pending.append(first_half)
            pending.append(second_half)
            continue
        accepted.append(box)
    return accepted
def _reduced_boundary_split(rtree, br, eps):
    (xmin, ymin, xmax, ymax) = br
    
    #vertical splitline candidates
    ymin_score = float('inf')
    ysplit = ymin + (ymax - ymin)/2
    ybest_split = ((xmin, ymin, xmax, ysplit), (xmin, ysplit, xmax, ymax)) 
    while(ysplit + eps*2 <= ymax):
        br1 = (xmin,ymin, xmax, ysplit)
        br2 = (xmin, ysplit, xmax, ymax)
        point_diff = abs(rtree.count(br1) - rtree.count(br2))
        score = point_diff * rtree.count((xmin, ysplit-eps, xmax, ysplit+eps))
        if score < ymin_score:
            ymin_score = score
            ybest_split = (br1, br2)
            if rtree.count(br1) > rtree.count(br2):
                ysplit = ymin + (ysplit - ymin)/2
            else:
                ysplit = ysplit + (ymax - ysplit)/2
        else:
            break
        
    #horizontal splitline candidates
    xsplit = xmin + eps * 2
    xmin_score = float('inf')
    xbest_split = ((xmin, ymin, xsplit, ymax), (xsplit, ymin, xmax, ymax)) 
    while( xsplit + eps * 2<= xmax):
        br1 = (xmin , ymin, xsplit, ymax)
        br2 = (xsplit, ymin, xmax, ymax)
        point_diff = abs(rtree.count(br1) - rtree.count(br2))
        score = point_diff * rtree.count((xmin - eps, ymin, xmin + eps, ymax))
        if score < xmin_score:
            xmin_score = score
            xbest_split = (br1, br2)
            if rtree.count(br1) > rtree.count(br2):
                xsplit = xmin + (xsplit - xmin)/2
            else:
                xsplit = xsplit + (xmax - xsplit)/2
        else:
            break

    if xmin_score < ymin_score:
        return xbest_split
    else:
        return ybest_split

In [53]:
#construct rtree index
p = index.Property()
idx = index.Index(properties=p)

# Each non-blank line of s1.txt is read as whitespace-separated integer x and y;
# every point is inserted as a degenerate box (x, y, x, y) with a running id.
with open('s1.txt','r') as f:
    count = 0
    # Stream the file line by line instead of loading it whole with readlines().
    for l in f:
        ls = l.split()
        if not ls:
            # Blank (e.g. trailing) line: nothing to insert, and ls[0] would raise.
            continue
        point = (int(ls[0]), int(ls[1]))
        idx.insert(count, point + point)
        count += 1

In [54]:
#split test
cost_based = cost_base_partition(idx, 1000000, 50)
reduced_boundary = reduced_boundary_partition(idx, 500, 50)

# For each strategy: list every partition with its point count, then the sum
# of those counts.
for heading, partitions in (("Cost-based split:", cost_based),
                            ("\nReduced-boundary split:", reduced_boundary)):
    pointcnt = 0
    print(heading)
    for i in partitions:
        n_in_box = idx.count(i)
        pointcnt += n_in_box
        print('nPoints: ', n_in_box, "\tboundary: ", i)
    print("total points: ", pointcnt)

Cost-based split:
nPoints:  429 	boundary:  (19835.0, 51121.0, 490893.0, 281029.75)
nPoints:  691 	boundary:  (19835.0, 281029.75, 490893.0, 510938.5)
nPoints:  659 	boundary:  (19835.0, 510938.5, 490893.0, 625892.875)
nPoints:  650 	boundary:  (19835.0, 625892.875, 490893.0, 970756.0)
nPoints:  616 	boundary:  (490893.0, 51121.0, 961951.0, 281029.75)
nPoints:  685 	boundary:  (490893.0, 281029.75, 961951.0, 510938.5)
nPoints:  634 	boundary:  (490893.0, 510938.5, 726422.0, 970756.0)
nPoints:  636 	boundary:  (726422.0, 510938.5, 961951.0, 970756.0)
total points:  5000

Reduced-boundary split:
nPoints:  268 	boundary:  (19835.0, 625892.875, 961951.0, 740847.25)
nPoints:  350 	boundary:  (19835.0, 855801.625, 961951.0, 970756.0)
nPoints:  44 	boundary:  (19835.0, 51121.0, 961951.0, 108598.1875)
nPoints:  476 	boundary:  (19835.0, 108598.1875, 961951.0, 166075.375)
nPoints:  470 	boundary:  (19835.0, 166075.375, 961951.0, 223552.5625)
nPoints:  55 	boundary:  (19835.0, 223552.5625, 96195