In [50]:
import pandas as pd
import numpy as np
import math
from scipy.spatial import distance
import scipy

In [34]:
# Author: Jake VanderPlas
# License: BSD
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   For more information, see http://astroML.github.com
#   To report a bug or issue, use the following forum:
#    https://groups.google.com/forum/#!forum/astroml-general
import numpy as np





# We'll create a BallTree class which will recursively subdivide the
# space into circular regions.  Note that this is just an example and
# shouldn't be used for real computation; instead use the optimized
# code in scipy.spatial.cKDTree or sklearn.neighbors.BallTree

class BallTree_node:
    
    def __init__(self,data):
        
        self.data = np.asarray(data)

        # data should be two-dimensional
        assert self.data.shape[1] == 2

        self.loc = data.mean(0)
        self.radius = np.sqrt(np.max(np.sum((self.data - self.loc) ** 2, 1)))
        
        self.child1 = None
        self.child2 = None
        self.points = []
        self.height = 0
        self.id = 0
        
    def __str__(self):
        return "(x="+str(self.x)+",y="+str(self.y)+")"
    
    
class BallTree:
    """Simple Ball tree class"""

    # class initialization function
    def __init__(self, data,treeHeight):
        self.data = np.asarray(data)
        self.treeHeight = treeHeight
        self.nodeIdList = []
        for i in range(2**treeHeight):
             self.nodeIdList.append(0)
        self.documentMap = {}   
        
        for i in range(len(data)):
            d = data[i]
            self.documentMap[tuple(d)] = np.zeros(self.treeHeight+1, dtype=int)
            
        self.root = self.__buildTree(data,0)
        
    def __buildTree(self,data,height):
        print(len(data))
        if height == self.treeHeight:
            return None
        self.nodeIdList[height] = self.nodeIdList[height] + 1
        if len(data) > 1:
            # sort on the dimension with the largest spread
            n = BallTree_node(data)
            n.points = data
            n.height = height
            n.id = self.nodeIdList[height]
            
            largest_dim = np.argmax(data.max(0) - data.min(0))
            i_sort = np.argsort(data[:, largest_dim])
            data[:] = data[i_sort, :]

            # find split point
            N = data.shape[0]
            half_N = int(N / 2)
            split_point = 0.5 * (data[half_N, largest_dim]
                                 + data[half_N - 1, largest_dim])

            # recursively create subnodes
            n.child1 = self.__buildTree(data[half_N:],n.height+1)
            n.child2 = self.__buildTree(data[:half_N],n.height+1)
            return n
        return None
    
    def traverse(self, node):
        members = []
        if node:
            members += self.traverse(node.child1)
            members.append(node)
            members += self.traverse(node.child2)
        return members

In [35]:
def min_max_distance(cluster1, cluster2):
    #start = timeit.default_timer()
    Z =distance.cdist(cluster1, cluster2, 'euclidean')
    #print(Z.min()**2,Z.max()**2)
    #stop = timeit.default_timer()
    #print('Time for mmr: ', stop - start)

    return Z.min()**2,Z.max()**2


    mindis = float('inf')
    maxdis = -1
    for i in cluster1:
        for j in cluster2:
            dis = distance_euclidean(i, j)
            if mindis > dis:
                mindis = dis
                item = (i, j)
            if maxdis < dis:
                maxdis = dis
    return mindis,maxdis

In [44]:
def createLevelMatrix():
    nodeId = []
    for i in range(0,len(nodes)):
        nodeId.append(-1)
    levelMatrix = np.empty(
                shape=(height + 1, 2 ** height + 1), dtype=BallTree_node)
    for i in range(0,len(nodes)):
        levelMatrix[nodes[i].height][nodes[i].id] = nodes[i]
        
    return levelMatrix

In [45]:
def createDistanceMatrix():
    dismatrix = np.empty(
        shape=(height + 1, 2 ** height + 1,
               2 ** height + 1),
        dtype=tuple)
    for l in range(0,height):
            for i in range(1,2**l+1 ):
                for j in range(1,2**l+1 ):
                    if (len(levelMatrix[l][i].points) == 0 or len(levelMatrix[l][j].points) == 0):
                        return dismatrix
                    dismatrix[l, i, j] = min_max_distance(levelMatrix[l][i].points, levelMatrix[l][j].points)
                    print(l,i,j,min_max_distance(levelMatrix[l][i].points, levelMatrix[l][j].points))
    return dismatrix

In [46]:
#------------------------------------------------------------
# Create a set of structured random points in two dimensions
np.random.seed(0)
X = np.random.random((30, 2)) * 2 - 1
X[:, 1] *= 0.1
X[:, 1] += X[:, 0] ** 2

#------------------------------------------------------------
# Use our Ball Tree class to recursively divide the space
height = 3
BT = BallTree(X,3)

30
15
8
4
4
7
4
3
15
8
4
4
7
4
3


In [47]:
nodes = BT.traverse(BT.root)

In [51]:
levelMatrix = createLevelMatrix()
levelMatrix

array([[None, <__main__.BallTree_node object at 0x000001C2A700B1F0>,
        None, None, None, None, None, None, None],
       [None, <__main__.BallTree_node object at 0x000001C2A700BD30>,
        <__main__.BallTree_node object at 0x000001C2A700B670>, None,
        None, None, None, None, None],
       [None, <__main__.BallTree_node object at 0x000001C2A700B940>,
        <__main__.BallTree_node object at 0x000001C2A700B850>,
        <__main__.BallTree_node object at 0x000001C2A700B310>,
        <__main__.BallTree_node object at 0x000001C2A700B610>, None,
        None, None, None],
       [None, None, None, None, None, None, None, None, None]],
      dtype=object)

In [52]:
dismatrix = createDistanceMatrix()

0 1 1 (0.0, 3.766048207238371)
1 1 1 (0.0, 1.8172152322982524)
1 1 2 (0.013324693308435597, 3.766048207238371)
1 2 1 (0.013324693308435597, 3.766048207238371)
1 2 2 (0.0, 1.8648612441858192)
2 1 1 (0.0, 1.4355294587012863)
2 1 2 (0.00408754766508184, 1.8172152322982524)
2 1 3 (0.39833158831285786, 3.766048207238371)
2 1 4 (0.04715499192168629, 2.5111893710087707)
2 2 1 (0.00408754766508184, 1.8172152322982524)
2 2 2 (0.0, 0.13055998612545994)
2 2 3 (0.3499110770904938, 2.6801282143767082)
2 2 4 (0.013324693308435597, 0.5859751120524317)
2 3 1 (0.39833158831285786, 3.766048207238371)
2 3 2 (0.3499110770904938, 2.6801282143767082)
2 3 3 (0.0, 0.7439007935172708)
2 3 4 (0.0384332038815713, 1.8648612441858192)
2 4 1 (0.04715499192168629, 2.5111893710087707)
2 4 2 (0.013324693308435597, 0.5859751120524317)
2 4 3 (0.0384332038815713, 1.8648612441858192)
2 4 4 (0.0, 0.11526838781444206)
