In [149]:
from scipy.spatial import KDTree
import numpy as np

# Path to the input points file; it is parsed by read_file().
data_path = 'Data/Input/points.txt'
# Number of centroids requested from init_centroids().
K = 3

In [160]:
def read_file(filepath):
    '''Read comma-separated numeric points from a text file.

    Every non-blank line is split on ',' and each field is converted with
    float(). Blank lines (for example a trailing newline at the end of the
    file) are skipped instead of raising ValueError.

    Arguments:
    1. filepath: the path to the file to read

    Returns:
    1. points: a numpy array containing points, one row per non-blank line

    Raises:
    ValueError: if a field on a non-blank line cannot be converted to float
    '''
    points = []
    with open(filepath, 'r') as f:
        # Iterate the file lazily rather than materialising it with readlines().
        for line in f:
            line = line.strip()
            if not line:
                # Nothing to parse; float('') would raise ValueError.
                continue
            points.append([float(field) for field in line.split(',')])

    return np.array(points)


def init_centroids(data, k):
    '''Function to initialize 'k' random centroids from the data

    The centroids are rows of `data` chosen at 'k' distinct row indices.
    Sampling is done without replacement so the same point is never
    returned twice as two separate centroids.

    Arguments:
    1. data: input data for kmeans (indexable by an integer index array).
    2. k: number of centroids to output

    Returns:
    1. an array holding the 'k' selected rows of data

    Raises:
    ValueError: if k is larger than len(data) (raised by np.random.choice
    when sampling without replacement)
    '''
    # replace=False: the default (True) may repeat an index and therefore
    # produce duplicate centroids.
    ind = np.random.choice(len(data), size = k, replace = False)
    return data[ind]

def map_(fpath, indices, centroid):
    '''Map step: assign each selected point to its nearest centroid.

    Arguments:
    1. fpath: path of the points file, loaded through read_file
    2. indices: index used to select rows from the loaded points
    3. centroid: centroid coordinates the KDTree is built from

    Returns:
    1. the index of the nearest centroid for every selected point (the key)
    2. the selected points themselves (the value)'''
    selected = read_file(fpath)[indices]
    # query() returns (distances, indices); only the indices are needed.
    nearest = KDTree(centroid).query(selected)[1]
    return nearest, selected

def partition_(key, value, n_reducers = 2):
    '''Group [key, value] pairs into reducer partitions.

    The partition id of a pair is key % n_reducers, so every pair sharing a
    key lands in the same partition.

    Arguments:
    1. key: iterable of keys
    2. value: iterable of values, paired positionally with key
    3. n_reducers: number of partitions to spread the keys over

    Returns:
    1. a dict mapping partition id to the list of [key, value] pairs
       assigned to it, in input order'''
    buckets = {}
    for pair_key, pair_value in zip(key, value):
        # setdefault creates the bucket the first time a partition id is seen.
        buckets.setdefault(pair_key % n_reducers, []).append([pair_key, pair_value])

    return buckets

In [151]:
# Load every point from the input file and pick K of them as starting centroids.
points = read_file(data_path)
centroids = init_centroids(points, K)

# Random row indices, one third as many as there are points, later passed to map_.
# NOTE(review): randint samples with replacement, so indices may repeat — confirm this is intended.
indices = np.random.randint(0, len(points), len(points)//3)

In [161]:
# Map step: k is the nearest-centroid index for each selected point, v is the selected points.
k, v = map_(data_path, indices, centroids)

In [169]:
# Split the mapped pairs across 2 reducers, then take the pairs assigned to reducer 0.
# NOTE(review): partitions[0] raises KeyError when no key satisfies key % 2 == 0.
partitions = partition_(k,v,2)
p = partitions[0]

In [167]:
def shuffle_sort(part):
    '''Sort the [key, value] pairs of one partition by key.

    Arguments:
    1. part: list of [key, value] pairs, as stored in one partition

    Returns:
    1. a new list with the same pairs ordered by ascending key; pairs with
       equal keys keep their original relative order (sorted() is stable)'''
    # Compare on the key only, so the values themselves are never compared.
    # TODO(review): the original body ended in an unfinished `for` statement;
    # any per-key grouping that loop was meant to do is not implemented here.
    return sorted(part, key = lambda x: x[0])

[[0, array([9.1, 3.1])],
 [2, array([9.8, 1.2])],
 [2, array([8.9, 0.2])],
 [2, array([9.5, 1.5])],
 [2, array([11.2, -1.2])]]