In [None]:
import matplotlib.pyplot as plt

import threading

import os
import time

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.python.client import device_lib

from sklearn.datasets import make_classification

def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'.

    Returns:
        list: ``name`` attribute of every entry reported by
        ``device_lib.list_local_devices()`` with ``device_type == 'GPU'``;
        empty when no such device is reported.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

def make_data(filepath, n_obs, n_dim, seed, K):
    """Generate a synthetic classification data set and save it with ``np.savez``.

    Any existing file at ``filepath`` is removed first, then ``n_obs`` samples
    with ``n_dim`` informative features spread over ``K`` classes (one cluster
    per class, ``class_sep=1.5``) are drawn with ``make_classification`` and
    stored under the keys ``'X'`` (features) and ``'Y'`` (labels).

    Args:
        filepath: Destination path of the archive.
        n_obs: Number of samples to generate.
        n_dim: Number of features; all of them are informative.
        seed: Random state passed to ``make_classification``.
        K: Number of classes.

    Returns:
        bool: Always ``True`` once the archive has been written.

    Raises:
        OSError: If an existing file at ``filepath`` cannot be removed for a
            reason other than it being absent (e.g. permissions).
    """
    # Only a missing file is an expected, harmless condition here. A bare
    # ``except`` would also hide permission errors and KeyboardInterrupt
    # behind a misleading "file not found" message.
    try:
        os.remove(filepath)
    except FileNotFoundError:
        print('file not found')

    X, Y = make_classification(
        n_samples=n_obs,
        n_features=n_dim,
        n_informative=n_dim,
        n_redundant=0,
        n_classes=K,
        n_clusters_per_class=1,
        shuffle=True,
        class_sep=1.5,
        random_state=seed,
    )

    np.savez(filepath, X=X, Y=Y)

    return True

In [None]:
# --- Synthetic data set parameters -------------------------------------
seed  = 800594       # random state handed to make_data
K     = 3            # number of classes
n_dim = 2            # number of features per sample
n_obs = 200000000    # number of samples to generate

# --- Run parameters ----------------------------------------------------
n_max_iters = 20
GPU_names = get_available_gpus()

# (Re)create the archive that the following cells load.
make_data(filepath='test-data.npz', n_obs=n_obs, n_dim=n_dim, seed=seed, K=K)

In [None]:
# Read both arrays out of the archive; the file handle is closed on exit.
with np.load('test-data.npz') as data:
    data_X, data_Y = data['X'], data['Y']

# The first K rows of the feature matrix serve as the starting centers.
initial_centers = data_X[:K, :]

# Bytes occupied by a single row of data_X, and the 2 GiB byte budget.
size_of_each = data_X.dtype.itemsize * data_X.shape[1]
maxsize = 2 * 1024 ** 3

In [None]:
# The data is fed through a placeholder when the iterator is initialised,
# so the array itself is not embedded in the graph definition.
data_placeholder = tf.placeholder(data_X.dtype, data_X.shape)

# Dataset.batch expects an integer batch size. Integer floor division keeps
# the value an exact int (np.floor would hand over a float), and the clamp
# avoids an invalid batch size of 0 when one row is larger than the budget.
batch_size = max(1, maxsize // size_of_each)

dataset = tf.data.Dataset.from_tensor_slices(data_placeholder)
dataset = dataset.batch(batch_size)

iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

In [None]:
def distributed_kmeans(batch_data):
    """Wrap ``batch_data`` in a ``tf.Print`` op that logs the batch itself.

    Args:
        batch_data: Tensor holding one batch.

    Returns:
        The tensor produced by ``tf.Print(batch_data, [batch_data])``.
    """
    tensors_to_log = [batch_data]
    printed_batch = tf.Print(batch_data, tensors_to_log)
    return printed_batch
    
    

In [None]:
# Session configuration: allow falling back to another device when an op
# cannot be placed as requested, and let GPU memory grow on demand.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.allocator_type = 'BFC'

# Build the per-batch op once, outside the loop. Calling
# distributed_kmeans(next_element) on every iteration would add a fresh op
# to the graph each time, so the graph would keep growing while it runs.
kmeans_step = distributed_kmeans(next_element)

with tf.Session(config=config) as sess:
    # Feed the full array once; the iterator then yields it batch by batch.
    sess.run(iterator.initializer, feed_dict={data_placeholder: data_X})

    while True:
        try:
            item = sess.run(kmeans_step)
        except tf.errors.OutOfRangeError:
            # Raised by the iterator once every batch has been consumed.
            break
        print(item)