In [None]:
import matplotlib.pyplot as plt

import os
import time

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.python.client import device_lib

from sklearn.datasets import make_classification

def get_available_gpus():
    """Return the names of all local devices that TensorFlow reports as GPUs.

    Returns
    -------
    list of str
        The `name` of every entry of `device_lib.list_local_devices()` whose
        `device_type` is 'GPU'. The list is empty when no such device exists.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names


In [None]:
# Synthetic data set: feature dimensions, number of observations and the
# random seed handed to make_data.
n_dim = 3
n_obs = 500000
seed = 800594

# Clustering: number of centers and number of update iterations to run.
K = 15
n_max_iters = 20

# Names of the GPU devices the clustering graphs are split across.
GPU_names = get_available_gpus()

In [None]:
def make_data(n_obs, n_dim, seed):
    """Generate a shuffled synthetic 3-class data set.

    Parameters
    ----------
    n_obs : int
        Number of samples to generate.
    n_dim : int
        Number of features; all of them are informative (none redundant).
    seed : int
        Value passed as `random_state` to `make_classification`.

    Returns
    -------
    tuple
        `(X, Y)` exactly as returned by `make_classification`: the samples
        and their class labels.
    """
    options = {
        'n_samples': n_obs,
        'n_features': n_dim,
        'n_informative': n_dim,
        'n_redundant': 0,
        'n_classes': 3,
        'n_clusters_per_class': 1,
        'shuffle': True,
        'random_state': seed,
    }
    features, labels = make_classification(**options)
    return (features, labels)

In [None]:
def distribuited_fuzzy_C_means(X, K, GPU_names, initial_centers, n_max_iters, M = 2):
    """Run fuzzy C-means with the membership step split across several devices.

    The observations are split row-wise into one shard per device. Every device
    computes the fuzzy memberships of its shard against the shared centroids and
    the two partial sums needed for the update; the CPU adds the partial sums
    and assigns the new centroids.

    Parameters
    ----------
    X : array-like of shape (n_obs, n_dim)
        Observations to cluster.
    K : int
        Number of centers; must equal the number of rows of `initial_centers`.
    GPU_names : list of str
        TensorFlow device names to shard the work over. When empty, the whole
        computation runs on '/cpu:0'.
    initial_centers : array-like of shape (K, n_dim)
        Starting centroids.
    n_max_iters : int
        Number of centroid updates to run.
    M : float, default 2
        Fuzzifier (must be > 1). It is used for both the membership exponent
        -2 / (M - 1) and the weight exponent M.

    Returns
    -------
    dict
        'end_center'         : centroids after the last update (the initial
                               centers when `n_max_iters` is 0),
        'init_center'        : `initial_centers` as passed in,
        'end_time_result_df' : one-row DataFrame with the columns 'setup_time',
                               'initialization_time' and 'computation_time'
                               (seconds).

    Raises
    ------
    ValueError
        If `M` is not greater than 1 or `initial_centers` does not have K rows.
    """
    if M <= 1:
        raise ValueError('The fuzzifier M must be greater than 1, got ' + str(M))
    if np.shape(initial_centers)[0] != K:
        raise ValueError('initial_centers must have K = ' + str(K) + ' rows')

    setup_ts = time.time()

    # Fall back to the CPU so a machine without GPUs does not fail on a
    # modulo/split by zero devices.
    device_names = list(GPU_names) if GPU_names else ['/cpu:0']

    # array_split tolerates uneven shards, so no observation is discarded when
    # the row count is not a multiple of the number of devices.
    shards = np.array_split(np.asarray(X), len(device_names))

    # Keep the fuzzifier under its own name: the data dimensions must never
    # overwrite it.
    fuzzifier = float(M)
    distance_exponent = -2.0 / (fuzzifier - 1.0)

    partial_Mu_sum_list = []
    partial_Mu_X_sum_list = []

    # Build in a private graph so repeated calls do not pile up nodes and data
    # constants in the process-wide default graph.
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('global'):
            with tf.device('/cpu:0'):
                global_centroids = tf.Variable(initial_centers)

        for device_num, device_name in enumerate(device_names):
            with tf.name_scope('scope_' + str(device_num)):
                with tf.device(device_name):
                    ####
                    # In the comments below:
                    # => N = number of observations in this shard
                    # => D = number of dimensions
                    # => K = number of centers
                    ####
                    points = tf.constant(shards[device_num])

                    # Broadcast (N x 1 x D) against (1 x K x D) to get every
                    # point-to-center difference, then reduce to the N x K
                    # matrix of Euclidean distances.
                    differences = tf.subtract(tf.expand_dims(points, 1),
                                              tf.expand_dims(global_centroids, 0))
                    dist_to_centers = tf.sqrt(tf.reduce_sum(tf.square(differences), axis = 2))

                    # Memberships, stored transposed as K x N so that the row
                    # sums (length N) broadcast along the last axis.
                    inverse_dist = tf.pow(dist_to_centers, distance_exponent)
                    membership_with_nan = tf.div(tf.transpose(inverse_dist),
                                                 tf.reduce_sum(inverse_dist, 1))

                    # A point lying exactly on a center has distance 0, which
                    # yields inf / inf = NaN; such entries are set to 0.
                    cluster_membership = tf.where(tf.is_nan(membership_with_nan),
                                                  tf.zeros_like(membership_with_nan),
                                                  membership_with_nan)

                    MU = tf.pow(cluster_membership, fuzzifier)

                    # Partial numerator (K x D) and denominator (K) of the
                    # centroid update for this shard.
                    partial_Mu_X_sum_list.append(tf.matmul(MU, points))
                    partial_Mu_sum_list.append(tf.reduce_sum(MU, 1))

        with tf.name_scope('global'):
            with tf.device('/cpu:0'):
                global_Mu_sum = tf.add_n(partial_Mu_sum_list)
                global_Mu_X_sum = tf.add_n(partial_Mu_X_sum_list)

                # (K x D) / (K x 1): weighted mean of the points per center.
                new_centers = tf.div(global_Mu_X_sum, tf.expand_dims(global_Mu_sum, 1))

                # Fetching the variable together with its assignment has no
                # guaranteed order; return the assigned value itself instead.
                assign_centroids = global_centroids.assign(new_centers)
                with tf.control_dependencies([assign_centroids]):
                    update_centroid = tf.identity(new_centers)

        init = tf.global_variables_initializer()

    setup_time = float(time.time() - setup_ts)
    initialization_ts = time.time()

    # Defined up front so that n_max_iters == 0 still returns something valid.
    result = np.array(initial_centers)
    computation_time = 0.0

    # The context manager releases the session's device resources on exit.
    with tf.Session(graph = graph,
                    config = tf.ConfigProto(log_device_placement = True)) as sess:
        sess.run(init)
        initialization_time = float(time.time() - initialization_ts)

        for _ in range(n_max_iters):
            aux_ts = time.time()
            result = sess.run(update_centroid)
            computation_time += float(time.time() - aux_ts)

    # DataFrame.append no longer exists in pandas 2.x; build the row directly.
    end_time_result_df = pd.DataFrame([{'setup_time'          : setup_time,
                                        'initialization_time' : initialization_time,
                                        'computation_time'    : computation_time}])

    end_resut = {   'end_center'         : result            ,
                    'init_center'        : initial_centers   ,
                    'end_time_result_df' : end_time_result_df }
    return end_resut

In [None]:
# Build the synthetic data set and use K of its observations as the starting
# centers.
(X, Y) = make_data( n_obs = n_obs,
                    n_dim = n_dim,
                    seed  = seed  )
initial_centers = X[5 : 5+K, :]

# Only a small sample of rows is drawn; the SAME slice has to be applied to the
# coordinates and to the color labels, otherwise scatter rejects the sizes.
preview_rows = slice(1, 1000)

plt.scatter(X[preview_rows, 0], X[preview_rows, 1], alpha = 0.8, c = Y[preview_rows], marker = (5, 2))
plt.scatter(initial_centers[:, 0], initial_centers[:, 1], alpha = 1, c = 'red', marker = (5, 3))
plt.show()

result = distribuited_fuzzy_C_means(X, K, GPU_names, initial_centers, n_max_iters)
print('result', result)
centers = result['end_center']

plt.scatter(X[preview_rows, 0], X[preview_rows, 1], alpha = 0.6, c = Y[preview_rows], marker = (3, 1))
plt.scatter(centers[:, 0], centers[:, 1], alpha = 1, c = 'red', marker = (5, 3))
plt.show()

In [None]:
def distribuited_k_means(X, K, GPU_names, initial_centers, n_max_iters):
    """Run k-means with the assignment step split across several devices.

    The observations are split row-wise into one shard per device. Every device
    assigns its points to the nearest centroid and produces per-cluster
    coordinate sums and point counts; the CPU adds them up and assigns the new
    centroids.

    Parameters
    ----------
    X : array-like of shape (n_obs, n_dim)
        Observations to cluster.
    K : int
        Number of centers.
    GPU_names : list of str
        TensorFlow device names to shard the work over. When empty, the whole
        computation runs on '/cpu:0'.
    initial_centers : array-like of shape (K, n_dim)
        Starting centroids.
    n_max_iters : int
        Number of centroid updates to run.

    Returns
    -------
    dict
        'end_center'          : centroids after the last update (the initial
                                centers when `n_max_iters` is 0),
        'init_center'         : `initial_centers` as passed in,
        'setup_time'          : seconds spent building the graph,
        'initialization_time' : seconds spent creating the session and
                                initializing variables,
        'computation_time'    : seconds spent in the update iterations,
        'n_iter'              : number of updates actually performed.
    """
    setup_ts = time.time()

    # Fall back to the CPU so a machine without GPUs does not fail on a
    # modulo/split by zero devices.
    device_names = list(GPU_names) if GPU_names else ['/cpu:0']

    # array_split tolerates uneven shards, so no observation is discarded when
    # the row count is not a multiple of the number of devices.
    shards = np.array_split(np.asarray(X), len(device_names))

    partial_sums = []
    partial_counts = []

    # Build in a private graph so repeated calls do not pile up nodes and data
    # constants in the process-wide default graph.
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('global'):
            with tf.device('/cpu:0'):
                global_centroids = tf.Variable(initial_centers)

        for device_num, device_name in enumerate(device_names):
            with tf.name_scope('scope_' + str(device_num)):
                with tf.device(device_name):
                    ####
                    # In the comments below:
                    # => N = number of observations in this shard
                    # => D = number of dimensions
                    # => K = number of centers
                    ####
                    points = tf.constant(shards[device_num])

                    # Broadcast (N x 1 x D) against (1 x K x D) and reduce to
                    # the N x K matrix of SQUARED distances. The square root is
                    # skipped on purpose: argmin is the same either way.
                    differences = tf.subtract(tf.expand_dims(points, 1),
                                              tf.expand_dims(global_centroids, 0))
                    sum_squares = tf.reduce_sum(tf.square(differences), axis = 2)

                    # Index of the closest centroid for each point (length N).
                    best_centroids = tf.argmin(sum_squares, axis = 1)

                    # Per-cluster coordinate sums and point counts, both K x D
                    # (every column of the count matrix holds the same count).
                    partial_sums.append(
                        tf.unsorted_segment_sum(points, best_centroids, K))
                    partial_counts.append(
                        tf.unsorted_segment_sum(tf.ones_like(points), best_centroids, K))

        with tf.name_scope('global'):
            with tf.device('/cpu:0'):
                total_sums = tf.add_n(partial_sums)
                total_counts = tf.add_n(partial_counts)

                # A cluster that received no point on any device would give
                # 0 / 0 = NaN; such a centroid keeps its previous position.
                has_points = tf.greater(total_counts, 0)
                safe_counts = tf.where(has_points, total_counts, tf.ones_like(total_counts))
                new_centers = tf.where(has_points,
                                       tf.div(total_sums, safe_counts),
                                       tf.identity(global_centroids))

                # Fetching the variable together with its assignment has no
                # guaranteed order; return the assigned value itself instead.
                assign_centroids = global_centroids.assign(new_centers)
                with tf.control_dependencies([assign_centroids]):
                    update_centroid = tf.identity(new_centers)

        init = tf.global_variables_initializer()

    setup_time = float(time.time() - setup_ts)
    initialization_ts = time.time()

    # Defined up front so that n_max_iters == 0 still returns something valid.
    result = np.array(initial_centers)
    computation_time = 0.0
    n_iter = 0

    # The context manager releases the session's device resources on exit.
    with tf.Session(graph = graph,
                    config = tf.ConfigProto(log_device_placement = True)) as sess:
        sess.run(init)
        initialization_time = float(time.time() - initialization_ts)

        for n_iter in range(1, n_max_iters + 1):
            aux_ts = time.time()
            result = sess.run(update_centroid)
            computation_time += float(time.time() - aux_ts)

    end_resut = {   'end_center'          : result             ,
                    'init_center'         : initial_centers    ,
                    'setup_time'          : setup_time         ,
                    'initialization_time' : initialization_time,
                    'computation_time'    : computation_time   ,
                    'n_iter'              : n_iter
                }

    return end_resut


In [None]:
# Regenerate the synthetic data set and take K observations as starting centers.
(X, Y) = make_data(n_obs, n_dim, seed)
initial_centers = X[5 : 5+K, :]

# Drawing options shared by the "before" and "after" figures.
point_style  = dict(alpha = 0.6, c = Y, marker = (3, 1))
center_style = dict(alpha = 1, c = 'red', marker = (5, 3))

# Before: observations colored by class, starting centers in red.
plt.scatter(X[:, 0], X[:, 1], **point_style)
plt.scatter(initial_centers[:, 0], initial_centers[:, 1], **center_style)
plt.show()

result = distribuited_k_means(X, K, GPU_names, initial_centers, n_max_iters)
centers = result['end_center']

# After: same observations, final centers in red.
plt.scatter(X[:, 0], X[:, 1], **point_style)
plt.scatter(centers[:, 0], centers[:, 1], **center_style)
plt.show()