In [1]:
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A canned Estimator for k-means clustering."""

# TODO(ccolby): Move clustering_ops.py into this file and streamline the code.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

from tensorflow.contrib.factorization.python.ops import clustering_ops
from tensorflow.python.estimator import estimator
from tensorflow.python.estimator import model_fn as model_fn_lib
from tensorflow.python.estimator.export import export_output
from tensorflow.python.feature_column import feature_column as fc
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics
from tensorflow.python.ops import state_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.summary import summary
from tensorflow.python.training import session_run_hook
from tensorflow.python.training import training_util


class _LossRelativeChangeHook(session_run_hook.SessionRunHook):
    def __init__(self, loss_tensor, tolerance):
        self._loss_tensor = loss_tensor
        self._tolerance = tolerance
        self. prev_loss = None

    def before_run(self, run_context):
        del run_context  # unused
        return session_run_hook.SessionRunArgs(self._loss_tensor)
    
    def after_run(self, run_context, run_values):
        loss = run_values.results
        assert loss is not None
        if self._prev_loss:
            relative_change = (abs(loss - self._prev_loss) / (1 + abs(self._prev_loss)))
            if relative_change < self._tolerance:
                run_context.request_stop()
            self._prev_loss = loss


class _InitializeClustersHook(session_run_hook.SessionRunHook):
    def __init__(self, init_op, is_initialized_var, is_chief):
        self._init_op = init_op
        self._is_initialized_var = is_initialized_var
        self._is_chief = is_chief

    def after_create_session(self, session, coord):
        del coord  # unused
        assert self._init_op.graph is ops.get_default_graph()
        assert self._is_initialized_var.graph is self._init_op.graph
        while True:
            try:
                if session.run(self._is_initialized_var):
                    break
                elif self._is_chief:
                    session.run(self._init_op)
                else:
                    time.sleep(1)
            except RuntimeError as e:
                logging.info(e)


def _parse_features_if_necessary(features, feature_columns):
    if not isinstance(features, dict):
        return features
    
    if feature_columns:
        return fc.input_layer(features, feature_columns)
    keys = sorted(features.keys())
    with ops.colocate_with(features[keys[0]]):
        return array_ops.concat([features[k] for k in keys], axis=1)


class _ModelFn(object):
    def __init__(self, num_clusters, initial_clusters, distance_metric, random_seed, use_mini_batch, mini_batch_steps_per_iteration, kmeans_plus_plus_num_retries, relative_tolerance, feature_columns):
        self._num_clusters = num_clusters
        self._initial_clusters = initial_clusters
        self._distance_metric = distance_metric
        self._random_seed = random_seed
        self._use_mini_batch = use_mini_batch
        self._mini_batch_steps_per_iteration = mini_batch_steps_per_iteration
        self._kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries
        self._relative_tolerance = relative_tolerance
        self._feature_columns = feature_columns

    def model_fn(self, features, mode, config):
        input_points = _parse_features_if_necessary(features, self._feature_columns)
        (all_distances, model_predictions, losses, is_initialized, init_op, training_op) = clustering_ops.KMeans(inputs=input_points,lusters=self._num_clusters,initial_clusters=self._initial_clusters,distance_metric=self._distance_metric,use_mini_batch=self._use_mini_batch,mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration,random_seed=self._random_seed,kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries).training_graph()
    
    loss = math_ops.reduce_sum(losses)
    summary.scalar('loss/raw', loss)

    incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
    training_op = control_flow_ops.with_dependencies([training_op, incr_step],loss)

    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    
    if self._relative_tolerance is not None:
        training_hooks.append(
            _LossRelativeChangeHook(loss, self._relative_tolerance))
        
    export_outputs = {
        KMeansClustering.ALL_DISTANCES:
            export_output.PredictOutput(all_distances[0]),
        KMeansClustering.CLUSTER_INDEX:
            export_output.PredictOutput(model_predictions[0]),
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            export_output.PredictOutput(model_predictions[0])
    }
return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions={
            KMeansClustering.ALL_DISTANCES: all_distances[0],
            KMeansClustering.CLUSTER_INDEX: model_predictions[0],
        },
        loss=loss,
        train_op=training_op,
        eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)},
        training_hooks=training_hooks,
        export_outputs=export_outputs)


# TODO(agarwal,ands): support sharded input.
class KMeansClustering(estimator.Estimator):
    # Valid values for the distance_metric constructor argument.
    SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE
    COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE

    # Values for initial_clusters constructor argument.
    RANDOM_INIT = clustering_ops.RANDOM_INIT
    KMEANS_PLUS_PLUS_INIT = clustering_ops.KMEANS_PLUS_PLUS_INIT

    # Metric returned by evaluate(): The sum of the squared distances from each
    # input point to its closest center.
    SCORE = 'score'

    # Keys returned by predict().
    # ALL_DISTANCES: The distance from each input  point to each cluster center.
    # CLUSTER_INDEX: The index of the closest cluster center for each input point.
    CLUSTER_INDEX = 'cluster_index'
    ALL_DISTANCES = 'all_distances'

    # Variable name used by cluster_centers().
    CLUSTER_CENTERS_VAR_NAME = clustering_ops.CLUSTERS_VAR_NAME

    def __init__(self,
                 num_clusters,
                 model_dir=None,
                 initial_clusters=RANDOM_INIT,
                 distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
                 random_seed=0,
                 use_mini_batch=True,
                 mini_batch_steps_per_iteration=1,
                 kmeans_plus_plus_num_retries=2,
                 relative_tolerance=None,
                 config=None,
                 feature_columns=None):
        if isinstance(initial_clusters, str) and initial_clusters not in [KMeansClustering.RANDOM_INIT, KMeansClustering.KMEANS_PLUS_PLUS_INIT]:
            raise ValueError("Unsupported initialization algorithm '%s'" % initial_clusters)
        if distance_metric not in [KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,KMeansClustering.COSINE_DISTANCE]:
            raise ValueError("Unsupported distance metric '%s'" % distance_metric)
        super(KMeansClustering, self).__init__(
        model_fn=_ModelFn(
            num_clusters, initial_clusters, distance_metric, random_seed,
            use_mini_batch, mini_batch_steps_per_iteration,
            kmeans_plus_plus_num_retries, relative_tolerance,
            feature_columns).model_fn,
        model_dir=model_dir,
        config=config)

    
def _predict_one_key(self, input_fn, predict_key):
    for result in self.predict(input_fn=input_fn, predict_keys=[predict_key]):
        yield result[predict_key]
        
def predict_cluster_index(self, input_fn):
    for index in self._predict_one_key(input_fn,KMeansClustering.CLUSTER_INDEX):
        yield index

def score(self, input_fn):
    return self.evaluate(input_fn=input_fn, steps=1)[KMeansClustering.SCORE]

def transform(self, input_fn):
    for distances in self._predict_one_key(input_fn,KMeansClustering.ALL_DISTANCES):
        yield distances

def cluster_centers(self):
    return self.get_variable_value(KMeansClustering.CLUSTER_CENTERS_VAR_NAME)

NameError: name 'losses' is not defined