## Import necessary Libraries ##

In [1]:
#import necessary libraries 
import tensorflow as tf 
import numpy as np 
from datetime import datetime 
import shutil
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt 

from tensorflow import data
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import make_export_strategy

  from ._conv import register_converters as _register_converters


## Data Preprocessing and Defining Parameters##

In [2]:
#read the dataset
train_filename = ["dummy_dataset.csv"]
test_filename = ["dummy_dataset_test.csv"]

model_name = "cluster-01"

resume = False
train = True

In [3]:
#print column values
HEADER = ['Unamed:0','Frequency','Recency', 'Monetary']
HEADER_DEFAULTS = [[0],[0.0],[0.0],[0.0]]
FEATURE_NAMES = ['Frequency','Monetary','Recency']
UNUSED_FEATURE_NAMES = list(set(HEADER) - set(FEATURE_NAMES))

print("Input features we have selected:{features}"
		.format(features=FEATURE_NAMES))
print("Unused Features:{}".format(UNUSED_FEATURE_NAMES))

Input features we have selected:['Frequency', 'Monetary', 'Recency']
Unused Features:['Unamed:0']


### a. Parsing and Pre-processing Logic###

In [4]:
#parsing and preprocessing logic
def parse_csv_row(csv_row):
	#decode csv, convert dataset into tensor
	columns = tf.decode_csv(csv_row, record_defaults=HEADER_DEFAULTS)
	columns = [tf.expand_dims(tensor, -1) for tensor in columns]
	features = dict(zip(HEADER, columns))

	for column in UNUSED_FEATURE_NAMES:
		features.pop(column)

	return features

def process_features(features):

	if process_features:
		features = features

	return features


### b. Data Pipeling input Function ###

In [7]:
#data pipeline input function
def csv_input_fn(file_names, mode=tf.estimator.ModeKeys.TRAIN,
				skip_header_lines = 0,
				num_epochs=None,
				batch_size=200):

		shuffle = False
		print("Data Input Function")
		print("=====================")
		print("Batch_Size:{}".format(batch_size))
		print("Epoch Count:{}".format(num_epochs))
		print("Shuffle:{}".format(shuffle))
		print("============================")

		dataset = data. TextLineDataset(filenames= train_filename)
		dataset = dataset.skip(skip_header_lines)

		if shuffle:
			dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)

		dataset = dataset.batch(batch_size)
		dataset = dataset.map(lambda csv_row: parse_csv_row(csv_row))
		dataset = dataset.map(lambda features: process_features(features))

		dataset = dataset.repeat(num_epochs)
		iterator = dataset.make_one_shot_iterator()

		features = iterator.get_next()

		return features, None

In [8]:
features, _ = csv_input_fn(file_names = train_filename)
print("Features read from CSV:{}".format(list(features.keys())))

Data Input Function
Batch_Size:200
Epoch Count:None
Shuffle:False
Features read from CSV:['Monetary', 'Frequency', 'Recency']


### Build an Estimator ###

In [9]:
#build an estimator
def create_estimator(run_config, hparams):
	estimator = tf.contrib.learn.KMeansClustering(
        num_clusters = hparams.num_clusters,
        initial_clusters= tf.contrib.factorization.RANDOM_INIT,
        distance_metric= tf.contrib.factorization.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=1,
        kmeans_plus_plus_num_retries=10,
        relative_tolerance=None,
        config= run_config
    )

	print("")
	print("Estimator Type:{}".format(type(estimator)))

	return estimator

## Run a Experiment ##

### a. create a Serving function ###

In [115]:
def csv_serving_input_fn():
    
    SERVING_HEADER = ['renancy','freq','monetary']
    SERVING_HEADER_DEFAULTS = [[0.0],[0.0],[0.0]]
    
    rows_string_tensor = tf.placeholder(dtype=tf.string,
                                        shape=[None],
                                        name="csv_rows")
    
    receive_tensor = {'csv_rows':rows_string_tensor}
    
    row_columns = tf.expand_dims(rows_string_tensor, -1)
    columns = tf.decode_csv(row_columns, record_defaults=SERVING_HEADER_DEFAULTS)
    columns = [tf.expand_dims(tensor, -1) for tensor in columns]
    features = dict(zip(SERVING_HEADER, columns))
    
    
    return tf.contrib.learn.InputFnOps(
        process_features(features),
        None,
        receive_tensor
    )

### b. Create Experiment Function ###

In [102]:
def generate_experiment_fn(**experiment_args):
    
    def _experiment_fn(run_config, hparams):
        
        train_input_fn = lambda: csv_input_fn(
            train_filename,
            mode = tf.contrib.learn.ModeKeys.TRAIN,
            num_epochs = hparams.num_epochs,
            batch_size = hparams.batch_size*10
        )
        
        eval_input_fn = lambda: csv_input_fn(
            train_filenames,
            mode = tf.contrib.learn.ModeKeys.EVAL,
            num_epochs=1,
            batch_size=hparams.batch_size
        )
        
        estimator = create_estimator(run_config, hparams)
        
        return tf.contrib.learn.Experiment(
            estimator,
            train_input_fn = train_input_fn,
            eval_input_fn = eval_input_fn,
            eval_steps = None,
            **experiment_args
        )
    
    return _experiment_fn

## creating Hyperparameter Tuning ##

In [10]:
#set HParam and RunConfig
hparams = tf.contrib.training.HParams(
	num_epochs=1000,
	batch_size=500,
	num_clusters=3)

model_dir = "trained_models/{}".format(model_name)

run_config = tf.contrib.learn.RunConfig(
	save_checkpoints_steps=100,
	tf_random_seed=100000,
	model_dir = model_dir)

print("Model is Stored in Directory:{}".format(run_config.model_dir))

Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
Model is Stored in Directory:trained_models/cluster-01


### d.Run Experiement ###

In [103]:
if not resume:
	print("Removing Previous Artifacts....")
	shutil.rmtree(model_dir, ignore_errors=True)
else:
	print("Resuming Training....")


if train:
	tf.logging.set_verbosity(tf.logging.INFO)
	time_start = datetime.utcnow()
	print("Training Started at {}".format(time_start.strftime("%H:%M:%S")))
	print(".......................................")

	learn_runner.run(
        experiment_fn = generate_experiment_fn(
            
            export_strategies=[make_export_strategy(
                csv_serving_input_fn,
                exports_to_keep =1
            )]
        ), #not executing export_savedmodel()
        run_config = run_config,
        schedule="train",
        hparams=hparams
    ) 

	time_end = datetime.utcnow()
	print(".......................................")
	print("Training Finished at {}".format(time_end. strftime("%H:%M:%S")))
	print("")

	time_elapsed = time_end - time_start
	print("Training elapsed time:{} Seconds".format(time_elapsed.total_seconds()))

Removing Previous Artifacts....
Training Started at 12:37:18
.......................................
Instructions for updating:
Switch to tf.estimator.Exporter and associated utilities.
Instructions for updating:
Please switch to tf.estimator.train_and_evaluate, and use tf.estimator.Exporter.
Instructions for updating:
Use tf.estimator.train_and_evaluate.
INFO:tensorflow:Using config: {'_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_is_chief': True, '_save_checkpoints_secs': None, '_task_type': None, '_train_distribute': None, '_master': '', '_model_dir': 'trained_models/cluster-01', '_save_checkpoints_steps': 100, '_evaluation_master': '', '_task_id': 0, '_environment': 'local', '_log_step_count_steps': 100, '_num_worker_replicas': 0, '_tf_random_seed': 100000, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_num_ps_replicas': 0, '_session_config': None, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec

INFO:tensorflow:global_step/sec: 73.3561
INFO:tensorflow:loss = 60110.855, step = 2901 (1.364 sec)
INFO:tensorflow:Saving checkpoints for 3000 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 67.0894
INFO:tensorflow:loss = 59812.46, step = 3001 (1.490 sec)
INFO:tensorflow:Saving checkpoints for 3100 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 73.7998
INFO:tensorflow:loss = 60108.535, step = 3101 (1.356 sec)
INFO:tensorflow:Saving checkpoints for 3200 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 69.3259
INFO:tensorflow:loss = 59810.34, step = 3201 (1.442 sec)
INFO:tensorflow:Saving checkpoints for 3300 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 75.1824
INFO:tensorflow:loss = 60106.6, step = 3301 (1.331 sec)
INFO:tensorflow:Saving checkpoints for 3400 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 69.5359
INFO:tensorflow:loss = 59808.234,

INFO:tensorflow:loss = 60085.77, step = 7301 (1.538 sec)
INFO:tensorflow:Saving checkpoints for 7400 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 72.1873
INFO:tensorflow:loss = 59784.85, step = 7401 (1.386 sec)
INFO:tensorflow:Saving checkpoints for 7500 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 77.6063
INFO:tensorflow:loss = 60084.42, step = 7501 (1.289 sec)
INFO:tensorflow:Saving checkpoints for 7600 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 74.3825
INFO:tensorflow:loss = 59783.406, step = 7601 (1.342 sec)
INFO:tensorflow:Saving checkpoints for 7700 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 72.7634
INFO:tensorflow:loss = 60083.203, step = 7701 (1.374 sec)
INFO:tensorflow:Saving checkpoints for 7800 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 78.0887
INFO:tensorflow:loss = 59782.008, step = 7801 (1.282 sec)
INFO:tensorflow

## Perform Predictions ##

In [107]:
#perform predictions
train_input_fn = lambda: csv_input_fn(
	train_filename,
	num_epochs=1,
	batch_size=1500)

test_input_fn = lambda: csv_input_fn(
    test_filename,
    num_epochs=1,
    batch_size = 500
    )

estimator = create_estimator(run_config, hparams)


train_assignments = list(estimator.predict_cluster_idx(input_fn=train_input_fn))
test_assignments = list(estimator.predict_cluster_idx(input_fn=test_input_fn))

INFO:tensorflow:Using config: {'_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_is_chief': True, '_save_checkpoints_secs': None, '_task_type': None, '_train_distribute': None, '_master': '', '_model_dir': 'trained_models/cluster-01', '_save_checkpoints_steps': 100, '_evaluation_master': '', '_task_id': 0, '_environment': 'local', '_log_step_count_steps': 100, '_num_worker_replicas': 0, '_tf_random_seed': 100000, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_num_ps_replicas': 0, '_session_config': None, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001F1C017C780>, '_keep_checkpoint_max': 5}

Estimator Type:<class 'tensorflow.contrib.learn.python.learn.estimators.kmeans.KMeansClustering'>
Data Input Function
Batch_Size:1500
Epoch Count:1
Shuffle:False
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from trained_models/cluster-01\model.ckpt-8000
INFO:tensorflow

In [108]:
#print cluster centroids
clusters = estimator.clusters()
print("Cluster Centroids:")
print("=====================")
print(clusters)

Cluster Centroids:
[[2.3320048 5.58356   4.7135463]
 [6.9585557 7.193507  5.0743465]
 [6.0553403 2.397514  5.261766 ]]


## Serving via the Saved model ##

In [120]:
export_dir  = model_dir + "/export"

estimator.export_savedmodel(
    export_dir_base = export_dir,
    serving_input_fn = csv_serving_input_fn,
    as_text=False
)

INFO:tensorflow:Restoring parameters from trained_models/cluster-01\model.ckpt-8000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: trained_models/cluster-01/export\temp-1534856415\saved_model.pb


b'trained_models/cluster-01/export\\1534856415'

## Now Predict the cluster to the Test Data in Saved Model ##

In [117]:
features

{'Frequency': <tf.Tensor 'IteratorGetNext:0' shape=(?, 1) dtype=float32>,
 'Monetary': <tf.Tensor 'IteratorGetNext:1' shape=(?, 1) dtype=float32>,
 'Recency': <tf.Tensor 'IteratorGetNext:2' shape=(?, 1) dtype=float32>}

In [None]:
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model(export_dir)
predictions = predict_fn(
    {"x": [[6.4, 3.2, 4.5, 1.5],
           [5.8, 3.1, 5.0, 1.7]]})
print(predictions['scores'])


'renancy','freq','monetary'

In [122]:
from tensorflow.contrib import predictor

path_to_pb = "trained_models/cluster-01/export/1534856415"
predict_fn = predictor.from_saved_model(path_to_pb)
predictions = predict_fn({
        "renancy":[5.0,4.0,6.0,7.0],
        "freq":[7.0,3.0,6.0,8,0],
        "monetary":[8.0,4.0,7.0,5.0]
    })

print(predictions)

INFO:tensorflow:Restoring parameters from trained_models/cluster-01/export/1534856415\variables\variables


ValueError: Got unexpected keys in input_dict: {'freq', 'monetary', 'renancy'}
expected: {'csv_rows'}