## Import necessary Libraries ##

In [72]:
#import necessary libraries 
import tensorflow as tf 
import numpy as np 
from datetime import datetime 
import shutil

from tensorflow import data
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import make_export_strategy

## Data Preprocessing and Defining Parameters ##

In [73]:
# Notebook-level configuration: input CSV paths, model name and run switches.
# Each *_filename value is a single-element list of CSV file paths.
train_filename, test_filename = ["dummy_dataset.csv"], ["dummy_dataset_test.csv"]

# Identifier of this model run.
model_name = "cluster-01"

# Run switches consumed by later cells.
train = True
resume = False

In [74]:
# Column layout of the CSV rows and the subset of columns used as features.
HEADER = ['Unamed:0', 'Frequency', 'Recency', 'Monetary']
# One default value per HEADER column, in the same order.
HEADER_DEFAULTS = [[0], [0.0], [0.0], [0.0]]
FEATURE_NAMES = ['Frequency', 'Monetary', 'Recency']
# Every HEADER column that is not listed as a feature.
UNUSED_FEATURE_NAMES = [name for name in HEADER if name not in FEATURE_NAMES]

print("Input features we have selected:{features}".format(features=FEATURE_NAMES))
print("Unused Features:{}".format(UNUSED_FEATURE_NAMES))

Input features we have selected:['Frequency', 'Monetary', 'Recency']
Unused Features:['Unamed:0']


### a. Parsing and Pre-processing Logic ###

In [75]:
#parsing and preprocessing logic
def parse_csv_row(csv_row):
    """Decode CSV text into a dict of feature tensors.

    Args:
        csv_row: string tensor of CSV text laid out as described by HEADER.

    Returns:
        dict mapping each column name in HEADER, minus those listed in
        UNUSED_FEATURE_NAMES, to its decoded tensor with one extra trailing
        axis appended.
    """
    decoded = tf.decode_csv(csv_row, record_defaults=HEADER_DEFAULTS)

    # Pair every column with its name, adding a trailing axis on the way.
    features = {
        name: tf.expand_dims(column, -1)
        for name, column in zip(HEADER, decoded)
    }

    # Drop the columns that are not used as model inputs.
    for unused_name in UNUSED_FEATURE_NAMES:
        del features[unused_name]

    return features

def process_features(features):
    """Feature-processing hook; currently returns ``features`` unchanged.

    Args:
        features: the feature dict produced by the parsing step.

    Returns:
        The very same ``features`` object that was passed in.
    """
    return features


### b. Data Pipeline Input Function ###

In [76]:
#data pipeline input function
def csv_input_fn(file_names, mode=tf.estimator.ModeKeys.TRAIN,
                 skip_header_lines=0,
                 num_epochs=None,
                 batch_size=200):
    """Build a batched feature pipeline over the given CSV files.

    Args:
        file_names: CSV file path(s) to read, one text line per record.
        mode: accepted for interface compatibility; not used by this function.
        skip_header_lines: number of leading lines to skip from the dataset.
        num_epochs: how many times to repeat the data; passed to
            ``Dataset.repeat`` (the default ``None`` is passed through as-is).
        batch_size: number of CSV lines per batch.

    Returns:
        ``(features, None)`` where ``features`` is the dict of tensors produced
        by ``parse_csv_row`` / ``process_features``; there are no labels.
    """
    # Shuffling is switched off unconditionally; the branch below is kept so
    # it can be re-enabled by flipping this flag.
    shuffle = False
    print("Data Input Function")
    print("=====================")
    print("Batch_Size:{}".format(batch_size))
    print("Epoch Count:{}".format(num_epochs))
    print("Shuffle:{}".format(shuffle))
    print("============================")

    # Read the files the caller asked for. The previous code always read the
    # global train_filename, so every caller got the training data regardless
    # of the file_names argument.
    dataset = data.TextLineDataset(filenames=file_names)
    dataset = dataset.skip(skip_header_lines)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)

    # Batch first, then decode: parse_csv_row receives a batch of CSV lines.
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda csv_row: parse_csv_row(csv_row))
    dataset = dataset.map(lambda features: process_features(features))

    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()

    features = iterator.get_next()

    return features, None

In [77]:
# Smoke-test the input function on the training file and list the feature names.
features, _ = csv_input_fn(file_names=train_filename)
print("Features read from CSV:{}".format(list(features)))

Data Input Function
Batch_Size:200
Epoch Count:None
Shuffle:False
Features read from CSV:['Monetary', 'Frequency', 'Recency']


### Build an Estimator ###

In [78]:
#build an estimator
def create_estimator(run_config, hparams):
    """Create the mini-batch K-means estimator used throughout the notebook.

    Args:
        run_config: run configuration handed to the estimator as ``config``.
        hparams: object exposing ``num_clusters``.

    Returns:
        A ``tf.contrib.learn.KMeansClustering`` estimator.
    """
    factorization = tf.contrib.factorization

    kmeans = tf.contrib.learn.KMeansClustering(
        num_clusters=hparams.num_clusters,
        initial_clusters=factorization.RANDOM_INIT,
        distance_metric=factorization.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=1,
        kmeans_plus_plus_num_retries=10,
        relative_tolerance=None,
        config=run_config,
    )

    print("")
    print("Estimator Type:{}".format(type(kmeans)))

    return kmeans

## Run an Experiment ##

### a. Create a Serving Function ###

In [79]:
def csv_serving_input_fn():
    """Build the serving-time input ops that parse raw CSV rows into features.

    Each served row is a CSV string with three float columns, named by
    ``serving_header`` below, each with a record default of 0.0.

    Returns:
        ``tf.contrib.learn.InputFnOps`` holding the parsed feature dict, no
        labels, and the ``'csv_rows'`` string placeholder as the input.
    """
    serving_header = ['renancy', 'freq', 'monetary']
    serving_defaults = [[0.0], [0.0], [0.0]]

    # Batch of raw CSV rows: shape (?,), dtype string.
    csv_rows = tf.placeholder(dtype=tf.string,
                              shape=[None],
                              name="csv_rows")

    # The placeholder is exposed to callers under the key 'csv_rows'.
    receiver_tensors = {'csv_rows': csv_rows}

    # (?,) -> (?, 1) before decoding; each decoded column is then (?, 1).
    decoded_columns = tf.decode_csv(tf.expand_dims(csv_rows, -1),
                                    record_defaults=serving_defaults)

    # A second trailing axis is appended here, giving (?, 1, 1) per column.
    # NOTE(review): the training pipeline yields (?, 1) features - confirm the
    # extra axis is intended for serving.
    features = {
        name: tf.expand_dims(column, -1)
        for name, column in zip(serving_header, decoded_columns)
    }

    return tf.contrib.learn.InputFnOps(
        process_features(features),
        None,
        receiver_tensors
    )

### Alternative: Serving Input Function Using `tf.estimator.export.ServingInputReceiver` ###

In [87]:
def csv_serving_input_fn_vtwo():
    """Serving input function built on ``ServingInputReceiver``.

    Creates one float32 placeholder of shape ``[None]`` per serving feature
    and uses the same dict both as the model features and as the receiver
    tensors.

    Returns:
        ``tf.estimator.export.ServingInputReceiver`` over those placeholders.
    """
    serving_feature_names = ('renancy', 'freq', 'monetary')
    placeholders = {
        name: tf.placeholder(tf.float32, [None])
        for name in serving_feature_names
    }

    # Features are fed directly, so the receiver tensors are the features.
    return tf.estimator.export.ServingInputReceiver(placeholders, placeholders)

### b. Create Experiment Function ###

In [80]:
def generate_experiment_fn(**experiment_args):
    """Return an experiment factory suitable for ``learn_runner.run``.

    Args:
        **experiment_args: extra keyword arguments forwarded unchanged to
            ``tf.contrib.learn.Experiment`` (for example ``export_strategies``).

    Returns:
        A function ``(run_config, hparams) -> tf.contrib.learn.Experiment``.
    """

    def _experiment_fn(run_config, hparams):
        """Build the Experiment; hparams must expose num_epochs and batch_size."""

        # Training reads batches ten times larger than hparams.batch_size.
        train_input_fn = lambda: csv_input_fn(
            train_filename,
            mode=tf.contrib.learn.ModeKeys.TRAIN,
            num_epochs=hparams.num_epochs,
            batch_size=hparams.batch_size * 10
        )

        # Evaluation makes a single pass over the training file. The previous
        # code referenced the undefined name `train_filenames`, which raised
        # NameError as soon as this input function was invoked.
        eval_input_fn = lambda: csv_input_fn(
            train_filename,
            mode=tf.contrib.learn.ModeKeys.EVAL,
            num_epochs=1,
            batch_size=hparams.batch_size
        )

        estimator = create_estimator(run_config, hparams)

        return tf.contrib.learn.Experiment(
            estimator,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            eval_steps=None,
            **experiment_args
        )

    return _experiment_fn

### c. Set HParams and RunConfig ###

In [81]:
# Checkpoints for this run live under a directory named after the model.
model_dir = "trained_models/{}".format(model_name)

# Hyperparameters read by the experiment and the estimator factory.
hparams = tf.contrib.training.HParams(num_epochs=1000,
                                      batch_size=500,
                                      num_clusters=3)

# Run configuration: fixed random seed, checkpoint every 100 steps.
run_config = tf.contrib.learn.RunConfig(model_dir=model_dir,
                                        tf_random_seed=100000,
                                        save_checkpoints_steps=100)

print("Model is Stored in Directory:{}".format(run_config.model_dir))

Model is Stored in Directory:trained_models/cluster-01


### d. Run Experiment ###

In [90]:
# Start from a clean model directory unless an earlier run is being resumed.
if resume:
    print("Resuming Training....")
else:
    print("Removing Previous Artifacts....")
    shutil.rmtree(model_dir, ignore_errors=True)


if train:
    tf.logging.set_verbosity(tf.logging.INFO)

    time_start = datetime.utcnow()
    print("Training Started at {}".format(time_start.strftime("%H:%M:%S")))
    print(".......................................")

    # Only the "train" schedule is run. Original author's note on the export
    # strategy: "not executing export_savedmodel()".
    learn_runner.run(
        experiment_fn=generate_experiment_fn(
            export_strategies=[
                make_export_strategy(csv_serving_input_fn_vtwo,
                                     exports_to_keep=1)
            ]
        ),
        run_config=run_config,
        schedule="train",
        hparams=hparams
    )

    time_end = datetime.utcnow()
    print(".......................................")
    print("Training Finished at {}".format(time_end.strftime("%H:%M:%S")))
    print("")

    # Wall-clock duration of the training call, in seconds.
    time_elapsed = time_end - time_start
    print("Training elapsed time:{} Seconds".format(time_elapsed.total_seconds()))

Removing Previous Artifacts....
Training Started at 11:39:36
.......................................
INFO:tensorflow:Using config: {'_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_log_step_count_steps': 100, '_model_dir': 'trained_models/cluster-01', '_environment': 'local', '_master': '', '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_is_chief': True, '_save_checkpoints_steps': 100, '_num_ps_replicas': 0, '_train_distribute': None, '_task_type': None, '_task_id': 0, '_tf_random_seed': 100000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000015F2F6C54A8>, '_num_worker_replicas': 0, '_evaluation_master': '', '_device_fn': None, '_session_config': None, '_save_checkpoints_secs': None, '_keep_checkpoint_every_n_hours': 10000}

Estimator Type:<class 'tensorflow.contrib.learn.python.learn.estimators.kmeans.KMeansClustering'>
Data Input Function
Batch_Size:5000
Epoch Count:1000
Shuffle:False
INFO:tensorflow:Create Checkpoin

INFO:tensorflow:global_step/sec: 29.6546
INFO:tensorflow:loss = 59806.348, step = 3601 (3.373 sec)
INFO:tensorflow:Saving checkpoints for 3700 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 28.3113
INFO:tensorflow:loss = 60103.17, step = 3701 (3.532 sec)
INFO:tensorflow:Saving checkpoints for 3800 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 31.6638
INFO:tensorflow:loss = 59804.734, step = 3801 (3.157 sec)
INFO:tensorflow:Saving checkpoints for 3900 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 31.511
INFO:tensorflow:loss = 60101.83, step = 3901 (3.175 sec)
INFO:tensorflow:Saving checkpoints for 4000 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 28.4478
INFO:tensorflow:loss = 59803.36, step = 4001 (3.515 sec)
INFO:tensorflow:Saving checkpoints for 4100 into trained_models/cluster-01\model.ckpt.
INFO:tensorflow:global_step/sec: 29.7502
INFO:tensorflow:loss = 60100.71, 

.......................................
Training Finished at 11:43:54

Training elapsed time:257.488087 Seconds


## Perform Predictions ##

In [91]:
# Assign every row of the training and test files to a cluster.
def train_input_fn():
    """Single pass over the training file in batches of 1500."""
    return csv_input_fn(train_filename, num_epochs=1, batch_size=1500)


def test_input_fn():
    """Single pass over the test file in batches of 500."""
    return csv_input_fn(test_filename, num_epochs=1, batch_size=500)


# A fresh estimator object built from the same run_config and hparams.
estimator = create_estimator(run_config, hparams)

train_assignments = list(estimator.predict_cluster_idx(input_fn=train_input_fn))
test_assignments = list(estimator.predict_cluster_idx(input_fn=test_input_fn))

INFO:tensorflow:Using config: {'_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_log_step_count_steps': 100, '_model_dir': 'trained_models/cluster-01', '_environment': 'local', '_master': '', '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_is_chief': True, '_save_checkpoints_steps': 100, '_num_ps_replicas': 0, '_train_distribute': None, '_task_type': None, '_task_id': 0, '_tf_random_seed': 100000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000015F2F6C54A8>, '_num_worker_replicas': 0, '_evaluation_master': '', '_device_fn': None, '_session_config': None, '_save_checkpoints_secs': None, '_keep_checkpoint_every_n_hours': 10000}

Estimator Type:<class 'tensorflow.contrib.learn.python.learn.estimators.kmeans.KMeansClustering'>
Data Input Function
Batch_Size:1500
Epoch Count:1
Shuffle:False
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from trained_models/cluster-01\model.ckpt-8000
INFO:tensorflow

In [84]:
# Fetch the cluster centroids from the estimator and print them.
clusters = estimator.clusters()
print("Cluster Centroids:", "=====================", clusters, sep="\n")

Cluster Centroids:
[[2.3320048 5.58356   4.7135463]
 [6.9585557 7.193507  5.0743465]
 [6.0553403 2.397514  5.261766 ]]


## Serving via the Saved model ##

In [86]:
# Call shape noted by the original author for reference:
#   estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn,
#                               strip_default_attrs=True)

# Exports go into an "export" sub-directory of the model directory.
export_dir = model_dir + "/export"

# Export using the CSV-parsing serving function; the call's return value is
# the cell output.
estimator.export_savedmodel(
    serving_input_fn=csv_serving_input_fn,
    export_dir_base=export_dir,
    as_text=False
)

Instructions for updating:
Switch to tf.estimator.Exporter and associated utilities.
Instructions for updating:
Switch to tf.estimator.Exporter and associated utilities.
Instructions for updating:
Switch to tf.estimator.Exporter and associated utilities.
INFO:tensorflow:Restoring parameters from trained_models/cluster-01\model.ckpt-8000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: trained_models/cluster-01/export\temp-1535367508\saved_model.pb


b'trained_models/cluster-01/export\\1535367508'

In [93]:
# Attempt the export with the ServingInputReceiver-based serving function.
# NOTE(review): the recorded output of this cell is
# "ValueError: too many values to unpack (expected 2)". Presumably this
# estimator's export_savedmodel does not accept what
# csv_serving_input_fn_vtwo returns - TODO confirm, then fix or remove this
# cell so the notebook survives Restart & Run All.
estimator.export_savedmodel(export_dir, csv_serving_input_fn_vtwo)

ValueError: too many values to unpack (expected 2)

## Predict Clusters for the Test Data Using the Saved Model ##

In [117]:
# Display the feature dict returned by the earlier csv_input_fn call.
features

{'Frequency': <tf.Tensor 'IteratorGetNext:0' shape=(?, 1) dtype=float32>,
 'Monetary': <tf.Tensor 'IteratorGetNext:1' shape=(?, 1) dtype=float32>,
 'Recency': <tf.Tensor 'IteratorGetNext:2' shape=(?, 1) dtype=float32>}

In [9]:
%%bash

# Shell assignments must not have spaces around '='; with spaces the shell
# tries to run MODEL_NAME / LAST as commands instead of setting variables.
MODEL_NAME='cluster-01'
# Last entry listed under the model's export directory.
LAST=$(ls "trained_models/${MODEL_NAME}/export" | tail -1)

Couldn't find program: 'bash'


In [None]:
# Runs the `gcloud init` shell command from the notebook.
# NOTE(review): presumably this prompts interactively - confirm it can run
# unattended before including it in a Run All.
!gcloud init