In [None]:
# %load regressor.py
import numpy as np
import tensorflow as tf
import itertools
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn import preprocessing
import math as math


tf.logging.set_verbosity(tf.logging.INFO)

# Learning rate for the model
LEARNING_RATE = 0.01

TRAIN_STEPS = 40000

FILE_PATH = "../output/trainFeatures.csv"
PRED_PATH = "../output/predictions.csv"
SAVE_PATH = "temp"
COLUMNS = ["dist", "elev", "hilly", "cs", "atl", "ctl", "time"]
FEATURES = ["dist", "elev", "hilly", "cs", "atl", "ctl"]
# FEATURES = ["dist", "elev"]
LABEL = "time"

def plotStatistics():
	training_set, test_set, prediction_set = loadData()
	# training_set, test_set, prediction_set = normalize(training_set, test_set, prediction_set)
	dataset = pd.concat([training_set, test_set])
	# training_set.hist()


	# dataset.plot(kind='density', subplots=True, layout=(3,3), sharex=False)

	# pd.plotting.scatter_matrix(dataset)

	cax = plt.matshow(dataset.corr(), vmin=-1, vmax=1)
	plt.colorbar(cax)
	locs, labs = plt.xticks()
	plt.xticks(locs[1:-1], COLUMNS)
	plt.yticks(locs[1:-1], COLUMNS)


	# plt.scatter(dataset['dist'], dataset['CS'])


	plt.show()

def clearOldFiles():
	if tf.gfile.Exists(SAVE_PATH):
   		tf.gfile.DeleteRecursively(SAVE_PATH) 

def normalize(train, test, pred):
	label_train = train[LABEL]
	label_test = test[LABEL]

	# mean, std = train[FEATURES].mean(axis=0), train[FEATURES].std(axis=0, ddof=0)
	# train = (train[FEATURES] - mean) /std
	# test = (test[FEATURES] - mean) / std
	# train = pd.concat([X_train, label_train], axis=1)
	# test = pd.concat([X_test, label_test], axis=1)

	# minmax_scale = preprocessing.MinMaxScaler(feature_range=(-1, 1)).fit(train[FEATURES])
	# train[FEATURES] = minmax_scale.transform(train[FEATURES])
	# test[FEATURES] = minmax_scale.transform(test[FEATURES])
	# pred[FEATURES] = minmax_scale.transform(pred[FEATURES])

	std_scale = preprocessing.StandardScaler().fit(train[FEATURES])
	train[FEATURES] = std_scale.transform(train[FEATURES])
	test[FEATURES] = std_scale.transform(test[FEATURES])
	pred[FEATURES] = std_scale.transform(pred[FEATURES])

	

	# print(train)
	return train, test, pred


def get_input_fn(data_set, num_epochs=None, shuffle=True):
	return tf.estimator.inputs.pandas_input_fn(x=pd.DataFrame({k: data_set[k].values for k in FEATURES}), 
  		y = pd.Series(data_set[LABEL].values), num_epochs=num_epochs, shuffle=shuffle)


def loadData():
	

	data = pd.read_csv(FILE_PATH, skipinitialspace=True, skiprows=1, names=COLUMNS)
	predData = pd.read_csv(PRED_PATH, skipinitialspace=True, skiprows=1, names=COLUMNS)
	# data = data.sample(frac=1).reset_index(drop=True)
	numRows = len(data.index)
	train_rows = math.floor(numRows * 0.8)
	test_rows = numRows - train_rows
	training_set = data[:train_rows]
	test_set = data[train_rows:train_rows + test_rows].reset_index(drop=True)
	# print(data)
	print(train_rows, test_rows)
	# print(training_set)
	# print(test_set)
	# print(predData)

	training_set = pd.DataFrame(training_set, columns=COLUMNS)
	test_set = pd.DataFrame(test_set, columns=COLUMNS)
	prediction_set = pd.DataFrame(predData, columns=COLUMNS)

	return training_set, test_set, prediction_set


def model_fn(features, labels, mode, params):
	# Logic to do the following:
	# 1. Configure the model via TensorFlow operations
	# 2. Define the loss function for training/evaluation
	# 3. Define the training operation/optimizer
	# 4. Generate predictions
	# 5. Return predictions/loss/train_op/eval_metric_ops in EstimatorSpec object
 

	# Connect the first hidden layer to input layer
	feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
	input_layer = tf.feature_column.input_layer(features=features, feature_columns=feature_cols)


	# Connect the first hidden layer to second hidden layer with relu
	hidden_layer = tf.layers.dense(input_layer, 10, activation=tf.nn.relu, 
		kernel_regularizer=tf.contrib.layers.l1_l2_regularizer(scale_l1=0.9, scale_l2=0.9), name='hidden_1')

	h1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'hidden_1')
	tf.summary.histogram('kernel_1', h1_vars[0])
	tf.summary.histogram('bias_1', h1_vars[1])
	tf.summary.histogram('activation_1', hidden_layer)

	if mode == tf.estimator.ModeKeys.TRAIN:
		hidden_layer = tf.layers.dropout(hidden_layer, rate=0.3, name='dropout_1')
		tf.summary.scalar('dropout_1', tf.nn.zero_fraction(hidden_layer))



	# Connect the second hidden layer to first hidden layer with relu
	hidden_layer = tf.layers.dense(hidden_layer, 10, activation=tf.nn.relu, 
		kernel_regularizer=tf.contrib.layers.l1_l2_regularizer(scale_l1=0.9, scale_l2=0.9), name='hidden_2')

	h2_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'hidden_2')
	tf.summary.histogram('kernel_2', h2_vars[0])
	tf.summary.histogram('bias_2', h2_vars[1])
	tf.summary.histogram('activation_2', hidden_layer)

	if mode == tf.estimator.ModeKeys.TRAIN:
		hidden_layer = tf.layers.dropout(hidden_layer, rate=0.3, name='dropout_2')
		tf.summary.scalar('dropout_2', tf.nn.zero_fraction(hidden_layer))



	# Connect the output layer to second hidden layer (no activation fn)
	output_layer = tf.layers.dense(hidden_layer, 1, name='output')

	# Reshape output layer to 1-dim Tensor to return predictions
	predictions = tf.reshape(output_layer, [-1])

	# Provide an estimator spec for `ModeKeys.PREDICT`.
	if mode == tf.estimator.ModeKeys.PREDICT:
		return tf.estimator.EstimatorSpec(mode=mode,predictions={LABEL: predictions})


	# Calculate loss using mean squared error
	loss = tf.losses.mean_squared_error(labels, predictions)

	reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
	loss = tf.add_n([loss] + reg_losses)

	
	tf.summary.scalar("reg_loss", reg_losses[0])
	tf.summary.scalar("train_error", loss)



	optimizer = tf.train.AdamOptimizer(learning_rate=params["learning_rate"])
	train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
	# grad= optimizer.compute_gradients(loss)
	# train_op = optimizer.apply_gradients(grad, global_step=tf.train.get_global_step())
	alpha_t = optimizer._lr * tf.sqrt(1-optimizer._beta2_power) / (1-optimizer._beta1_power)
	tf.summary.scalar("learning_rate", alpha_t)
	
	# for g, v in enumerate(grad):
	# 	tf.summary.scalar("gradient", g)

	# Calculate root mean squared error as additional eval metric
	eval_metric_ops = {
	  "rmse": tf.metrics.root_mean_squared_error(tf.cast(labels, tf.float64), tf.cast(predictions, tf.float64))
	}
	
	# Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
	return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops)


	return EstimatorSpec(mode, predictions, loss, train_op, eval_metric_ops)


def main(unused_argv):
	predictionOnly = False;
	clearOldFiles()

	training_set, test_set, prediction_set = loadData()

	# prediction_set = pd.DataFrame([(10000, 20, 0.02, 0, 39.5, 45, 37)], columns=COLUMNS)

	training_set, test_set, prediction_set = normalize(training_set, test_set, prediction_set)

	# Set model params
	model_params = {"learning_rate": LEARNING_RATE}

	# Instantiate Estimator
	nn = tf.estimator.Estimator(model_fn=model_fn, params=model_params, model_dir=SAVE_PATH, config=tf.estimator.RunConfig().replace(save_summary_steps=500))

	if not predictionOnly:
		train_input_fn = get_input_fn(training_set, num_epochs=None, shuffle=True)

		# Train
		nn.train(input_fn=train_input_fn, steps=TRAIN_STEPS)

		# Score accuracy
		test_input_fn = get_input_fn(test_set, num_epochs=1, shuffle=False)
		ev = nn.evaluate(input_fn=test_input_fn)
		print("Loss: %s" % ev['loss'])
		print("Root Mean Squared Error: %s" % ev["rmse"])


	# Print out predictions

	predict_input_fn = get_input_fn(prediction_set, num_epochs=1, shuffle=False)
	predictions = nn.predict(input_fn=predict_input_fn)
	pred = list()
	for i, p in enumerate(predictions):
		print("Prediction %s: %s" % (i + 1, p[LABEL]))
		pred.append(p[LABEL])
		print("Seconds away: %s" % ((prediction_set[LABEL][i] - p[LABEL]) * 60))

	
	# plt.scatter(prediction_set.loc[:,'dist'], pred)
	# plt.scatter(training_set.loc[:,'dist'], training_set.loc[:,'time'])
	# plt.show()



if __name__ == "__main__":
	tf.app.run()
	# plotStatistics()

Training set = fastest kmeans data + races

In [1]:
%run regressor.py

train size:  26  test size:  7
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'temp', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into temp\model.ckpt.
INFO:tensorflow:loss = 2485.09, step = 1
INFO:tensorflow:global_step/sec: 492.315
INFO:tensorflow:loss = 145.56, step = 101 (0.203 sec)
INFO:tensorflow:global_step/sec: 581.816
INFO:tensorflow:loss = 73.625, step = 201 (0.172 sec)
INFO:tensorflow:global_step/sec: 533.296
INFO:tensorflow:loss = 59.6757, step = 301 (0.188 sec)
INFO:tensorflow:global_step/sec: 639.992
INFO:tensorflow:loss = 50.1076, step = 401 (0.156 sec)
INFO:tensorflow:global_step/sec: 533.359
INFO:tensorflow:loss = 44.1553, step = 501 (0.187 sec)
INFO:tensorflow

INFO:tensorflow:global_step/sec: 533.299
INFO:tensorflow:loss = 10.0267, step = 8101 (0.188 sec)
INFO:tensorflow:global_step/sec: 640.028
INFO:tensorflow:loss = 10.6061, step = 8201 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.819
INFO:tensorflow:loss = 9.8917, step = 8301 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.775
INFO:tensorflow:loss = 9.78464, step = 8401 (0.188 sec)
INFO:tensorflow:global_step/sec: 581.85
INFO:tensorflow:loss = 9.79476, step = 8501 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.95
INFO:tensorflow:loss = 9.51233, step = 8601 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.035
INFO:tensorflow:loss = 9.549, step = 8701 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.99
INFO:tensorflow:loss = 9.76454, step = 8801 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.997
INFO:tensorflow:loss = 10.0715, step = 8901 (0.156 sec)
INFO:tensorflow:global_step/sec: 533.327
INFO:tensorflow:loss = 9.66786, step = 9001 (0.188 sec)
INFO:tensorflow:global_step/sec: 457

INFO:tensorflow:global_step/sec: 581.839
INFO:tensorflow:loss = 8.44349, step = 16501 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.001
INFO:tensorflow:loss = 7.70783, step = 16601 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.807
INFO:tensorflow:loss = 7.30349, step = 16701 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.996
INFO:tensorflow:loss = 7.98107, step = 16801 (0.141 sec)
INFO:tensorflow:global_step/sec: 639.998
INFO:tensorflow:loss = 8.06738, step = 16901 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.953
INFO:tensorflow:loss = 8.15697, step = 17001 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.849
INFO:tensorflow:loss = 8.13268, step = 17101 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.951
INFO:tensorflow:loss = 7.66008, step = 17201 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.846
INFO:tensorflow:loss = 7.97661, step = 17301 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.989
INFO:tensorflow:loss = 7.90704, step = 17401 (0.156 sec)
INFO:tensorflow:glob

INFO:tensorflow:global_step/sec: 581.812
INFO:tensorflow:loss = 7.50833, step = 24901 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.773
INFO:tensorflow:loss = 7.72309, step = 25001 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.033
INFO:tensorflow:loss = 7.96165, step = 25101 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.773
INFO:tensorflow:loss = 6.99115, step = 25201 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.024
INFO:tensorflow:loss = 7.56089, step = 25301 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.005
INFO:tensorflow:loss = 7.56802, step = 25401 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.999
INFO:tensorflow:loss = 7.68601, step = 25501 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.82
INFO:tensorflow:loss = 7.46095, step = 25601 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.951
INFO:tensorflow:loss = 6.55576, step = 25701 (0.156 sec)
INFO:tensorflow:global_step/sec: 711.146
INFO:tensorflow:loss = 6.85332, step = 25801 (0.156 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 581.819
INFO:tensorflow:loss = 6.58111, step = 33301 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.95
INFO:tensorflow:loss = 6.79684, step = 33401 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.038
INFO:tensorflow:loss = 6.45559, step = 33501 (0.156 sec)
INFO:tensorflow:global_step/sec: 492.3
INFO:tensorflow:loss = 7.33295, step = 33601 (0.187 sec)
INFO:tensorflow:global_step/sec: 639.954
INFO:tensorflow:loss = 6.51202, step = 33701 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.032
INFO:tensorflow:loss = 6.76642, step = 33801 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.931
INFO:tensorflow:loss = 6.55279, step = 33901 (0.156 sec)
INFO:tensorflow:global_step/sec: 533.372
INFO:tensorflow:loss = 6.45983, step = 34001 (0.187 sec)
INFO:tensorflow:global_step/sec: 639.995
INFO:tensorflow:loss = 6.87234, step = 34101 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.991
INFO:tensorflow:loss = 6.51304, step = 34201 (0.156 sec)
INFO:tensorflow:global_

Training set = races only

In [1]:
%run regressor.py

train size:  13  test size:  4
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'temp', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into temp\model.ckpt.
INFO:tensorflow:loss = 2016.85, step = 1
INFO:tensorflow:global_step/sec: 457.219
INFO:tensorflow:loss = 107.49, step = 101 (0.219 sec)
INFO:tensorflow:global_step/sec: 639.954
INFO:tensorflow:loss = 69.2009, step = 201 (0.156 sec)
INFO:tensorflow:global_step/sec: 492.292
INFO:tensorflow:loss = 55.9281, step = 301 (0.203 sec)
INFO:tensorflow:global_step/sec: 492.304
INFO:tensorflow:loss = 46.3761, step = 401 (0.203 sec)
INFO:tensorflow:global_step/sec: 640.048
INFO:tensorflow:loss = 38.2793, step = 501 (0.156 sec)
INFO:tensorflo

INFO:tensorflow:global_step/sec: 581.777
INFO:tensorflow:loss = 5.19477, step = 8101 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.802
INFO:tensorflow:loss = 5.10395, step = 8201 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.817
INFO:tensorflow:loss = 5.11006, step = 8301 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.042
INFO:tensorflow:loss = 4.96877, step = 8401 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.809
INFO:tensorflow:loss = 4.95749, step = 8501 (0.156 sec)
INFO:tensorflow:global_step/sec: 711.114
INFO:tensorflow:loss = 5.00214, step = 8601 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.775
INFO:tensorflow:loss = 4.91996, step = 8701 (0.156 sec)
INFO:tensorflow:global_step/sec: 533.303
INFO:tensorflow:loss = 4.92212, step = 8801 (0.188 sec)
INFO:tensorflow:global_step/sec: 640.065
INFO:tensorflow:loss = 4.87541, step = 8901 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.818
INFO:tensorflow:loss = 4.77077, step = 9001 (0.172 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 639.965
INFO:tensorflow:loss = 3.44099, step = 16501 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.028
INFO:tensorflow:loss = 3.31768, step = 16601 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.957
INFO:tensorflow:loss = 3.28963, step = 16701 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.829
INFO:tensorflow:loss = 3.49393, step = 16801 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.016
INFO:tensorflow:loss = 3.42718, step = 16901 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.951
INFO:tensorflow:loss = 3.27412, step = 17001 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.993
INFO:tensorflow:loss = 3.22646, step = 17101 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.001
INFO:tensorflow:loss = 3.24117, step = 17201 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.988
INFO:tensorflow:loss = 3.32598, step = 17301 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.846
INFO:tensorflow:loss = 3.2585, step = 17401 (0.172 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 639.981
INFO:tensorflow:loss = 2.86463, step = 24901 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.856
INFO:tensorflow:loss = 2.84238, step = 25001 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.991
INFO:tensorflow:loss = 2.92105, step = 25101 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.943
INFO:tensorflow:loss = 2.81456, step = 25201 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.851
INFO:tensorflow:loss = 2.80813, step = 25301 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.955
INFO:tensorflow:loss = 3.08217, step = 25401 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.842
INFO:tensorflow:loss = 2.77695, step = 25501 (0.172 sec)
INFO:tensorflow:global_step/sec: 640.001
INFO:tensorflow:loss = 2.75255, step = 25601 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.991
INFO:tensorflow:loss = 2.77877, step = 25701 (0.156 sec)
INFO:tensorflow:global_step/sec: 711.057
INFO:tensorflow:loss = 2.79434, step = 25801 (0.156 sec)
INFO:tensorflow:glob

INFO:tensorflow:global_step/sec: 639.995
INFO:tensorflow:loss = 2.63202, step = 33301 (0.172 sec)
INFO:tensorflow:global_step/sec: 581.847
INFO:tensorflow:loss = 2.59432, step = 33401 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.978
INFO:tensorflow:loss = 2.63632, step = 33501 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.011
INFO:tensorflow:loss = 2.50608, step = 33601 (0.172 sec)
INFO:tensorflow:global_step/sec: 492.271
INFO:tensorflow:loss = 2.51563, step = 33701 (0.188 sec)
INFO:tensorflow:global_step/sec: 640.038
INFO:tensorflow:loss = 2.50281, step = 33801 (0.156 sec)
INFO:tensorflow:global_step/sec: 639.993
INFO:tensorflow:loss = 2.55165, step = 33901 (0.156 sec)
INFO:tensorflow:global_step/sec: 640.002
INFO:tensorflow:loss = 2.59418, step = 34001 (0.172 sec)
INFO:tensorflow:global_step/sec: 639.986
INFO:tensorflow:loss = 2.80028, step = 34101 (0.156 sec)
INFO:tensorflow:global_step/sec: 581.812
INFO:tensorflow:loss = 2.55945, step = 34201 (0.172 sec)
INFO:tensorflow:glob