In [1]:
import os, logging, pandas

import tensorflow as tf

# Only let TensorFlow's logger emit records at ERROR level or above.
logging.getLogger("tensorflow").setLevel(logging.ERROR)

In [2]:
# Make sure the local data directory exists. exist_ok=True makes the call
# idempotent without a separate isdir() check, so there is no window between
# checking for the directory and creating it.
os.makedirs("/tmp/data", exist_ok=True)

In [None]:
# Shell command: copy every object matching taxi*.csv from the bucket into /tmp/data.
!gsutil cp gs://cloud-training-demos/feat_eng/data/taxi*.csv /tmp/data

In [4]:
# Show what is in the data directory after the copy step.
os.listdir("/tmp/data")

['taxi-train.csv', 'taxi-test.csv', 'taxi-valid.csv']

In [5]:
# Load the training split into a DataFrame so its schema can be inspected.
sample = pandas.read_csv('/tmp/data/taxi-train.csv')

# Number of columns in the file (displayed as the cell output).
len(sample.columns)

8

In [6]:
# First four column names.
sample.columns[:4].tolist()

['fare_amount', 'passenger_count', 'pickup_longitude', 'pickup_latitude']

In [7]:
# Remaining column names, from the fifth onwards.
sample.columns[4:].tolist()

['dropoff_longitude', 'dropoff_latitude', 'hourofday', 'dayofweek']

In [8]:
# The first column is used as the label; every later column is a feature.
LabelColumn, FeatureCols = sample.columns[0], sample.columns[1:].tolist()

# Number of feature columns (displayed as the cell output).
len(FeatureCols)

7

In [9]:
from tensorflow.data.experimental import make_csv_dataset

def FeatureLabel(dataset, label_column=None):
  """
  Feature & Label

  Split one element (a mapping of column name -> value) into a
  (features, label) pair.

  dataset      : dict-like element; must support .copy() and .pop().
  label_column : key to split off as the label. When omitted, the
                 notebook-level LabelColumn is used.

  Returns (features, label), where features is a shallow copy of `dataset`
  without the label key. The mapping passed in is left unmodified.
  """
  if label_column is None:
    # Resolved at call time so the single-argument form used by
    # dataset.map(FeatureLabel) keeps working.
    label_column = LabelColumn
  # Pop from a copy so the caller's mapping keeps its label entry.
  features = dataset.copy()
  label = features.pop(label_column)

  return features, label

def CreateShuffleDataset(pattern, batches, mode='eval'):
  """
  Create Shuffle Dataset

  Read the CSV file(s) matching `pattern` in batches of `batches` rows and
  map every batch to a (features, label) pair with FeatureLabel.

  pattern : file path or glob pattern passed to make_csv_dataset.
  batches : number of rows per batch.
  mode    : 'train' -> rows are shuffled while reading, the cached batches
            are reshuffled with a 1000-batch buffer and repeated endlessly.
            Any other value -> one ordered, finite pass over the data.

  Returns a tf.data dataset yielding (features, label) batches.

  Usage:

  >>> CreateShuffleDataset('taxi-train.csv', 2, 'train')

  >>> CreateShuffleDataset('taxi-train.csv', 2)
  """
  training = mode == 'train'
  # make_csv_dataset defaults to num_epochs=None (repeat forever) and
  # shuffle=True. Caching an endless stream can never complete and keeps
  # growing, and evaluation data would be shuffled and infinite. Read exactly
  # one pass instead, and only shuffle rows when training.
  dataset = make_csv_dataset(pattern, batches, num_epochs=1, shuffle=training)
  # The single pass is finite, so the parsed batches can be cached after the
  # first full iteration.
  dataset = dataset.map(FeatureLabel).cache()
  if training:
    # Shuffle Train Dataset
    dataset = dataset.shuffle(1000).repeat()
  # Overlap input preparation with model execution in both modes.
  dataset = dataset.prefetch(1)
  return dataset

In [10]:
from tensorflow.keras import layers, models

from tensorflow import feature_column as fc

def BuildModel():
  """
  Functional Model

  Build and compile a small feed-forward regressor over the columns listed
  in FeatureCols.

  Every feature gets its own float32 Input of shape (1,), named after its
  column. The inputs are concatenated and passed through Dense(32, relu),
  Dense(8, relu) and a single linear output unit.

  Returns the compiled Keras model (adam optimizer, 'mse' loss, 'mse' metric).
  """
  # Input Layer: one Input per feature column, keyed by column name
  inputs = {
    colname: layers.Input(name=colname, shape=(1,), dtype='float32')
    for colname in FeatureCols
  }
  # Hidden Layer
  # The inputs are concatenated directly instead of going through
  # DenseFeatures, so no tf.feature_column objects are created here.
  numerical_inputs = [inputs[colname] for colname in FeatureCols]
  dnn_inputs = layers.Concatenate()(numerical_inputs)
  first_layer = layers.Dense(32, activation='relu')(dnn_inputs)
  second_layer = layers.Dense(8, activation='relu')(first_layer)
  output = layers.Dense(1, activation='linear')(second_layer)
  # Create Model
  model = models.Model(inputs, output)
  # Model Compile
  model.compile(optimizer='adam', loss='mse', metrics=['mse'])
  return model

In [11]:
# Rows per training batch.
TrainBatchSize = 32

# Row counts of the training and validation files.
TrainExamples = pandas.read_csv('/tmp/data/taxi-train.csv').shape[0]
ValidExamples = pandas.read_csv('/tmp/data/taxi-valid.csv').shape[0]

# Number of full training batches that make up one epoch.
steps_per_epoch = TrainExamples // TrainBatchSize

In [12]:
# Training input pipeline ('train' mode).
train = CreateShuffleDataset(
  pattern='/tmp/data/taxi-train.csv', batches=TrainBatchSize, mode='train')

# Validation input pipeline ('eval' mode), limited to the number of full
# 1000-row batches in the validation file.
test = CreateShuffleDataset(
  pattern='/tmp/data/taxi-valid.csv', batches=1000, mode='eval',
).take(ValidExamples // 1000)

In [13]:
# Build a fresh model, then train it silently with `test` as validation data.
model = BuildModel()

hist = model.fit(
  train,
  validation_data=test,
  epochs=18,
  steps_per_epoch=steps_per_epoch,
  verbose=False,
)

In [15]:
# Evaluate on the validation batches without progress output; the result is
# displayed as the cell output.
model.evaluate(test, verbose=False)

[100.49900817871094, 100.49900817871094]

In [None]:
# Names of the four coordinate columns.
FeatCols = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude']

def ScaleLongitude(lon):
  """
  Scale Longitude

  Return (lon + 78) / 8.0.
  NOTE(review): the offset and divisor are presumably chosen for the value
  range of this dataset - confirm.
  """
  return (lon + 78) / 8.0

def ScaleLatitude(lat):
  """
  Scale Latitude

  Return (lat - 37) / 8.0.
  NOTE(review): the offset and divisor are presumably chosen for the value
  range of this dataset - confirm.
  """
  return (lat - 37) / 8.0

def EucliDistance(params):
  """
  Euclidean Distance

  `params` unpacks into four values, in order: origin longitude, origin
  latitude, goal longitude, goal latitude. Returns the square root of the
  sum of the squared longitude and latitude differences, computed with
  tf.sqrt.
  """
  origin_lon, origin_lat, goal_lon, goal_lat = params
  delta_lon, delta_lat = goal_lon - origin_lon, goal_lat - origin_lat
  return tf.sqrt(delta_lon * delta_lon + delta_lat * delta_lat)