In [None]:
# import tensorflow v2 - disable eager execution to build explicit graph for deployment
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
tf.compat.v1.disable_eager_execution()

# import other libraries
import pandas as pd
import numpy as np
from combined_estimator import CombinedEstimator
from model_fns import * # all model_fns are using python closures

### Hyperparameters

In [None]:
# Both values below are handed to every *_model_fn factory when the estimators are built.
LEARNING_RATE = 0.001
LEAKY_RELU_ALPHA = 0.1

# Batch size of the CSV dataset produced by predict_input_fn.
BATCH_SIZE = 256

### Variables

In [None]:
# Input CSV to score, and the CSV that receives the inputs joined with the predictions.
PREDICT_FILE = 'sample_test.csv'
OUTPUT_PREDICT_FILE = 'sample_predictions.csv'

# Directory passed as model_dir to the RunConfig, every estimator and the combined estimator.
MODEL_DIR = "o2d"

# RunConfig settings.
SAVE_CHECKPOINT_SECS = 900  # 15 minutes between checkpoints
SAVE_SUMMARY_STEPS = 4000   # counted in steps, not epochs
KEEP_CHECKPOINT_MAX = 5     # small on purpose so checkpoints do not fill the disk

### Features

In [None]:
def _embedding_columns(stem, size):
    """Return the numbered column names 'embedding_<stem>_0' .. 'embedding_<stem>_<size-1>'."""
    return ['embedding_{}_{}'.format(stem, i) for i in range(size)]


# Feature columns handed to fm_model_fn.
features_fm_regressor = [
    'banner_factor', 'weekday', 'hour',
    'fm_avg_Rz_1H', 'fm_avg_Rid_1Day', 'fm_avg_Rid_hour_30D',
    'rest_zone_rain_mode__2H',
    'fm_avg_Rz_dow_hour_7D', 'fm_avg_Rz_3H', 'fm_avg_Rz_dow_hour_30D',
    'fm_avg_Rz_3D', 'fm_avg_Rz_1D',
    'fm_avg_Rh_1D', 'fm_avg_Rh_hour_3D', 'fm_avg_Rh_dow_hour_7D',
]

# Feature columns handed to lm_model_fn.
features_lm_regressor = (
    [
        'restaurant_customer_distance',
        'p2d_avg_Rz_3D', 'p2d_avg_Rz_1H', 'p2d_avg_Cz_3D',
        'p2d_avg_Rid_20D', 'p2d_avg_Rid_1H', 'p2d_avg_Cz_1H', 'p2d_avg_Rid_3D',
        'r2d_avg_Cg_7D', 'p2d_avg_Ch_20D', 'p2d_sd_Ch_20D',
        'rest_zone_rain_mode__2H', 'cust_zone_rain_mode__2H',
    ]
    + _embedding_columns('dayofweek_LM', 3)
    + _embedding_columns('hour_LM', 5)
    + _embedding_columns('city_LM', 5)
    + _embedding_columns('Rz_LM', 25)
)

# O2A feature columns (currently only contribute to input_features below).
features_o2a_regressor = [
    'banner_factor', 'weekday', 'hour',
    'o2as_avg_res_1h',
    'o2a_zoneid_last15mins_averages', 'o2a_zoneid_last5mins_averages',
    'o2as_avg_Rid_hour_dow_30d', 'o2as_avg_Rz_hour_dow_30d',
    'o2as_avg_Rz_hour_dow_9d', 'o2as_avg_Rz_hour_dow_lag',
    'rest_zone_rain_mode__2H',
    'o2as_avg_Rid_hour_3D', 'o2as_avg_Rz_dow_30D', 'o2as_avg_Rz_7D',
    'o2as_avg_Rh_dow_7D', 'o2as_avg_Rh_7D', 'o2as_avg_Rid_dow_hour_7D',
]

# Feature columns handed to the o2d model_fns.
# The original one-line literal was split in the middle of 'embedding_hour_O2P_0';
# that name is produced intact by the generated O2P embedding columns below.
features_o2d_regressor = (
    [
        'bill_amount', 'banner_factor', 'hour', 'restaurant_customer_distance',
        'o2as_avg_res_1h',
        'o2a_zoneid_last15mins_averages', 'o2a_zoneid_last5mins_averages',
        'o2as_avg_Rid_hour_dow_30d', 'o2as_avg_Rz_hour_dow_30d',
        'o2as_avg_Rz_hour_dow_9d', 'o2as_avg_Rz_hour_dow_lag',
        'o2as_avg_Rid_hour_3D', 'o2as_avg_Rz_dow_30D', 'o2as_avg_Rz_7D',
        'o2as_avg_Rh_dow_7D', 'o2as_avg_Rh_7D', 'o2as_avg_Rid_dow_hour_7D',
        'rest_zone_rain_mode__2H', 'cust_zone_rain_mode__2H',
        'fm_avg_Rz_1H', 'fm_avg_Rid_1Day', 'fm_avg_Rz_dow_hour_7D',
        'fm_avg_Rz_3H', 'fm_avg_Rz_dow_hour_30D', 'fm_avg_Rz_3D', 'fm_avg_Rz_1D',
        'fm_avg_Rh_1D', 'fm_avg_Rh_hour_3D', 'fm_avg_Rh_dow_hour_7D',
        'ar2p_avg_res_hour_20d', 'ar2p_avg_res_20d', 'ar2p_avg_res_1h',
        'order_item_count', 'order_total_quantity',
        'restaurant_active_orders', 'restaurant_banner_factor',
        'items_past', 'o2p_value_res_lag1',
        'item_o2p_lag', 'item_o2p_avg', 'item_slot_o2p_avg', 'item_slot_o2p_avg_1D',
        'o2p_avg_1H', 'avg_item', 'high_item', 'rare_item',
        'lag_delta_I', 'lag_delta_II', 'item_lag_delta_I',
        'rest_placed_order_count__1h', 'restaurant_delta',
        'o2mf_value_res_lag1', 'o2mf_avg_1H',
        'p2d_avg_Rz_3D', 'p2d_avg_Rz_1H', 'p2d_avg_Cz_3D',
        'p2d_avg_Rid_20D', 'p2d_avg_Rid_1H', 'p2d_avg_Cz_1H', 'p2d_avg_Rid_3D',
        'r2d_avg_Cg_7D', 'p2d_avg_Ch_20D', 'p2d_sd_Ch_20D',
    ]
    + _embedding_columns('hour_LM', 5)
    + _embedding_columns('city_LM', 5)
    + _embedding_columns('Rz_LM', 25)
    + _embedding_columns('hour_O2P', 5)
    + _embedding_columns('week_O2P', 3)
    + _embedding_columns('restaurant_O2P', 10)
    + _embedding_columns('city_O2P', 5)
)

# Feature columns handed to o2p_model_fn.
features_o2p_regressor = (
    [
        'bill_amount', 'banner_factor', 'hour', 'weekday',
        'order_item_count', 'order_total_quantity',
        'restaurant_active_orders', 'restaurant_banner_factor',
        'items_past', 'o2p_value_res_lag1',
        'item_o2p_lag', 'item_o2p_avg', 'item_slot_o2p_avg', 'item_slot_o2p_avg_1D',
        'o2p_avg_1H', 'avg_item', 'high_item', 'rare_item',
        'lag_delta_I', 'lag_delta_II', 'item_lag_delta_I',
        'rest_placed_order_count__1h', 'restaurant_delta',
        'o2mf_value_res_lag1', 'o2mf_avg_1H',
        'ar2p_avg_res_hour_20d', 'ar2p_avg_res_20d', 'ar2p_avg_res_1h',
        'rest_zone_rain_mode__2H',
    ]
    + _embedding_columns('hour_O2P', 5)
    + _embedding_columns('week_O2P', 3)
    + _embedding_columns('restaurant_O2P', 10)
    + _embedding_columns('city_O2P', 5)
)

# Feature columns handed to wt_model_fn.
features_wt_regressor = [
    'banner_factor', 'weekday', 'hour',
    'ar2p_avg_res_hour_20d', 'ar2p_avg_res_20d', 'ar2p_avg_res_1h',
    'rest_zone_rain_mode__2H',
]

# De-duplicated union of every per-model list; these are the CSV columns read at
# prediction time. Sorted so the order is identical on every kernel run
# (plain set iteration order varies with string-hash randomisation).
input_features = sorted(set(
    features_fm_regressor
    + features_lm_regressor
    + features_o2a_regressor
    + features_o2d_regressor
    + features_o2p_regressor
    + features_wt_regressor
))

### Predict input_fn

In [None]:
def predict_input_fn():
    """Build the prediction dataset from PREDICT_FILE.

    Only the columns named in ``input_features`` are read, each declared as
    'float64'. The file is traversed once (``num_epochs=1``), in file order
    (``shuffle=False``), in batches of BATCH_SIZE. ``ignore_errors=True`` is
    passed through to the CSV reader.

    Returns:
        The dataset created by ``tf.data.experimental.make_csv_dataset``.
    """
    float_defaults = ['float64'] * len(input_features)

    return tf.data.experimental.make_csv_dataset(
        file_pattern=PREDICT_FILE,
        batch_size=BATCH_SIZE,
        select_columns=input_features,
        column_defaults=float_defaults,
        num_epochs=1,
        shuffle=False,
        ignore_errors=True)

### Define RunConfig

In [None]:
# Run configuration shared by all estimators: model directory, checkpoint
# interval, summary interval and number of checkpoints retained.
rc = tf.estimator.RunConfig(
    model_dir=MODEL_DIR,
    save_checkpoints_secs=SAVE_CHECKPOINT_SECS,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    keep_checkpoint_max=KEEP_CHECKPOINT_MAX,
)

### Create estimators

In [None]:
def _make_estimator(model_fn_factory, feature_names):
    """Build one sub-model Estimator.

    ``model_fn_factory`` is one of the ``*_model_fn`` closures; it is called with
    the feature list plus the shared hyperparameters and ``LOSS='custom'``.
    Every estimator uses the same MODEL_DIR and RunConfig ``rc``.
    """
    return tf.estimator.Estimator(
        model_fn=model_fn_factory(feature_names, LEAKY_RELU_ALPHA, LEARNING_RATE,
                                  LOSS='custom'),
        model_dir=MODEL_DIR,
        config=rc)


# NOTE(review): the O2A estimator is built from features_o2d_regressor, while
# features_o2a_regressor is not used by any estimator. This may be a copy-paste
# slip -- confirm against the training notebook/checkpoint before changing it,
# since the feature list has to match what the checkpoint was trained with.
o2a_estimator = _make_estimator(o2a_model_fn, features_o2d_regressor)

fm_estimator = _make_estimator(fm_model_fn, features_fm_regressor)

wt_estimator = _make_estimator(wt_model_fn, features_wt_regressor)

o2p_estimator = _make_estimator(o2p_model_fn, features_o2p_regressor)

lm_estimator = _make_estimator(lm_model_fn, features_lm_regressor)

o2d_estimator = _make_estimator(o2d_model_fn, features_o2d_regressor)

o2d_beef_estimator = _make_estimator(o2d_beef_model_fn, features_o2d_regressor)

### Create combined estimator

In [None]:
# The two lists are parallel: the i-th name goes with the i-th estimator.
# NOTE(review): the names look like the target columns of each sub-model
# (they are read back as the actual values in the offline analysis) -- confirm
# against CombinedEstimator.
sub_estimators = [
    o2a_estimator,
    fm_estimator,
    wt_estimator,
    o2p_estimator,
    lm_estimator,
    o2d_estimator,
    o2d_beef_estimator,
]
sub_estimator_names = ['O2As', 'FM', 'Ar2P', 'O2P', 'P2D', 'O2D', 'O2D_beef']

estimator = CombinedEstimator(sub_estimators, sub_estimator_names, MODEL_DIR)

### Predict!

In [None]:
# checkpoint_path=None -> predict from the latest checkpoint found in model_dir.
# To predict from a specific checkpoint, pass its path instead, e.g.
#   checkpoint_path = "o2d/model.ckpt-15847321"
# yield_single_examples=False is passed so that results come back per batch;
# the next cell consumes them as batch dictionaries.
predict_iterator = estimator.predict(
    input_fn=predict_input_fn,
    checkpoint_path=None,
    yield_single_examples=False,
)

In [None]:
# Turn every prediction batch into a DataFrame, then concatenate once.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole accumulated frame on every batch.
# Each output array is reduced to its first column with [:, 0]
# (assumes each output is 2-D, e.g. (batch, 1) -- TODO confirm against the model_fns).
batch_frames = [
    pd.DataFrame({name: values[:, 0] for name, values in batch_dict.items()})
    for batch_dict in predict_iterator
]

# pd.concat raises on an empty list, so keep the original result (an empty
# DataFrame) when the iterator yields no batches.
Y_hat = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()

In [None]:
# Re-read the raw input file so the predictions can be written next to every input column.
X_test = pd.read_csv(PREDICT_FILE)

# The column-wise join below is only meaningful when both frames have the same number of rows.
assert len(X_test.index) == len(Y_hat.index), "Number of rows not equal"

# Place prediction columns to the right of the inputs, keeping all column names.
Y_pred = pd.concat([X_test, Y_hat], axis=1, ignore_index=False)
Y_pred.to_csv(OUTPUT_PREDICT_FILE, header=True, index=False)

### Offline analysis

In [None]:
from sklearn.metrics import mean_absolute_error

# (printed label, first column passed to MAE, second column passed to MAE), one row per sub-model.
mae_report = [
    ('O2A', 'O2As', 'predicted_o2a'),
    ('FM', 'FM', 'predicted_fm'),
    ('Ar2P', 'Ar2P', 'predicted_wt'),
    ('O2P', 'O2P', 'predicted_O2P'),
    ('LM', 'P2D', 'predicted_last_mile'),
    ('O2D', 'O2D', 'predicted_O2D_accurate'),
    ('O2D_beef', 'O2D_beef', 'predicted_O2D'),
]

# Print the mean absolute error of each pair of columns in Y_pred.
for label, actual_col, predicted_col in mae_report:
    print(label, mean_absolute_error(Y_pred[actual_col], Y_pred[predicted_col]))