# Setup

## Install Packages

In [1]:
# !pip install autokeras
# pip install git+https://github.com/keras-team/keras-tuner.git
# pip install autokeras
# !pip install tensorflow-gpu

## Check TensorFlow GPU Installation

In [1]:
import tensorflow as tf 

# Query TensorFlow for the default GPU device name; a falsy (empty) name is
# treated as "no GPU available" and the install hint is printed instead.
gpu_name = tf.test.gpu_device_name()
gpu_message = (
    'Default GPU Device:\n    {}'.format(gpu_name)
    if gpu_name
    else "Please install GPU version of TF"
)
print(gpu_message)

Default GPU Device:
    /device:GPU:0


# Import Modules

In [2]:
import numpy as np
import pandas as pd

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import json

import autokeras as ak

# Read Data

In [4]:
# Read one CSV per value of h and keep the resulting frames in `data`,
# keyed by the file stem ("processed_dep_h<h>").
horizons = [0, 30, 120, 180]
data = {}
for h in horizons:
    frame_key = 'processed_dep_h{}'.format(h)
    csv_path = "/mnt/data/Christophe/processed_dep_h{}.csv".format(h)
    data[frame_key] = pd.read_csv(csv_path)
# Trailing expression so the notebook displays which tables were loaded.
data.keys()

dict_keys(['processed_dep_h0', 'processed_dep_h30', 'processed_dep_h120', 'processed_dep_h180'])

In [5]:
# Parse csv_docs.json into `docs`; the file is read fully and then decoded.
with open("/mnt/data/Christophe/csv_docs.json", "r") as docs_file:
    docs = json.loads(docs_file.read())

# Select Data

In [6]:
# Pick the h=30 table and carve it into the partitions flagged in its `dtype`
# column. For every partition the `dtype` marker is dropped and `t_taxi` is
# split off as the regression target, leaving the remaining columns as features.
h = 30
df = data['processed_dep_h{}'.format(h)]

partitions = {}
for split_label in ("TRAIN", "VALIDATE", "TEST"):
    features = df[df['dtype'] == split_label]  # boolean mask -> new frame
    features.pop("dtype")
    target = features.pop("t_taxi")
    partitions[split_label] = (features, target)

X_train, y_train = partitions["TRAIN"]
X_val, y_val = partitions["VALIDATE"]
X_test, y_test = partitions["TEST"]

# Evaluation Function

In [7]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.metrics import mean_squared_error, mean_absolute_error
import time

def model_eval(y, y_pred, name=None, file=None, verbose=True, **kwargs):
    """Score regression predictions and optionally print / log the report.

    Parameters
    ----------
    y : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, one per entry of ``y``. Both inputs are flattened
        and compared positionally, so pandas indexes are ignored.
    name : str, optional
        Label stored under ``"name"`` (and printed when ``verbose``).
    file : str or path-like, optional
        When given, ``str(report)`` plus a newline is appended to this file.
    verbose : bool, default True
        Print the name (if any) and the finished report.
    **kwargs
        Extra key/value pairs copied verbatim into the report.

    Returns
    -------
    dict
        ``"RMSE"``, ``"MAE"``, the share (in percent) of absolute errors below
        2*60, 5*60 and 7*60 (labelled ``"% <N min"``, i.e. the values are
        treated as seconds), a ``"time"`` stamp, plus ``name``/``kwargs``.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in length, or contain NaN/inf.
    """
    report = {}
    if name:
        report['name'] = name
        if verbose:
            print(name)

    # Work on plain 1-D float arrays: subtracting two pandas Series would
    # align them by index label instead of by position and yield NaNs.
    actual = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError(
            "y and y_pred must have the same number of samples "
            "({} != {})".format(actual.size, predicted.size)
        )
    if actual.size == 0:
        raise ValueError("y and y_pred must not be empty")
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError("y and y_pred must not contain NaN or infinity")

    abs_err = np.abs(actual - predicted)

    # Computed directly with NumPy so the function does not depend on the
    # `squared=` keyword of sklearn's mean_squared_error. Values are cast to
    # built-in floats so str(report) looks the same on every NumPy version.
    report["RMSE"] = float(np.sqrt(np.mean(abs_err ** 2)))
    report["MAE"] = float(np.mean(abs_err))
    for minutes in (2, 5, 7):
        hits = np.count_nonzero(abs_err < minutes * 60)
        report["% <{} min".format(minutes)] = hits / actual.size * 100
    report["time"] = str(pd.Timestamp(round(time.time()), unit='s'))

    # Caller-supplied metadata is applied last, as before, so it may override
    # the computed entries.
    report.update(kwargs)

    if file is not None:
        with open(file, "a") as f:
            f.write(str(report) + "\n")
    if verbose:
        print(report)
    return report

# Train Model

In [8]:
import time
# --- start the clock -------------------------------------------------------
t0 = time.time()
started_at = pd.to_datetime(t0, unit='s')
print(started_at)

# --- architecture search + training ----------------------------------------
# AutoKeras tries up to 100 candidate models, each trained for 20 epochs,
# optimising mean absolute error. overwrite=True discards any earlier search
# stored in the project directory.
# NOTE(review): X_val / y_val are not passed to fit() -- confirm that letting
# AutoKeras choose its own validation data is intended.
reg = ak.StructuredDataRegressor(
    loss='mean_absolute_error',
    metrics='mean_absolute_error',
    max_trials=100,
    overwrite=True,
)
reg.fit(X_train, y_train, epochs=20)

# --- score on the held-out test partition ----------------------------------
predicted_y = reg.predict(X_test)
print(reg.evaluate(X_test, y_test))
# Take the first column of the prediction array so model_eval gets a 1-D vector.
model_eval(y_test, predicted_y[:, 0])

# --- stop the clock and report the elapsed wall time -----------------------
t1 = time.time()
finished_at = pd.to_datetime(t1, unit='s')
print(finished_at)
print(finished_at - started_at)

# --- export the best model as a Keras model and persist it ------------------
model = reg.export_model()
model.summary()
try:
    # Preferred: save with save_format="tf".
    model.save("model_autokeras", save_format="tf")
except Exception:
    # If that fails for any reason, fall back to a single .h5 file.
    model.save("model_autokeras.h5")

Trial 65 Complete [00h 08m 33s]
val_loss: 150.2834014892578

Best val_loss So Far: 143.3001251220703
Total elapsed time: 09h 11m 53s
INFO:tensorflow:Oracle triggered exit
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
INFO:tensorflow:Assets written to: ./structured_data_regressor/best_model/assets












[157.28707885742188, 157.28707885742188]
{'RMSE': 203.75463612009068, 'MAE': 157.2871280878591, '% <2 min': 46.33132446247787, '% <5 min': 88.22125425144698, '% <7 min': 96.15529963999442, 'time': 1618298622}
2021-04-13 07:23:42.494271232
0 days 09:22:06.537653504






Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 39)]              0         
_________________________________________________________________
multi_category_encoding (Mul (None, 39)                0         
_________________________________________________________________
normalization (Normalization (None, 39)                79        
_________________________________________________________________
dense (Dense)                (None, 32)                1280      
_________________________________________________________________
re_lu (ReLU)                 (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
re_lu_1 (ReLU)               (None, 16)                0     

INFO:tensorflow:Assets written to: model_autokeras/assets


# Evaluate Model on Test Data at Different h

In [13]:
# Score the fitted regressor `reg` on the TEST partition of every table in
# `data` and append each report to model_autokeras.txt.
for h in [0, 30, 120, 180]:
    df = data['processed_dep_h{}'.format(h)]

    # Rebuild every partition for this h. Only the TEST one is scored below;
    # the others are still reassigned so the module-level names end up exactly
    # as they did before this cell was refactored.
    partitions = {}
    for split_label in ("TRAIN", "VALIDATE", "TEST"):
        features = df[df['dtype'] == split_label]
        features.pop("dtype")
        target = features.pop("t_taxi")
        partitions[split_label] = (features, target)

    X_train, y_train = partitions["TRAIN"]
    X_val, y_val = partitions["VALIDATE"]
    X_test, y_test = partitions["TEST"]

    # model_eval's signature is (y, y_pred): ground truth first, predictions
    # second. The arguments used to be passed the other way round.
    model_eval(
        y_test,
        reg.predict(X_test).T[0],
        name="autokeras_T100_E20_best_h30, SIMPLE_TEST, h{}".format(h),
        file="model_autokeras.txt",
    )
    







autokeras_T60_E20_best_h30, SIMPLE_TEST, h0
{'name': 'autokeras_T60_E20_best_h30, SIMPLE_TEST, h0', 'RMSE': 195.81607001831085, 'MAE': 151.7737178142043, '% <2 min': 47.302311737275524, '% <5 min': 89.44935013394186, '% <7 min': 96.99176505605715, 'time': 1618302693}






autokeras_T60_E20_best_h30, SIMPLE_TEST, h30
{'name': 'autokeras_T60_E20_best_h30, SIMPLE_TEST, h30', 'RMSE': 203.75463612009068, 'MAE': 157.2871280878591, '% <2 min': 46.33132446247787, '% <5 min': 88.22125425144698, '% <7 min': 96.15529963999442, 'time': 1618302702}






autokeras_T60_E20_best_h30, SIMPLE_TEST, h120
{'name': 'autokeras_T60_E20_best_h30, SIMPLE_TEST, h120', 'RMSE': 240.45009484934386, 'MAE': 183.27531821357843, '% <2 min': 41.89331817812779, '% <5 min': 82.00454656166274, '% <7 min': 91.86896159779167, 'time': 1618302710}








autokeras_T60_E20_best_h30, SIMPLE_TEST, h180
{'name': 'autokeras_T60_E20_best_h30, SIMPLE_TEST, h180', 'RMSE': 240.9778929633861, 'MAE': 185.00963529837583, '% <2 min': 41.151475252857736, '% <5 min': 81.57878987372075, '% <7 min': 92.04021784666946, 'time': 1618302716}


# Model Summary

In [65]:
# Print the layer-by-layer summary of `model`.
# NOTE(review): `model` is not defined in this cell; it presumably still holds
# the result of reg.export_model() from an earlier run -- confirm this cell
# reproduces after Restart & Run All.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 39)]              0         
_________________________________________________________________
multi_category_encoding (Mul (None, 39)                0         
_________________________________________________________________
normalization (Normalization (None, 39)                79        
_________________________________________________________________
dense (Dense)                (None, 256)               10240     
_________________________________________________________________
re_lu (ReLU)                 (None, 256)               0         
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              263168