## Remote training

In [None]:
%%bash

# ---------------------------------------------------------------------------
# Submission settings for the AI Platform training job.
#
# Every value written as "${NAME:-default}" can be overridden by setting NAME
# in the environment before this cell runs; otherwise the default is used.
# ---------------------------------------------------------------------------

# Timestamp suffix so that every manual submission gets a distinct version
# (and therefore a distinct job name and output location).
now=$(date +"%Y%m%d_%H%M%S")

VERSION="edml_trainer_manual_submit_${now}"
APP="${APP:-edml-trainer}"
BUCKET="${BUCKET:-edml}"
PACKAGE="${PACKAGE:-../../edml-trainer/trainer/}"
MODULE="${MODULE:-trainer.task}"
REGION="${REGION:-europe-west1}"

# Job name is "<app>-<version>" with every '-' and '.' replaced by '_'.
# Done with parameter expansion instead of an unquoted `echo | tr | tr`
# pipeline, so the value is never word-split or glob-expanded.
JOB_NAME="${APP}-${VERSION}"
JOB_NAME="${JOB_NAME//[-.]/_}"

JOB_ID="${JOB_NAME}"

# Cloud Storage location used as the job directory for this submission.
JOB_DIR="gs://${BUCKET}/ai-platform/models/${APP}/${VERSION}"

# NOTE(review): runtime version 1.15 is paired with Python 3.7; Python 3.5 is
# only offered with runtime versions up to 1.14, so the previous default of
# 3.5 made the default submission invalid. Override PYTHON_VERSION and
# RUNTIME_VERSION together if an older combination is required.
PYTHON_VERSION="${PYTHON_VERSION:-3.7}"
RUNTIME_VERSION="${RUNTIME_VERSION:-1.15}"

# Passed to the trainer as its --output-dir.
OUTDIR="${JOB_DIR}/model"

# Print a banner listing the submission settings, one per line, so the cell
# output records exactly what is about to be sent.
printf '%s\n' \
  "+  SUBMIT PARAMETERS +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" \
  "+  JOB_NAME = ${JOB_NAME}" \
  "+  JOB_ID = ${JOB_ID}" \
  "+  VERSION = ${VERSION}" \
  "+  PACKAGE = ${PACKAGE}" \
  "+  MODULE = ${MODULE}" \
  "+  REGION = ${REGION}" \
  "+  JOB_DIR = ${JOB_DIR}" \
  "+  PYTHON_VERSION = ${PYTHON_VERSION}" \
  "+  RUNTIME_VERSION = ${RUNTIME_VERSION}" \
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"

# Trainer hyper-parameters. Each keeps a value already present (and non-empty)
# in the environment; otherwise the default shown here is assigned.
# TRAIN_NNSIZE is a space-separated list of values.
: "${TRAIN_NNSIZE:=10 5}"
: "${TRAIN_NEMBEDS:=3}"
: "${TRAIN_BATCH_SIZE:=32}"
: "${TRAIN_EVALSTEP:=3}"
: "${TRAIN_STEPS:=2800000}"

# Four blank lines to separate this banner from the previous one.
printf '\n\n\n\n'

# Print a banner listing the values that will be forwarded to the trainer.
printf '%s\n' \
  "+  TRAIN PARAMETERS ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" \
  "+  BUCKET = ${BUCKET}" \
  "+  OUTDIR = ${OUTDIR}" \
  "+  VERSION = ${VERSION}" \
  "+  TRAIN_EVALSTEP = ${TRAIN_EVALSTEP}" \
  "+  TRAIN_NNSIZE = ${TRAIN_NNSIZE}" \
  "+  TRAIN_NEMBEDS = ${TRAIN_NEMBEDS}" \
  "+  TRAIN_BATCH_SIZE = ${TRAIN_BATCH_SIZE}" \
  "+  TRAIN_STEPS = ${TRAIN_STEPS}" \
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"

# TRAIN_NNSIZE is a space-separated list (e.g. "10 5") and every element must
# reach the trainer as its own argument after --nnsize. Split it explicitly
# into an array instead of relying on an unquoted expansion, which would also
# glob-expand the values.
read -r -a nnsize_args <<< "${TRAIN_NNSIZE}"

# Submit the training job. Flags before the bare "--" are consumed by gcloud;
# everything after it is forwarded to the trainer module as its own arguments.
# All expansions are quoted so values with spaces or glob characters are
# passed through intact.
if ! gcloud ai-platform jobs submit training "${JOB_ID}" \
     --job-dir "${JOB_DIR}" \
     --package-path "${PACKAGE}" \
     --module-name "${MODULE}" \
     --region "${REGION}" \
     --python-version "${PYTHON_VERSION}" \
     --runtime-version "${RUNTIME_VERSION}" \
     -- \
     --bucket="${BUCKET}" \
     --output-dir="${OUTDIR}" \
     --nembeds "${TRAIN_NEMBEDS}" \
     --nnsize "${nnsize_args[@]}" \
     --batch-size="${TRAIN_BATCH_SIZE}" \
     --train-steps="${TRAIN_STEPS}" \
     --eval-steps="${TRAIN_EVALSTEP}"; then
  # Without a submitted job there is nothing to describe or stream, so stop
  # here instead of issuing follow-up commands against a non-existent job.
  echo "ERROR: submission of job '${JOB_ID}' failed; skipping describe and stream-logs." >&2
  exit 1
fi

# Show the state of the job that was just submitted.
gcloud ai-platform jobs describe "${JOB_ID}"

# Stream the job's logs into the cell output.
gcloud ai-platform jobs stream-logs "${JOB_ID}"