In [55]:
# Reference: Code adapted from open source webpage
# https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive/09_sequence/poetry.ipynb

In [56]:
with open("../data/myTrain_uniquePairs.tsv", 'r') as f:
    lines = f.readlines()
    inputs = []
    targets = []
for line in lines:
    spl = line.split("\t")
    inputs.append(spl[0].strip())
    targets.append(spl[1].strip())
print(len(inputs))
print(targets[0])


5374
"Except for this small vocal minority, we have not gotten a lot of groundswell against this," says APA president Zimbardo.


In [57]:
%%bash
pip freeze | grep tensor

mesh-tensorflow==0.0.5
tensor2tensor==1.10.0
tensorboard==1.10.0
tensorflow==1.10.0
tensorflow-hub==0.4.0


In [58]:
# Choose a version of TensorFlow that is supported on TPUs
TFVERSION='1.10'
import os
os.environ['TFVERSION'] = TFVERSION

In [59]:
%%bash
pip install tensor2tensor==${TFVERSION} tensorflow==${TFVERSION}



In [60]:
%%bash
pip freeze | grep tensor

mesh-tensorflow==0.0.5
tensor2tensor==1.10.0
tensorboard==1.10.0
tensorflow==1.10.0
tensorflow-hub==0.4.0


In [61]:
import os
PROJECT = 'spring2019iw' # REPLACE WITH YOUR PROJECT ID
BUCKET = 'springiwkhyatiab1' # REPLACE WITH YOUR BUCKET NAME
REGION = 'us-east1' # REPLACE WITH YOUR BUCKET REGION e.g. us-central1

# this is what this notebook is demonstrating
PROBLEM= 'verbosity_problem'

# for bash
os.environ['PROJECT'] = PROJECT
os.environ['BUCKET'] = BUCKET
os.environ['REGION'] = REGION
os.environ['PROBLEM'] = PROBLEM


In [62]:
%%bash
gcloud config set project $PROJECT
gcloud config set compute/region $REGION

Updated property [core/project].
Updated property [compute/region].


In [63]:
%%bash
mkdir -p verbosity/trainer

In [64]:
%%bash
ls

mlengine.json
README.md
t2t_data
Train1.ipynb
training-data-analyst
=.transformer.transformer_verbosity.verbosity_problem.beam4.alpha0.6.decodes
=.transformer.transformer_verbosity.verbosity_problem.beam4.alpha0.6.inputs
=.transformer.transformer_verbosity.verbosity_problem.beam4.alpha0.6.targets
verbosity


In [65]:
%%writefile verbosity/trainer/problem.py
import os
import tensorflow as tf
from tensor2tensor.utils import registry
from tensor2tensor.models import transformer
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import generator_utils

tf.summary.FileWriterCache.clear() # ensure filewriter cache is clear for TensorBoard events file

# Define train and test values


@registry.register_problem
class VerbosityProblem(text_problems.Text2TextProblem):

  @property
  def approx_vocab_size(self):
    return 2**13  # ~8k

  @property
  def is_generate_per_split(self):
    # Only use train.tsv for now
    return False

  @property
  def dataset_splits(self):
    """Splits of data to produce and number of output shards for each."""
    # 10% evaluation data
    return [{
        "split": problem.DatasetSplit.TRAIN,
        "shards": 90,
    }, {
        "split": problem.DatasetSplit.EVAL,
        "shards": 10,
    }]

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    with open("../data/myTrain.tsv", 'r') as f:
      lines = f.readlines()
      for line in lines:
        spl = line.split("\t")    
        yield {
                "inputs": spl[0],
                "targets": spl[1].strip()
              }         


# Smaller than the typical translate model, and with more regularization
@registry.register_hparams
def transformer_verbosity():
  hparams = transformer.transformer_base()
  # using cross validated hyperparameters
  hparams.num_hidden_layers = 3
  hparams.hidden_size = 256
  hparams.attention_dropout = 0.6
  hparams.layer_prepostprocess_dropout = 0.54
  hparams.learning_rate = 0.1925
  return hparams

@registry.register_hparams
def transformer_verbosity_tpu():
  hparams = transformer_poetry()
  transformer.update_hparams_for_tpu(hparams)
  return hparams

# hyperparameter tuning ranges
@registry.register_ranged_hparams
def transformer_verbosity_range(rhp):
  rhp.set_float("learning_rate", 0.05, 0.25, scale=rhp.LOG_SCALE)
  rhp.set_int("num_hidden_layers", 2, 4)
  rhp.set_discrete("hidden_size", [128, 256, 512])
  rhp.set_float("attention_dropout", 0.4, 0.7)

Overwriting verbosity/trainer/problem.py


In [66]:
print("done")

done


In [67]:
%%writefile verbosity/trainer/__init__.py
from . import problem

Overwriting verbosity/trainer/__init__.py


In [68]:
%%writefile verbosity/setup.py
from setuptools import find_packages
from setuptools import setup

REQUIRED_PACKAGES = [
  'tensor2tensor'
]

setup(
    name='verbosity',
    version='0.1',
    author = 'Google',
    author_email = 'training-feedback@cloud.google.com',
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=True,
    description='Verbosity Reduction Problem',
    requires=[]
)

Overwriting verbosity/setup.py


In [69]:
!touch verbosity/__init__.py

In [70]:
!find verbosity

verbosity
verbosity/setup.py
verbosity/trainer
verbosity/trainer/problem.py
verbosity/trainer/__pycache__
verbosity/trainer/__pycache__/problem.cpython-35.pyc
verbosity/trainer/__pycache__/__init__.cpython-35.pyc
verbosity/trainer/__init__.py
verbosity/__init__.py


In [71]:
%%bash
DATA_DIR=./t2t_data
TMP_DIR=$DATA_DIR/tmp
rm -rf $DATA_DIR $TMP_DIR
mkdir -p $DATA_DIR $TMP_DIR
# Generate data
t2t-datagen \
  --t2t_usr_dir=./verbosity/trainer \
  --problem=$PROBLEM \
  --data_dir=$DATA_DIR \
  --tmp_dir=$TMP_DIR

  from ._conv import register_converters as _register_converters
INFO:tensorflow:Importing user module trainer from path /content/datalab/notebooks/verbosity
INFO:tensorflow:Generating problems:
    verbosity:
      * verbosity_problem
INFO:tensorflow:Generating data for verbosity_problem.
INFO:tensorflow:Generating vocab file: ./t2t_data/vocab.verbosity_problem.8192.subwords
INFO:tensorflow:Trying min_count 500
INFO:tensorflow:Iteration 0
INFO:tensorflow:vocab_size = 1698
INFO:tensorflow:Iteration 1
INFO:tensorflow:vocab_size = 883
INFO:tensorflow:Iteration 2
INFO:tensorflow:vocab_size = 943
INFO:tensorflow:Iteration 3
INFO:tensorflow:vocab_size = 931
INFO:tensorflow:Trying min_count 250
INFO:tensorflow:Iteration 0
INFO:tensorflow:vocab_size = 3068
INFO:tensorflow:Iteration 1
INFO:tensorflow:vocab_size = 1449
INFO:tensorflow:Iteration 2
INFO:tensorflow:vocab_size = 1525
INFO:tensorflow:Iteration 3
INFO:tensorflow:vocab_size = 1510
INFO:tensorflow:Trying min_count 125
INFO:tensorflow:I

In [72]:
!ls t2t_data | head

tmp
verbosity_problem-dev-00000-of-00010
verbosity_problem-dev-00001-of-00010
verbosity_problem-dev-00002-of-00010
verbosity_problem-dev-00003-of-00010
verbosity_problem-dev-00004-of-00010
verbosity_problem-dev-00005-of-00010
verbosity_problem-dev-00006-of-00010
verbosity_problem-dev-00007-of-00010
verbosity_problem-dev-00008-of-00010


In [73]:
%%bash
DATA_DIR=./t2t_data
gsutil -m rm -r gs://${BUCKET}/verbosity/
gsutil -m cp ${DATA_DIR}/${PROBLEM}* ${DATA_DIR}/vocab* gs://${BUCKET}/verbosity/data

Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00000-of-00010#1556594474403838...
Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00001-of-00010#1556594474310792...
Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00002-of-00010#1556594474334587...
Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00003-of-00010#1556594474418673...
Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00004-of-00010#1556594474369550...
Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00005-of-00010#1556594474516623...
/ [1/113 objects]   0% Done                                                     Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00006-of-00010#1556594474553126...
/ [2/113 objects]   1% Done                                                     Removing gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00007-of-00010#1556594474646536...
/ [3/1

In [74]:
%%bash
gcloud config list

[compute]
region = us-east1
[core]
account = 895454266418-compute@developer.gserviceaccount.com
disable_usage_reporting = True
pass_credentials_to_gsutil = true
project = spring2019iw



Your active configuration is: [default]


In [75]:
%%bash
PROJECT_ID=$PROJECT
AUTH_TOKEN=$(gcloud auth print-access-token)
SVC_ACCOUNT=$(curl -X GET -H "Content-Type: application/json" \
    -H "Authorization: Bearer $AUTH_TOKEN" \
    https://ml.googleapis.com/v1/projects/${PROJECT_ID}:getConfig \
    | python -c "import json; import sys; response = json.load(sys.stdin);\
    print(response['serviceAccount'])")

echo "Authorizing the Cloud ML Service account $SVC_ACCOUNT to access files in $BUCKET"
gsutil -m defacl ch -u $SVC_ACCOUNT:R gs://$BUCKET
gsutil -m acl ch -u $SVC_ACCOUNT:R -r gs://$BUCKET  # error message (if bucket is empty) can be ignored
gsutil -m acl ch -u $SVC_ACCOUNT:W gs://$BUCKET

Authorizing the Cloud ML Service account service-895454266418@cloud-ml.google.com.iam.gserviceaccount.com to access files in springiwkhyatiab1


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   235    0   235    0     0    533      0 --:--:-- --:--:-- --:--:--   532
No changes to gs://springiwkhyatiab1/
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00000-of-00010
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00001-of-00010
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00002-of-00010
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00003-of-00010
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00004-of-00010
No changes to gs://springiwkhyatiab1/verbosity/data/verbosity_problem-dev-00005-of-00010
No changes to gs://springiwkhyat

In [76]:
%%bash
BASE=gs://${BUCKET}/verbosity/data
OUTDIR=gs://${BUCKET}/verbosity/subset
gsutil -m rm -r $OUTDIR
gsutil -m cp \
    ${BASE}/${PROBLEM}-train-0008* \
    ${BASE}/${PROBLEM}-dev-00000*  \
    ${BASE}/vocab* \
    $OUTDIR

CommandException: 1 files/objects could not be removed.
Copying gs://springiwkhyatiab1/verbosity/data/verbosity_problem-train-00080-of-00090 [Content-Type=application/octet-stream]...
Copying gs://springiwkhyatiab1/verbosity/data/verbosity_problem-train-00081-of-00090 [Content-Type=application/octet-stream]...
/ [0 files][    0.0 B/ 15.4 KiB]                                                / [0 files][    0.0 B/ 30.9 KiB]                                                Copying gs://springiwkhyatiab1/verbosity/data/verbosity_problem-train-00082-of-00090 [Content-Type=application/octet-stream]...
/ [0 files][    0.0 B/ 46.5 KiB]                                                Copying gs://springiwkhyatiab1/verbosity/data/verbosity_problem-train-00083-of-00090 [Content-Type=application/octet-stream]...
Copying gs://springiwkhyatiab1/verbosity/data/verbosity_problem-train-00084-of-00090 [Content-Type=application/octet-stream]...
/ [0 files][    0.0 B/ 62.5 KiB]                             

In [77]:
%%bash
OUTDIR=gs://${BUCKET}/verbosity/subset
gsutil -m rm -r $OUTDIR

Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-dev-00000-of-00010#1556595194064657...
Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00080-of-00090#1556595193602150...
Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00081-of-00090#1556595193624376...
Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00082-of-00090#1556595193608702...
Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00083-of-00090#1556595193625731...
Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00084-of-00090#1556595193684211...
/ [1/12 objects]   8% Done                                                      Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00085-of-00090#1556595193855863...
/ [2/12 objects]  16% Done                                                      Removing gs://springiwkhyatiab1/verbosity/subset/verbosity_problem-train-00086-of-00

In [None]:
with open("../data/myTest.tsv", 'r') as f:
    lines = f.readlines()
fw = open("../data/test.txt", "w")
splold = ""
for line in lines:
  spl = line.split("\t")
  if (spl[0] != splold):
    fw.write(spl[0].strip() + "\n")
    splold = spl[0]
fw.close()

In [None]:
with open("../data/myTrain_uniquePairs.tsv", 'r') as f:
    lines = f.readlines()
fw = open("../data/train.txt", "w")
splold = ""
for line in lines:
  spl = line.split("\t")
  if (spl[0] != splold):
    fw.write(spl[0].strip() + "\n")
    splold = spl[0]
fw.close()
print(len(lines))

In [10]:
%%bash
chmod 110 ../data/test.txt

In [11]:
%%bash
chmod 110 ../data/train.txt

In [78]:
%%bash
# GPU="--train_steps=250000 --cloud_mlengine --worker_gpu=1 --hparams_set=transformer_verbosity"

DATADIR=gs://${BUCKET}/verbosity/data
OUTDIR=gs://${BUCKET}/verbosity/model_full_validated_30k
JOBNAME=verbosity_$(date -u +%y%m%d_%H%M%S)
# echo $OUTDIR $REGION $JOBNAME
# gsutil -m rm -rf $OUTDIR
echo 'Y' | t2t-trainer \
  --data_dir=$DATADIR \
  --t2t_usr_dir=./verbosity/trainer \
  --problem=$PROBLEM \
  --model=transformer \
  --output_dir=$OUTDIR \
  --train_steps=30000 \
  --worker_gpu=1\
  --cloud_mlengine\
  --hparams_set=transformer_verbosity 

:::MLPv0.5.0 transformer 1556595261.143214226 (/usr/local/envs/py3env/bin/t2t-trainer:28) run_start
:::MLPv0.5.0 transformer 1556595261.143814564 (/usr/local/envs/py3env/bin/t2t-trainer:28) run_set_random_seed
Confirm (Y/n)? > 

  from ._conv import register_converters as _register_converters
INFO:tensorflow:Importing user module trainer from path /content/datalab/notebooks/verbosity
INFO:tensorflow:Launching job transformer_verbosity_problem_t2t_20190430_033421 with ML Engine spec:
{'trainingInput': {'region': 'us-east1', 'masterType': 'standard_p100', 'pythonVersion': '3.5', 'runtimeVersion': '1.9', 'scaleTier': 'CUSTOM', 'args': ['--model=transformer', '--tmp_dir=/tmp/t2t_datagen', '--eval_use_test_set=False', '--hparams=', '--use_tpu_estimator=False', '--ps_gpu=0', '--export_saved_model=False', '--tfdbg=False', '--eval_early_stopping_metric_delta=0.1', '--tpu_num_shards=8', '--timit_paths=', '--locally_shard_to_cpu=False', '--registry_help=False', '--keep_checkpoint_max=20', '--iterations_per_loop=100', '--eval_run_autoregressive=False', '--sync=False', '--eval_early_stopping_metric=loss', '--ps_replicas=0', '--profile=False', '--disable_ffmpeg=False', '--worker_replicas=1', '--schedule=continuous_train_an

In [91]:
%%bash
# same as the above training job ...
TOPDIR=gs://${BUCKET}
OUTDIR=${TOPDIR}/verbosity/model_full_validated_30k
DATADIR=${TOPDIR}/verbosity/data
MODEL=transformer
HPARAMS=transformer_verbosity

# the file with the test lines
DECODE_FILE= '../data/test.txt'

BEAM_SIZE=4
ALPHA=0.6
echo 'Y' | t2t-decoder \
  --data_dir=$DATADIR \
  --problem=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$OUTDIR \
  --t2t_usr_dir=./verbosity/trainer \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file=$DECODE_FILE

:::MLPv0.5.0 transformer 1556601453.711891174 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/utils/expert_utils.py:231) model_hp_layer_postprocess_dropout: 0.0
:::MLPv0.5.0 transformer 1556601453.713026762 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_hidden_layers: 3
:::MLPv0.5.0 transformer 1556601453.713855505 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_num_heads: 8
:::MLPv0.5.0 transformer 1556601453.714758396 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_dropout: 0.0
:::MLPv0.5.0 transformer 1556601454.134764433 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/layers/transformer_layers.py:182) model_hp_ffn_filter: {"use_bias": "True", "filter_size": 2048, "activation": "relu"}
:::MLPv0.5.0 transformer 1556601454.135778904 (/usr/local/envs/py3env/lib/python3.5

../data/test.txt: line 1: Following: command not found
../data/test.txt: line 2:  I felt like I was a big part of the team. I went through the struggles last year and I wanted to be a part of it in the good times. : command not found
../data/test.txt: line 3: Property: command not found
../data/test.txt: line 4: Two: command not found
../data/test.txt: line 14: syntax error near unexpected token `('
../data/test.txt: line 14: `" We see that crimson ( from the Alabama loss ) every day, " tight end Lorenzo Diamond said, " and that's a big motivational factor for us.'
  from ._conv import register_converters as _register_converters
INFO:tensorflow:Importing user module trainer from path /content/datalab/notebooks/verbosity
Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
INFO:tensorflow:Configuring DataParallelism to replicate the model.
INFO:tensorflow:schedule=continuous_train_and_eval
INFO:tensorflow:worker_gpu=1
INFO:tensorflow:s

In [10]:
%%bash
# same as the above training job ...
TOPDIR=gs://${BUCKET}
OUTDIR=${TOPDIR}/verbosity/model_full_validated
DATADIR=${TOPDIR}/verbosity/data
MODEL=transformer
HPARAMS=transformer_verbosity

# the file with the input lines
DECODE_FILE= ../data/train.txt

BEAM_SIZE=4
ALPHA=0.6
echo 'Y' | t2t-decoder \
  --data_dir=$DATADIR \
  --problem=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$OUTDIR \
  --t2t_usr_dir=./verbosity/trainer \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file=$DECODE_FILE\
  --decode_to_file = compressed2.en

:::MLPv0.5.0 transformer 1555041225.698273182 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/utils/expert_utils.py:231) model_hp_layer_postprocess_dropout: 0.0
:::MLPv0.5.0 transformer 1555041225.699419975 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_hidden_layers: 3
:::MLPv0.5.0 transformer 1555041225.700225353 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_num_heads: 8
:::MLPv0.5.0 transformer 1555041225.700978518 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_dropout: 0.0
:::MLPv0.5.0 transformer 1555041226.140788555 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/layers/transformer_layers.py:182) model_hp_ffn_filter: {"use_bias": "True", "activation": "relu", "filter_size": 2048}
:::MLPv0.5.0 transformer 1555041226.141757488 (/usr/local/envs/py3env/lib/python3.5

../data/train.txt: line 1:  Except for this small vocal minority, we have just not gotten a lot of groundswell against this from members, : command not found
../data/train.txt: line 2: Date: command not found
../data/train.txt: line 13: The: command not found
../data/train.txt: line 1207: Do: command not found
../data/train.txt: line 1218: The: command not found
../data/train.txt: line 1219: Timex: command not found
../data/train.txt: line 1220: The: command not found
../data/train.txt: line 1221: Critics: command not found
../data/train.txt: line 1224: Customers: command not found
../data/train.txt: line 1240: Sony: command not found
../data/train.txt: line 1241: Each: command not found
../data/train.txt: line 1242: The: command not found
../data/train.txt: line 1244: Forster: command not found
../data/train.txt: line 1244: A: command not found
../data/train.txt: line 1245: Much: command not found
../data/train.txt: line 1248: P.S.: command not found
../data/train.txt: line 1249: The:

In [7]:
%%bash
# same as the above training job ...
TOPDIR=gs://${BUCKET}
OUTDIR=${TOPDIR}/verbosity/model_full_validated_30k
DATADIR=${TOPDIR}/verbosity/data
MODEL=transformer
HPARAMS=transformer_verbosity

# the file with the input lines
DECODE_FILE= ../data/train.txt

BEAM_SIZE=4
ALPHA=0.6
echo 'Y' | t2t-decoder \
  --data_dir=$DATADIR \
  --problem=$PROBLEM \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --output_dir=$OUTDIR \
  --t2t_usr_dir=./verbosity/trainer \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file=$DECODE_FILE\
  --decode_to_file = compressed.en

:::MLPv0.5.0 transformer 1556585696.561118841 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/utils/expert_utils.py:231) model_hp_layer_postprocess_dropout: 0.0
:::MLPv0.5.0 transformer 1556585696.562500238 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_hidden_layers: 3
:::MLPv0.5.0 transformer 1556585696.563466787 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_num_heads: 8
:::MLPv0.5.0 transformer 1556585696.564400911 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/models/transformer.py:101) model_hp_attention_dropout: 0.0
:::MLPv0.5.0 transformer 1556585697.010720730 (/usr/local/envs/py3env/lib/python3.5/site-packages/tensor2tensor/layers/transformer_layers.py:182) model_hp_ffn_filter: {"activation": "relu", "filter_size": 2048, "use_bias": "True"}
:::MLPv0.5.0 transformer 1556585697.012073517 (/usr/local/envs/py3env/lib/python3.5

../data/train.txt: line 1:  Except for this small vocal minority, we have just not gotten a lot of groundswell against this from members, : command not found
../data/train.txt: line 2: Date: command not found
../data/train.txt: line 13: The: command not found
../data/train.txt: line 1207: Do: command not found
../data/train.txt: line 1218: The: command not found
../data/train.txt: line 1219: Timex: command not found
../data/train.txt: line 1220: The: command not found
../data/train.txt: line 1221: Critics: command not found
../data/train.txt: line 1224: Customers: command not found
../data/train.txt: line 1240: Sony: command not found
../data/train.txt: line 1241: Each: command not found
../data/train.txt: line 1242: The: command not found
../data/train.txt: line 1244: Forster: command not found
../data/train.txt: line 1244: A: command not found
../data/train.txt: line 1245: Much: command not found
../data/train.txt: line 1248: P.S.: command not found
../data/train.txt: line 1249: The:

In [None]:
from google.datalab.ml import TensorBoard
TensorBoard().start('gs://{}/verbosity/model_full_validated_30k'.format(BUCKET))

In [12]:
for pid in TensorBoard.list()['pid']:
    TensorBoard().stop(pid)
    print('Stopped TensorBoard with pid {}'.format(pid))

Stopped TensorBoard with pid 3805


In [None]:
%%bash

DATADIR=gs://${BUCKET}/verbosity/data
OUTDIR=gs://${BUCKET}/verbosity/model_hparam
JOBNAME=verbosity_$(date -u +%y%m%d_%H%M%S)
# echo $OUTDIR $REGION $JOBNAME
# gsutil -m rm -rf $OUTDIR
echo 'Y' | t2t-trainer \
  --data_dir=$DATADIR \
  --t2t_usr_dir=./verbosity/trainer \
  --problem=$PROBLEM \
  --model=transformer \
  --cloud_mlengine\
  --hparams_set=transformer_verbosity \
  --output_dir=$OUTDIR \
  --hparams_range=transformer_verbosity_range \
  --autotune_objective='metrics-verbosity_line_problem/accuracy_per_sequence' \
  --autotune_maximize \
  --autotune_max_trials=4 \
  --train_steps=10000

In [None]:
# Exporting model for deployment

In [11]:
%%bash
pip install ipykernel

pip install tensorflow_hub

Collecting tensorflow_hub
  Downloading https://files.pythonhosted.org/packages/10/5c/6f3698513cf1cd730a5ea66aec665d213adf9de59b34f362f270e0bd126f/tensorflow_hub-0.4.0-py2.py3-none-any.whl (75kB)
Installing collected packages: tensorflow-hub
Successfully installed tensorflow-hub-0.4.0


In [None]:
%%bash
TOPDIR=gs://${BUCKET}
OUTDIR=${TOPDIR}/verbosity/model_full_validated_30k
DATADIR=${TOPDIR}/verbosity/data
MODEL=transformer
HPARAMS=transformer_verbosity
BEAM_SIZE=4
ALPHA=0.6

t2t-exporter \
  --model=$MODEL \
  --hparams_set=$HPARAMS \
  --problem=$PROBLEM \
  --t2t_usr_dir=./verbosity/trainer \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --data_dir=$DATADIR \
  --output_dir=$OUTDIR

In [13]:
# Serving model

In [36]:
%%bash
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/verbosity/model_full_validated_30k/export | tail -1)
echo $MODEL_LOCATION
saved_model_cli show --dir $MODEL_LOCATION --tag_set serve --signature_def serving_default

gs://springiwkhyatiab1/verbosity/model_full_validated_30k/export/1556587334/
The given SavedModel SignatureDef contains the following input(s):
  inputs['input'] tensor_info:
      dtype: DT_STRING
      shape: (-1)
      name: serialized_example:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['batch_prediction_key'] tensor_info:
      dtype: DT_INT32
      shape: (-1, 1)
      name: DatasetToSingleElement:0
  outputs['outputs'] tensor_info:
      dtype: DT_INT32
      shape: (-1, -1)
      name: transformer/strided_slice_10:0
  outputs['scores'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1)
      name: transformer/strided_slice_11:0
Method name is: tensorflow/serving/predict


  from ._conv import register_converters as _register_converters


In [37]:
%%writefile mlengine.json
description: Verbosity reduction service on ML Engine
autoScaling:
    minNodes: 1  # We don't want this model to autoscale down to zero


Overwriting mlengine.json


In [30]:
%%bash
gcloud ai-platform models create verbosity

Created ml engine model [projects/spring2019iw/models/verbosity].


In [45]:
%%bash
MODEL_NAME="verbosity"
MODEL_VERSION="v1"
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/verbosity/model_full_validated_30k/export | tail -1)
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
#gcloud ml-engine models create ${MODEL_NAME} --regions $REGION
gcloud alpha ai-platform versions create --machine-type=mls1-highcpu-4 ${MODEL_VERSION} \
       --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --runtime-version=1.5 --config=mlengine.json

Deleting and deploying verbosity v1 from gs://springiwkhyatiab1/verbosity/model_full_validated_30k/export/1556587334/ ... this will take a few minutes


This will delete version [v1]...

Do you want to continue (Y/n)?  Please enter 'y' or 'n':  Please enter 'y' or 'n':  Please enter 'y' or 'n':  Please enter 'y' or 'n':  
Deleting version [v1]......
.done.


In [47]:
%%bash
gcloud components update --quiet
gcloud components install alpha --quiet


All components are up to date.

All components are up to date.


In [51]:
%%bash
MODEL_NAME="verbosity"
MODEL_VERSION="v1"
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/verbosity/model_full_validated_30k/export | tail -1)
gcloud alpha ai-platform versions create --machine-type=mls1-highcpu-4 ${MODEL_VERSION} \
       --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --runtime-version=1.5 --config=mlengine.json

ERROR: (gcloud.alpha.ai-platform.versions.create) ALREADY_EXISTS: Field: version.name Error: A version with the same name already exists.
- '@type': type.googleapis.com/google.rpc.BadRequest
  fieldViolations:
  - description: A version with the same name already exists.
    field: version.name


In [54]:
%%bash
git clone https://github.com/GoogleCloudPlatform/training-data-analyst/courses/machine_learning/deepdive/09_sequence/application/

Cloning into 'training-data-analyst'...
