<a href="https://colab.research.google.com/github/masies/CRA/blob/main/Finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab magic selecting the TensorFlow 2.x runtime.
%tensorflow_version 2.x
!pip3 install --upgrade pip
# Disabled alternative: install the released t5 package from PyPI.
#!pip install -qU t5
# Install t5 straight from the GitHub repository. NOTE(review): no commit or
# tag is pinned, so a re-run may pick up a different version.
!pip3 install git+https://github.com/google-research/text-to-text-transfer-transformer.git #extra_id_x support

import functools
import os
import time
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import tensorflow.compat.v1 as tf
import tensorflow_datasets as tfds

import t5

# Base directory (a Google Cloud Storage bucket). In the cells shown here it is
# only validated; the model and data paths below are spelled out separately.
BASE_DIR = "gs://example_comment"

if not BASE_DIR or BASE_DIR == "gs://":
  raise ValueError("You must enter a BASE_DIR.")
ON_CLOUD = True


if ON_CLOUD:
  import tensorflow_gcs_config
  from google.colab import auth
  TPU_TOPOLOGY = "2x2"
  try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    TPU_ADDRESS = tpu.get_master()
    print('Running on TPU:', TPU_ADDRESS)
  except ValueError as err:
    # Raise RuntimeError (not BaseException) so that ordinary
    # `except Exception` handlers and tooling see the failure, and chain the
    # resolver's own error so its message is not lost.
    raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err
  # Set credentials for GCS reading/writing from Colab and TPU.
  auth.authenticate_user()
  tf.config.experimental_connect_to_host(TPU_ADDRESS)
  tensorflow_gcs_config.configure_gcs_from_colab_auth()

# The rest of the notebook uses the TF1-style (graph mode) API.
tf.disable_v2_behavior()

# Improve logging.
from contextlib import contextmanager
import logging as py_logging

# When running on Colab/cloud: stop the TensorFlow logger from propagating its
# records to ancestor loggers, and set the root Python logger to INFO.
if ON_CLOUD:
  tf.get_logger().propagate = False
  py_logging.root.setLevel('INFO')

@contextmanager
def tf_verbosity_level(level):
  """Temporarily switch TensorFlow's logging verbosity inside a `with` block.

  Args:
    level: verbosity handed to `tf.logging.set_verbosity` for the duration of
      the block.

  Yields:
    None. The verbosity that was in effect on entry is restored on exit, also
    when the body of the `with` block raises.
  """
  og_level = tf.logging.get_verbosity()
  tf.logging.set_verbosity(level)
  try:
    yield
  finally:
    # Without the finally, an exception in the block would leave the
    # temporary verbosity in place for the rest of the session.
    tf.logging.set_verbosity(og_level)

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 4.9MB/s 
[?25hInstalling collected packages: pip
  Found existing installation: pip 19.3.1
    Uninstalling pip-19.3.1:
      Successfully uninstalled pip-19.3.1
Successfully installed pip-21.0.1
Collecting git+https://github.com/google-research/text-to-text-transfer-transformer.git
  Cloning https://github.com/google-research/text-to-text-transfer-transformer.git to /tmp/pip-req-build-m7t44n6l
  Running command git clone -q https://github.com/google-research/text-to-text-transfer-transformer.git /tmp/pip-req-build-m7t44n6l
Collecting mesh-tensorflow[transformer]>=0.1.13
  Downloading mesh_tensorflow-0.1.19-py3-none-any.whl (366 kB)
[K     |████████████████████████████████| 366 kB 6.5 MB/s 
Collecting rouge-score
  Downloading rouge_score-0.0.4-py2.py3-none-any

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Fine-tuning datasets. For every task: the GCS location of the train and
# validation TSV files, followed by the number of examples in each split.

# Task 1: code -> code
nq_tsv_path_code_code = dict(
    train='gs://code_review_automation/dataset/new/fineTuningDatasets/code_code/train.tsv',
    validation='gs://code_review_automation/dataset/new/fineTuningDatasets/code_code/val.tsv',
)
num_nq_examples_code_code = {"train": 134442, "validation": 16805}

# Task 2: code -> comment
nq_tsv_path_code_comment = dict(
    train='gs://code_review_automation/dataset/new/fineTuningDatasets/code_comment/train.tsv',
    validation='gs://code_review_automation/dataset/new/fineTuningDatasets/code_comment/val.tsv',
)
num_nq_examples_code_comment = {"train": 134442, "validation": 16805}

# Task 3: code and comment -> code
nq_tsv_path_codeANDcomment_code = dict(
    train='gs://code_review_automation/dataset/new/fineTuningDatasets/codeANDcomment_code/train.tsv',
    validation='gs://code_review_automation/dataset/new/fineTuningDatasets/codeANDcomment_code/val.tsv',
)
num_nq_examples_codeANDcomment_code = {"train": 134442, "validation": 16805}

# Task 4: marked code -> code
nq_tsv_path_marked_code = dict(
    train='gs://code_review_automation/dataset/new/fineTuningDatasets/marked_code/train.tsv',
    validation='gs://code_review_automation/dataset/new/fineTuningDatasets/marked_code/val.tsv',
)
num_nq_examples_marked_code = {"train": 134442, "validation": 16805}



In [3]:
from t5.data import postprocessors as t5_postprocessors
from t5.seqio import Feature,SentencePieceVocabulary

# Paths of the SentencePiece model and vocab files.
# They must be the same files that were used for the pre-training phase.

vocab_model_path = 'gs://code_review_automation/CodeReviewModel/TestModel.model'
vocab_path = 'gs://code_review_automation/CodeReviewModel/TestModel.vocab'

# Short aliases for the t5 task registry and TFDS task class.
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

def get_default_vocabulary():
  """Return a SentencePieceVocabulary built from `vocab_model_path`.

  The second positional argument is 100 (presumably the number of extra
  sentinel ids; confirm against the installed seqio signature).
  """
  vocabulary = SentencePieceVocabulary(vocab_model_path, 100)
  return vocabulary

# Feature specification passed as `output_features` to every task registered
# below. Both features append EOS; "inputs" is marked as not required.
DEFAULT_OUTPUT_FEATURES = dict(
    inputs=Feature(
        vocabulary=get_default_vocabulary(),
        add_eos=True,
        required=False,
    ),
    targets=Feature(
        vocabulary=get_default_vocabulary(),
        add_eos=True,
    ),
)


############### FIRST TASK ###############
print("1st TASK : code 2 code")

def nq_dataset_code_code(split, shuffle_files=True):
  """Load the raw code-to-code examples of one split from its TSV file.

  Args:
    split: key into `nq_tsv_path_code_code` ("train" or "validation").
    shuffle_files: accepted because this function is used as a task
      `dataset_fn`; ignored, since each split is a single file.

  Returns:
    A tf.data.Dataset yielding one {"input": ..., "output": ...} dict of
    string tensors per line of the file.
  """
  # We only have one file for each split.
  del shuffle_files

  # Load lines from the text file as examples.
  ds = tf.data.TextLineDataset(nq_tsv_path_code_code[split])
  # `record_defaults` carries the per-column DEFAULT VALUE (its dtype fixes the
  # column type), not a type name: empty-string defaults keep an empty field
  # empty instead of silently replacing it with the literal text "string".
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["", ""],
                        field_delim="\t", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Name the two decoded columns.
  ds = ds.map(lambda *ex: dict(zip(["input", "output"], ex)))
  return ds

# Show the first three raw (not yet preprocessed) examples of each split.
for split_name, banner in (("validation", "A few raw validation examples..."),
                           ("train", "A few raw training examples...")):
  print(banner)
  for ex in tfds.as_numpy(nq_dataset_code_code(split_name).take(3)):
    print(ex)

def code_code_preprocessing(ds):
  """Map raw {"input", "output"} examples to the {"inputs", "targets"} text format.

  The input text is prefixed with the task tag 'code2code: '; the output text
  becomes the target unchanged.
  """
  def to_inputs_and_targets(ex):
    prefixed_source = tf.strings.join(['code2code: ', ex['input']])
    target_text = tf.strings.join([ex['output']], separator=' ')
    return {'inputs': prefixed_source, 'targets': target_text}

  return ds.map(
      to_inputs_and_targets,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)
  
# Register the "code_code" task. Any existing registration under that name is
# removed first (presumably so this cell can be re-run without a name clash).
t5.data.TaskRegistry.remove('code_code')
t5.data.TaskRegistry.add(
    "code_code",
    dataset_fn=nq_dataset_code_code,
    splits=["train", "validation"],
    text_preprocessor=[code_code_preprocessing],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples=num_nq_examples_code_code
)

# Sanity check: build the train split with sequence lengths of 512 for inputs
# and targets, and print the first three preprocessed examples.
nq_task = t5.data.TaskRegistry.get("code_code")
ds = nq_task.get_dataset(split="train", sequence_length={"inputs": 512, "targets": 512})
print("A few preprocessed training examples...")
for ex in tfds.as_numpy(ds.take(3)):
  print(ex)




############### SECOND TASK ###############
print("2nd TASK : code 2 comment")

def nq_dataset_code_comment(split, shuffle_files=False):
  """Load the raw code-to-comment examples of one split from its TSV file.

  Args:
    split: key into `nq_tsv_path_code_comment` ("train" or "validation").
    shuffle_files: accepted because this function is used as a task
      `dataset_fn`; ignored, since each split is a single file.

  Returns:
    A tf.data.Dataset yielding one {"input": ..., "output": ...} dict of
    string tensors per line of the file.
  """
  # We only have one file for each split.
  del shuffle_files

  # Load lines from the text file as examples.
  ds = tf.data.TextLineDataset(nq_tsv_path_code_comment[split])
  # `record_defaults` carries the per-column DEFAULT VALUE (its dtype fixes the
  # column type), not a type name: empty-string defaults keep an empty field
  # empty instead of silently replacing it with the literal text "string".
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["", ""],
                        field_delim="\t", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Name the two decoded columns.
  ds = ds.map(lambda *ex: dict(zip(["input", "output"], ex)))
  return ds

# Show the first three raw (not yet preprocessed) examples of each split.
for split_name, banner in (("validation", "A few raw validation examples..."),
                           ("train", "A few raw training examples...")):
  print(banner)
  for ex in tfds.as_numpy(nq_dataset_code_comment(split_name).take(3)):
    print(ex)

def code_comment_preprocessing(ds):
  """Map raw {"input", "output"} examples to the {"inputs", "targets"} text format.

  The input text is prefixed with the task tag 'code2comment: '; the output
  text becomes the target unchanged.
  """
  def to_inputs_and_targets(ex):
    prefixed_source = tf.strings.join(['code2comment: ', ex['input']])
    target_text = tf.strings.join([ex['output']], separator=' ')
    return {'inputs': prefixed_source, 'targets': target_text}

  return ds.map(
      to_inputs_and_targets,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Create a new training task: register "code_comment". Any existing
# registration under that name is removed first (presumably so this cell can be
# re-run without a name clash).
t5.data.TaskRegistry.remove('code_comment')
t5.data.TaskRegistry.add(
    "code_comment",
    dataset_fn=nq_dataset_code_comment,
    splits=["train", "validation"],
    text_preprocessor=[code_comment_preprocessing],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples=num_nq_examples_code_comment
)

# Sanity check: build the train split with sequence lengths of 512 for inputs
# and targets, and print the first three preprocessed examples.
nq_task = t5.data.TaskRegistry.get("code_comment")
ds = nq_task.get_dataset(split="train", sequence_length={"inputs": 512, "targets": 512})
print("A few preprocessed training examples...")
for ex in tfds.as_numpy(ds.take(3)):
  print(ex)



############### THIRD TASK ###############
print("3rd TASK : code and comment 2 code")

def nq_dataset_codeANDcomment_code(split, shuffle_files=False):
  """Load the raw code-and-comment-to-code examples of one split from its TSV file.

  Args:
    split: key into `nq_tsv_path_codeANDcomment_code` ("train" or
      "validation").
    shuffle_files: accepted because this function is used as a task
      `dataset_fn`; ignored, since each split is a single file.

  Returns:
    A tf.data.Dataset yielding one {"input": ..., "output": ...} dict of
    string tensors per line of the file.
  """
  # We only have one file for each split.
  del shuffle_files

  # Load lines from the text file as examples.
  ds = tf.data.TextLineDataset(nq_tsv_path_codeANDcomment_code[split])
  # `record_defaults` carries the per-column DEFAULT VALUE (its dtype fixes the
  # column type), not a type name: empty-string defaults keep an empty field
  # empty instead of silently replacing it with the literal text "string".
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["", ""],
                        field_delim="\t", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Name the two decoded columns.
  ds = ds.map(lambda *ex: dict(zip(["input", "output"], ex)))
  return ds

# Show the first three raw (not yet preprocessed) examples of each split.
for split_name, banner in (("validation", "A few raw validation examples..."),
                           ("train", "A few raw training examples...")):
  print(banner)
  for ex in tfds.as_numpy(nq_dataset_codeANDcomment_code(split_name).take(3)):
    print(ex)

def codeANDcomment_code_preprocessing(ds):
  """Map raw {"input", "output"} examples to the {"inputs", "targets"} text format.

  The input text is prefixed with the task tag 'code&comment2code: '; the
  output text becomes the target unchanged.
  """
  def to_inputs_and_targets(ex):
    prefixed_source = tf.strings.join(['code&comment2code: ', ex['input']])
    target_text = tf.strings.join([ex['output']], separator=' ')
    return {'inputs': prefixed_source, 'targets': target_text}

  return ds.map(
      to_inputs_and_targets,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Create a new training task: register "codeANDcomment_code". Any existing
# registration under that name is removed first (presumably so this cell can be
# re-run without a name clash).
t5.data.TaskRegistry.remove('codeANDcomment_code')
t5.data.TaskRegistry.add(
    "codeANDcomment_code",
    dataset_fn=nq_dataset_codeANDcomment_code,
    splits=["train", "validation"],
    text_preprocessor=[codeANDcomment_code_preprocessing],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples=num_nq_examples_codeANDcomment_code
)

# Sanity check: build the train split with sequence lengths of 512 for inputs
# and targets, and print the first three preprocessed examples.
nq_task = t5.data.TaskRegistry.get("codeANDcomment_code")
ds = nq_task.get_dataset(split="train", sequence_length={"inputs": 512, "targets": 512})
print("A few preprocessed training examples...")
for ex in tfds.as_numpy(ds.take(3)):
  print(ex)



############### FOURTH TASK ###############
print("4th TASK : marked code 2 code")

def nq_dataset_marked_code(split, shuffle_files=False):
  """Load the raw marked-code-to-code examples of one split from its TSV file.

  Args:
    split: key into `nq_tsv_path_marked_code` ("train" or "validation").
    shuffle_files: accepted because this function is used as a task
      `dataset_fn`; ignored, since each split is a single file.

  Returns:
    A tf.data.Dataset yielding one {"input": ..., "output": ...} dict of
    string tensors per line of the file.
  """
  # We only have one file for each split.
  del shuffle_files

  # Load lines from the text file as examples.
  ds = tf.data.TextLineDataset(nq_tsv_path_marked_code[split])
  # `record_defaults` carries the per-column DEFAULT VALUE (its dtype fixes the
  # column type), not a type name: empty-string defaults keep an empty field
  # empty instead of silently replacing it with the literal text "string".
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["", ""],
                        field_delim="\t", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Name the two decoded columns.
  ds = ds.map(lambda *ex: dict(zip(["input", "output"], ex)))
  return ds

# Show the first three raw (not yet preprocessed) examples of each split.
for split_name, banner in (("validation", "A few raw validation examples..."),
                           ("train", "A few raw training examples...")):
  print(banner)
  for ex in tfds.as_numpy(nq_dataset_marked_code(split_name).take(3)):
    print(ex)

def marked_code_preprocessing(ds):
  """Map raw {"input", "output"} examples to the {"inputs", "targets"} text format.

  The input text is prefixed with the task tag 'markedCode2code: '; the output
  text becomes the target unchanged.
  """
  def to_inputs_and_targets(ex):
    prefixed_source = tf.strings.join(['markedCode2code: ', ex['input']])
    target_text = tf.strings.join([ex['output']], separator=' ')
    return {'inputs': prefixed_source, 'targets': target_text}

  return ds.map(
      to_inputs_and_targets,
      num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Create a new training task: register "marked_code". Any existing
# registration under that name is removed first (presumably so this cell can be
# re-run without a name clash).
t5.data.TaskRegistry.remove('marked_code')
t5.data.TaskRegistry.add(
    "marked_code",
    dataset_fn=nq_dataset_marked_code,
    splits=["train", "validation"],
    text_preprocessor=[marked_code_preprocessing],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples=num_nq_examples_marked_code
)

# Sanity check: build the train split with sequence lengths of 512 for inputs
# and targets, and print the first three preprocessed examples.
nq_task = t5.data.TaskRegistry.get("marked_code")
ds = nq_task.get_dataset(split="train", sequence_length={"inputs": 512, "targets": 512})
print("A few preprocessed training examples...")
for ex in tfds.as_numpy(ds.take(3)):
  print(ex)

1st TASK : code 2 code
A few raw validation examples...
{'input': b'public boolean getRequiredParamBoolean(final String key) throws ActionParamsException { final String errMsg = "Required parameter \'" + key + "\' missing!"; final String val = getRequiredParam(key, errMsg); try { return Boolean.parseBoolean(val); } catch (Exception e) { throw new ActionParamsException(errMsg); } }', 'output': b'public boolean getRequiredParamBoolean(final String key) throws ActionParamsException { final String val = getRequiredParam(key); try { return Boolean.parseBoolean(val); } catch (Exception e) { throw new ActionParamsException(e.getMessage()); } }'}
{'input': b'private void close(final boolean clean) { if (state() == State.CREATED) { transitionTo(State.CLOSING); } else { if (state() == State.RUNNING) { if (clean) commit(); transitionTo(State.CLOSING); } if (state() == State.CLOSING) { StateManagerUtil.closeStateManager(log, logPrefix, clean, false, stateMgr, stateDirectory, "standby"); } else { t

  _tokenize, num_parallel_calls=tf.data.experimental.AUTOTUNE)


A few preprocessed training examples...
{'inputs_pretokenized': b'code2code: protected GuidedDecisionTableEditorPresenter getPresenter() { return new GuidedDecisionTableEditorPresenter(view, dtServiceCaller, docks, mock(PerspectiveManager.class), notification, decisionTableSelectedEvent, validationPopup, resourceType, editMenuBuilder, viewMenuBuilder, insertMenuBuilder, radarMenuBuilder, modeller, beanManager, placeManager, columnsPage, saveAndRenameCommandBuilder, alertsButtonMenuItemBuilder, downloadMenuItem) { @Override protected Command getSaveAndRenameCommand() { return mock(Command.class); } }; }', 'inputs': array([   89,   111,   220,    20,   395,  5749,    94,  9718,   645,
        2922, 10815,    99, 10815,    43,    15,    42,    33,  5749,
          94,  9718,   645,  2922, 10815,     9,  1239,    11,  6889,
         365,  7955,    11,  1402,  7982,    11,  2433,     9, 16549,
         433,     8,   142,   334,  2660,    11,  5833,   645,  3932,
         566,    11,  1204, 

In [4]:
def _rate_num_input_examples(task):
  if "train" in task.splits:
    return float(task.num_input_examples("train"))
  elif "validation" in task.splits:
    return float(task.num_input_examples("validation"))
  else:
    raise ValueError("Task %s does not have a train or validation split." % (task.name))


# Register the "all_tasks" mixture over the four tasks (any existing mixture
# of that name is removed first). Each task's rate is computed by
# _rate_num_input_examples. The commented-out lists are single-task
# alternatives; the commented-out default_rate=1.0 is a fixed-rate alternative.
t5.data.MixtureRegistry.remove("all_tasks")
t5.data.MixtureRegistry.add(
    "all_tasks",
    # ["code_comment"],
    # ["marked_code"],
    # ["code_code"],
    # ["codeANDcomment_code"],
    ["codeANDcomment_code", "code_code", "marked_code", "code_comment"],
    default_rate=_rate_num_input_examples
     #default_rate=1.0
)


# from mesh_tensorflow.transformer.learning_rate_schedules import slanted_triangular 

# from mesh_tensorflow.transformer.learning_rate_schedules import truncated_rsqrt
 
# from tensorflow.keras.optimizers.schedules import PolynomialDecay

# starter_learning_rate = 0.05
# end_learning_rate = 0.001
# decay_steps = 10000

# learning_rate_fn = PolynomialDecay(
#     starter_learning_rate,
#     decay_steps,
#     end_learning_rate,
#     power=0.5)


# Model size key used to pick the parallelism/batch/checkpoint settings below.
MODEL_SIZE = "small"

############ CHANGE HERE ############
# Directory where this run's checkpoints and outputs are written.
# NOTE(review): the path mentions "big_model" while MODEL_SIZE is "small" --
# confirm the two are meant to go together.
MODEL_DIR = 'gs://code_review_automation/fine_tuning/HP_tuning/big_model/mixture/constant'

# Specify the pre-trained dir which must contain the pre-trained models, the operative_config.gin file and the checkpoint file as well
PRETRAINED_DIR='gs://code_review_automation/model_dumps/'


# Per model size: (model_parallelism, train_batch_size, keep_checkpoint_max).
model_parallelism, train_batch_size, keep_checkpoint_max = {
    "small": (1, 128, 60),
    "base": (2, 128, 8),
    "large": (8, 64, 4),
    "3B": (8, 16, 1),
    "11B": (8, 16, 1)}[MODEL_SIZE]

tf.io.gfile.makedirs(MODEL_DIR)

# Build the Mesh-TensorFlow T5 model wrapper. TPU_ADDRESS and TPU_TOPOLOGY are
# only defined by the setup cell when ON_CLOUD is true.
model = t5.models.MtfModel(
    model_dir=MODEL_DIR,
    tpu=TPU_ADDRESS,
    tpu_topology=TPU_TOPOLOGY,
    model_parallelism=model_parallelism,
    batch_size=train_batch_size,
    ############ CHANGE HERE ############
    # A plain number here (rather than a schedule function); presumably
    # interpreted as a constant learning rate -- TODO confirm.
    learning_rate_schedule = 0.001,
    sequence_length={"inputs": 512, "targets": 512},
    save_checkpoints_steps=5000,
    keep_checkpoint_max=keep_checkpoint_max if ON_CLOUD else None,
    iterations_per_loop=100,
)

In [5]:
############ CHANGE HERE ############
# Copy the gin configuration from GCS into the current working directory.
!gsutil cp gs://code_review_automation/fine_tuning/constant/operative_config.gin ./operative_config.gin 
# NOTE(review): absolute path; it matches the copy above only when the working
# directory is /content (presumably the Colab default -- confirm).
PATH_GIN_FILE = '/content/operative_config.gin'

Copying gs://code_review_automation/fine_tuning/constant/operative_config.gin...
/ [0 files][    0.0 B/ 12.6 KiB]                                                / [1 files][ 12.6 KiB/ 12.6 KiB]                                                
Operation completed over 1 objects/12.6 KiB.                                     


In [None]:
import gin

# Parse the downloaded gin file with the gin config unlocked, then fine-tune
# on the "all_tasks" mixture starting from the model in PRETRAINED_DIR.
with gin.unlock_config():
    gin.parse_config_file(PATH_GIN_FILE)
    #RUN FINE-TUNING
    # Step budget passed to finetune (presumably counted on top of the
    # pre-trained checkpoint's step -- TODO confirm).
    FINETUNE_STEPS = 50000
    model.finetune(
        mixture_or_task_name="all_tasks",
        pretrained_model_dir=PRETRAINED_DIR,
        finetune_steps=FINETUNE_STEPS
    )

INFO:root:system_path_file_exists:gs://code_review_automation/model_dumps/operative_config.gin
ERROR:root:Path not found: gs://code_review_automation/model_dumps/operative_config.gin


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/big_model/mixture/constant', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.44.40.162:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.44.40.162:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://10.44.40.162:8470', '_evaluation_ma

  _tokenize, num_parallel_calls=tf.data.experimental.AUTOTUNE)


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('heads', 'model'), ('vocab', 'model'), ('batch', 'batch'), ('d_ff', 'model'), ('experts', 'batch')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7f821c5c5fd0>
INF

In [7]:
# Use a larger batch size for evaluation, which requires less memory.
model.batch_size = 1024
# Evaluate the "all_tasks" mixture on checkpoints 205000 through 250000,
# one every 5000 steps.
model.eval(
    mixture_or_task_name="all_tasks",
    checkpoint_steps=list(range(205000, 250001, 5000)),
)

INFO:root:system_path_file_exists:gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained/operative_config.gin
ERROR:root:Path not found: gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained/operative_config.gin
INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
  _tokenize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
INFO:absl:Skipping packing/padding for 'codeANDcomment_code' since sequence length is None.
INFO:absl:Setting sequence lengths to {'inputs': 869, 'targets': 179}
INFO:absl:Evaluating checkpoint step: 5000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa72808df50>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 5000: 0.000
INFO:absl:Evaluating checkpoint step: 10000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa7259d05d0>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 10000: 11.675
INFO:absl:Evaluating checkpoint step: 15000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa7262e0f90>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 15000: 12.668
INFO:absl:Evaluating checkpoint step: 20000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa727997110>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 20000: 11.967
INFO:absl:Evaluating checkpoint step: 25000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa723a51150>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 25000: 13.252
INFO:absl:Evaluating checkpoint step: 30000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa72493e110>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 30000: 13.018
INFO:absl:Evaluating checkpoint step: 35000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa724900dd0>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 35000: 14.186
INFO:absl:Evaluating checkpoint step: 40000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa727160cd0>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 40000: 13.602
INFO:absl:Evaluating checkpoint step: 45000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa72635a490>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 45000: 13.894
INFO:absl:Evaluating checkpoint step: 50000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa726ac8990>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 50000: 14.478
INFO:absl:Evaluating checkpoint step: 55000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa724fb4150>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 55000: 14.478
INFO:absl:Evaluating checkpoint step: 60000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa723ba3850>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 60000: 14.945
INFO:absl:Evaluating checkpoint step: 65000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa723b4c1d0>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 65000: 14.361
INFO:absl:Evaluating checkpoint step: 70000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa725aae110>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 70000: 14.478
INFO:absl:Evaluating checkpoint step: 75000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa724ff8510>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 75000: 14.419
INFO:absl:Evaluating checkpoint step: 80000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa723d27050>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 80000: 14.886
INFO:absl:Evaluating checkpoint step: 85000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa725db9a90>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 85000: 14.419
INFO:absl:Evaluating checkpoint step: 90000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa7251dc8d0>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 90000: 14.828
INFO:absl:Evaluating checkpoint step: 95000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa7278b2910>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 95000: 14.711
INFO:absl:Evaluating checkpoint step: 100000


INFO:tensorflow:Using config: {'_model_dir': 'gs://code_review_automation/fine_tuning/HP_tuning/final_version/comment_v1/codeANDcomment_code/non_pretrained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 5000, '_save_checkpoints_secs': None, '_session_config': graph_options {
  rewrite_options {
    disable_meta_optimizer: true
  }
}
cluster_def {
  job {
    name: "worker"
    tasks {
      key: 0
      value: "10.62.230.106:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({'worker': ['10.62.230.106:8470']}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': 'grpc://

INFO:absl:Automatically caching small dataset in memory: 'codeANDcomment_code:validation'
INFO:absl:Padding 'codeANDcomment_code' with sequence lengths: {'inputs': 869, 'targets': 179}


INFO:tensorflow:num_cores_per_replica: 1
INFO:tensorflow:computation_shape: [1, 1, 1, 1]
INFO:tensorflow:num_replicas: 8
INFO:tensorflow:device_assignment.topology.device_coordinates: [[[0 0 0 0]
  [0 0 0 1]
  [1 0 0 0]
  [1 0 0 1]
  [0 1 0 0]
  [0 1 0 1]
  [1 1 0 0]
  [1 1 0 1]]]
INFO:tensorflow:device_assignment.core_assignment: [[[0 0 0 0]]

 [[0 0 0 1]]

 [[1 0 0 0]]

 [[1 0 0 1]]

 [[0 1 0 0]]

 [[0 1 0 1]]

 [[1 1 0 0]]

 [[1 1 0 1]]]
INFO:tensorflow:auto_logical_to_physical_tpu logical_shape=[8] physical_shape=[2, 2, 2]
INFO:tensorflow:auto_logical_to_physical_tpu logical_to_physical = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (1, 0, 0), (1, 0, 1)]
INFO:tensorflow:SimdMeshImpl init: Shape[batch=8] LayoutRules{('ensemble', 'ensemble'), ('d_ff', 'model'), ('heads', 'model'), ('experts', 'batch'), ('batch', 'batch'), ('vocab', 'model')}
INFO:tensorflow:Device Assignment: <tensorflow.python.tpu.device_assignment.DeviceAssignment object at 0x7fa72657ea10>
INF

INFO:absl:eval/codeANDcomment_code/accuracy at step 100000: 14.945


In [None]:
if ON_CLOUD:
  %reload_ext tensorboard
  import tensorboard as tb
tb.notebook.start("--logdir " + MODEL_DIR)