In [None]:
# Colab setup cell: select TensorFlow 1.x, install the google-drive-ocamlfuse
# FUSE client, and walk through its headless OAuth handshake so Google Drive
# can be mounted afterwards.

# Colab line magic requesting the TensorFlow 1.x release line.
%tensorflow_version 1.x
# Install the apt tooling needed to add a PPA, register the PPA, refresh the
# package index, then install google-drive-ocamlfuse and fuse.
# Note on `2>&1 > /dev/null`: redirections apply left to right, so stderr is
# pointed at the cell output first and only stdout is discarded — errors from
# these commands remain visible.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user, then load the application-default credentials;
# their client_id / client_secret are passed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run: stdin is /dev/null and the combined output is filtered
# down to the line(s) containing "URL" (presumably the authorization link to
# open in a browser — confirm against the tool's output).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it in the notebook.
vcode = getpass.getpass()
# Second headless run: feed the verification code to the tool on stdin.
# NOTE(review): {vcode} and the client secret are interpolated into a shell
# command line, so they are subject to shell parsing — confirm this is acceptable.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [None]:
# Mount Google Drive via google-drive-ocamlfuse and switch into the project folder.

# Create the mount point (`-p`: no error if it already exists) and mount onto it.
!mkdir -p drive
!google-drive-ocamlfuse drive
import os
# Make the project directory inside the mount the working directory, so the
# relative paths used afterwards resolve against it.
# NOTE(review): the path is relative — re-running this cell after the chdir
# would resolve "drive" against the new working directory; confirm that
# re-execution is not expected.
os.chdir("drive/electra-paper/")

In [None]:
import datetime
import json
import os
import pprint
import random
import string
import sys
import tensorflow as tf

# Locate the TPU, authenticate the Colab user, and hand the user's credentials
# to the TPU so it can access GCS.

# Stop right away when COLAB_TPU_ADDR is absent from the environment; the
# assertion message treats that as "not connected to a TPU runtime".
assert 'COLAB_TPU_ADDR' in os.environ, (
    'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
)
# gRPC endpoint of the TPU, built from the address found in the environment.
TPU_ADDRESS = ''.join(['grpc://', os.environ['COLAB_TPU_ADDR']])
print('TPU address is', TPU_ADDRESS)

from google.colab import auth
auth.authenticate_user()

with tf.Session(TPU_ADDRESS) as session:
  print('TPU devices:')
  tpu_devices = session.list_devices()
  pprint.pprint(tpu_devices)

  # Upload credentials to TPU: read the credentials JSON from /content/adc.json
  # and register it with the session.
  with open('/content/adc.json', 'r') as f:
    auth_info = json.loads(f.read())
  tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

In [None]:
FEATURES = ['title', 'keywords', 'abstract']
if "abstract" in FEATURES: 
    max_seq_length = 512
elif len(FEATURES) == 2: 
    max_seq_length = 256
else: 
    max_seq_length = 128

hparams = {
    "task_names": ["springer-paper"],
    "features": FEATURES,
    "model_dir": "gs://paper/electra-large-springer-{}/".format("+".join(FEATURES)), 
    "preprocessed_data_dir": "gs://paper/electra-large-springer-{}/".format("+".join(FEATURES)),

    "model_size": "large",
    "max_seq_length": max_seq_length,
    "vocab_file": "gs://bert-eng/electra_large/vocab.txt",
    "init_checkpoint": "gs://bert-eng/electra_large/electra_large",
    "do_lower_case": True, 
    "keep_all_models": True,

    "do_train": False,
    "train_batch_size": 32,
    "num_train_epochs": 10.0,
    "save_checkpoints_steps": 1000, 
    "iterations_per_loop": 1000,
    "keep_checkpoint_max": 40,
    "use_tfrecords_if_existing": False,

    "do_eval": True,
    "eval_key": "top1_accuracy",
    "do_test": True,
    "eval_batch_size": 32,
    "predict_batch_size": 32,
    "results_txt": "gs://paper/electra-large-springer-{}/results.txt".format("+".join(FEATURES)),
    "results_pkl": "gs://paper/electra-large-springer-{}/results.pkl".format("+".join(FEATURES)),
    
    "use_tpu": True,
    "num_tpu_cores": 8,
    "tpu_name": TPU_ADDRESS,
}
import json
with open("paper_config.json", "w") as outfile:
    json.dump(hparams, outfile)
!python3 run_finetuning.py \
    --data-dir=../data/springer-paper/ \
    --model-name=test \
    --hparams=paper_config.json