In [1]:
!pip install git+https://github.com/PatDunajewski/ZASN.git -q

[K     |████████████████████████████████| 1.2 MB 13.1 MB/s 
[K     |████████████████████████████████| 4.6 MB 42.7 MB/s 
[K     |████████████████████████████████| 1.4 MB 36.0 MB/s 
[K     |████████████████████████████████| 4.3 MB 57.2 MB/s 
[K     |████████████████████████████████| 367 kB 56.3 MB/s 
[K     |████████████████████████████████| 1.1 MB 38.9 MB/s 
[K     |████████████████████████████████| 5.8 MB 44.1 MB/s 
[K     |████████████████████████████████| 385 kB 50.1 MB/s 
[K     |████████████████████████████████| 193 kB 59.4 MB/s 
[K     |████████████████████████████████| 352 kB 51.4 MB/s 
[K     |████████████████████████████████| 981 kB 48.0 MB/s 
[K     |████████████████████████████████| 48 kB 4.0 MB/s 
[K     |████████████████████████████████| 79 kB 4.9 MB/s 
[K     |████████████████████████████████| 251 kB 52.2 MB/s 
[K     |████████████████████████████████| 191 kB 55.7 MB/s 
[K     |████████████████████████████████| 191 kB 69.7 MB/s 
[K     |███████████████████

In [2]:
from bigbird.core import flags, modeling, utils
from bigbird.classifier import run_classifier
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from tqdm import tqdm
import pandas as pd
import numpy as np
import time
import sys
import os

# Flag registry exposed by bigbird.core.flags; every hyper-parameter below is set on it.
FLAGS = flags.FLAGS

# Register a dummy string flag named "f" exactly once (the guard keeps the cell
# re-runnable), then parse the process arguments.
# NOTE(review): presumably needed because the notebook kernel is started with
# `-f <connection file>`, which flag parsing would otherwise reject -- confirm.
if not hasattr(FLAGS, "f"):
  flags.DEFINE_string("f", "", "")
FLAGS(sys.argv)

# Switch TensorFlow to v2 behaviour before any model code runs.
tf.enable_v2_behavior()

In [3]:
# --- Data -------------------------------------------------------------------
FLAGS.data_dir = "tfds://imdb_reviews/plain_text"
FLAGS.vocab_model_file = "gpt2"
FLAGS.max_encoder_length = 2048  # reduced from 4096 for a quicker demo on free Colab

# --- Model ------------------------------------------------------------------
FLAGS.attention_type = "block_sparse"
FLAGS.norm_type = "postnormAlbert"
FLAGS.embeddings_size = 32
FLAGS.hidden_size = 768

# --- Optimisation / schedule --------------------------------------------------
FLAGS.optimizer = "Adam"
FLAGS.learning_rate = 1e-5
FLAGS.train_batch_size = 2
FLAGS.num_train_steps = 1500  # alternative value noted by the author: 2000
FLAGS.num_warmup_steps = 1
FLAGS.save_checkpoints_steps = 10

# Snapshot of all flag values as a plain dict; the model and head are built from it.
bert_config = flags.as_dictionary()

In [4]:
# Encoder built from the flag-derived configuration dictionary.
model = modeling.BertModel(bert_config)

# Classification/loss head placed on top of the encoder's pooled output.
# Its width, number of classes, dropout and weight initializer all come from
# the same configuration so it stays consistent with the encoder.
classifier_init = utils.create_initializer(bert_config["initializer_range"])
headl = run_classifier.ClassifierLossLayer(
    bert_config["hidden_size"],
    bert_config["num_labels"],
    bert_config["hidden_dropout_prob"],
    classifier_init,
    name=bert_config["scope"] + "/classifier")

In [5]:
@tf.function(experimental_compile=True)
def fwd_bwd(features, labels):
  """Run one training-mode forward pass and back-propagate the classifier loss.

  Args:
    features: input batch, passed unchanged to the global `model`.
    labels: target labels, passed unchanged to the global head `headl`.

  Returns:
    A `(loss, log_probs, grads)` tuple. `grads` is ordered like
    `model.trainable_weights + headl.trainable_weights`, so callers must zip
    it against that same concatenation.
  """
  with tf.GradientTape() as tape:
    # Only the second element of the model output (the pooled output) feeds the head.
    _unused, pooled = model(features, training=True)
    # Third positional argument mirrors `training=True` above
    # (presumably the head's training switch -- confirm against ClassifierLossLayer).
    loss, log_probs = headl(pooled, labels, True)
  trainable = model.trainable_weights + headl.trainable_weights
  grads = tape.gradient(loss, trainable)
  return loss, log_probs, grads

In [None]:
# Build the training input function from the flag values, then materialise the
# dataset at the configured training batch size.
train_input_fn = run_classifier.input_fn_builder(
    data_dir=FLAGS.data_dir,
    vocab_model_file=FLAGS.vocab_model_file,
    max_encoder_length=FLAGS.max_encoder_length,
    substitute_newline=FLAGS.substitute_newline,
    is_training=True,
)
train_params = {'batch_size': FLAGS.train_batch_size}
dataset = train_input_fn(train_params)

[1mDownloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to ~/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

In [None]:
# Training
opt = tf.keras.optimizers.Adam(FLAGS.learning_rate)
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

# Progress reporting: aim for about `print_number` metric printouts over the run.
print_number = 10  # desired number of printouts
# Steps between printouts; floor division keeps it an int, max() keeps it >= 1
# so the modulo below never divides by zero on very short runs.
print_step = max(1, FLAGS.num_train_steps // print_number)

# One-hot depth must equal the number of classes the classifier head was built
# with, so read it from the same config entry instead of hard-coding it.
num_labels = bert_config["num_labels"]

for i, ex in enumerate(tqdm(dataset.take(FLAGS.num_train_steps), position=0)):
  features, labels = ex[0], ex[1]
  loss, log_probs, grads = fwd_bwd(features, labels)
  # The weight list is rebuilt each step, using the same concatenation order as
  # inside fwd_bwd, so every gradient is paired with its own variable.
  opt.apply_gradients(zip(grads, model.trainable_weights+headl.trainable_weights))
  # Both metrics are running aggregates over all steps seen so far.
  train_loss(loss)
  train_accuracy(tf.one_hot(labels, num_labels), log_probs)
  if i % print_step == 0:
    print('Loss = {:.4f}  Accuracy = {:.4f}'.format(train_loss.result().numpy(), train_accuracy.result().numpy()))

In [None]:
#Eval
@tf.function(experimental_compile=True)
def fwd_only(features, labels):
  """Run an inference-mode forward pass and compute the classifier loss.

  Args:
    features: input batch, passed unchanged to the global `model`.
    labels: target labels, passed unchanged to the global head `headl`.

  Returns:
    A `(loss, log_probs)` tuple as produced by `headl`.
  """
  # Only the second element of the model output (the pooled output) feeds the head.
  _unused, pooled = model(features, training=False)
  # Third positional argument mirrors `training=False` above
  # (presumably the head's training switch -- confirm against ClassifierLossLayer).
  head_outputs = headl(pooled, labels, False)
  loss, log_probs = head_outputs
  return loss, log_probs

# Evaluation input pipeline: same builder and flags as training, but with
# is_training=False and the evaluation batch size.
eval_input_fn = run_classifier.input_fn_builder(
        data_dir=FLAGS.data_dir,
        vocab_model_file=FLAGS.vocab_model_file,
        max_encoder_length=FLAGS.max_encoder_length,
        substitute_newline=FLAGS.substitute_newline,
        is_training=False)
eval_dataset = eval_input_fn({'batch_size': FLAGS.eval_batch_size})

# Running aggregates over the whole evaluation set.
eval_loss = tf.keras.metrics.Mean(name='eval_loss')
eval_accuracy = tf.keras.metrics.CategoricalAccuracy(name='eval_accuracy')

# One-hot depth must equal the number of classes the classifier head was built
# with, so read it from the same config entry instead of hard-coding it.
eval_num_labels = bert_config["num_labels"]

for ex in tqdm(eval_dataset, position=0):
  features, labels = ex[0], ex[1]
  loss, log_probs = fwd_only(features, labels)
  eval_loss(loss)
  eval_accuracy(tf.one_hot(labels, eval_num_labels), log_probs)
# Single summary line once every evaluation batch has been consumed.
print('Loss = {:.4f}  Accuracy = {:.4f}'.format(eval_loss.result().numpy(), eval_accuracy.result().numpy()))