# Setup

In [1]:
!pip install -q tf-models-official==2.3.0
!pip install transformers

[K     |████████████████████████████████| 849kB 16.9MB/s 
[K     |████████████████████████████████| 358kB 57.7MB/s 
[K     |████████████████████████████████| 37.6MB 88kB/s 
[K     |████████████████████████████████| 1.2MB 50.7MB/s 
[K     |████████████████████████████████| 102kB 5.9MB/s 
[K     |████████████████████████████████| 174kB 59.5MB/s 
[?25h  Building wheel for py-cpuinfo (setup.py) ... [?25l[?25hdone
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/88/b1/41130a228dd656a1a31ba281598a968320283f48d42782845f6ba567f00b/transformers-4.2.2-py3-none-any.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 15.5MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 40.5MB/s 
Collecting tokenizers==0.9.4
[?25l  Downloading https://files.pythonhosted.org/pac

In [2]:
import os
import json

import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from sklearn.model_selection import train_test_split

This directory contains the configuration, vocabulary, and a pre-trained checkpoint used in this tutorial:

# Dataset

In [3]:
# import sys
# !git clone "https://github.com/giuliofortini/NLP_SQuAD_Project"
# sys.path.append("NLP_SQuAD_Project/")

from google.colab import files
try:
  with open('training_set.json') as f:
    json_data = json.load(f)
except:
  files.upload()
  with open('training_set.json') as f:
    json_data = json.load(f)

Saving training_set.json to training_set.json


In [4]:
data = pd.json_normalize(json_data['data'])
data

Unnamed: 0,title,paragraphs
0,University_of_Notre_Dame,"[{'context': 'Architecturally, the school has ..."
1,Beyoncé,[{'context': 'Beyoncé Giselle Knowles-Carter (...
2,Montana,[{'context': 'Montana i/mɒnˈtænə/ is a state i...
3,Genocide,"[{'context': 'The phrase ""in whole or in part""..."
4,Antibiotics,[{'context': 'The emergence of resistance of b...
...,...,...
437,Police,[{'context': 'A police force is a constituted ...
438,"Punjab,_Pakistan","[{'context': 'Punjab (Urdu, Punjabi: پنجاب, pa..."
439,Infection,[{'context': ' Among the vast varieties of mic...
440,Hunting,[{'context': 'Hunting is the practice of killi...


In [5]:
train, test = train_test_split(data, test_size=0.2)

In [6]:
def preprocess_df(df):
  temp = []
  title_dict = {}
  contexts = []

  for i, row in df.iterrows():
    for context in row['paragraphs']:
      contexts.append(context['context'])
      for qa in context['qas']:
        question_id = qa['id']
        question = qa['question']
        for answer in qa['answers']:
          answer_text = answer['text']
          answer_start = answer['answer_start']
          answer_end = answer_start+len(answer_text)
          temp.append([question_id, question, answer_text, answer_start, answer_end, i, len(contexts)-1])


  context_dict = dict(enumerate(contexts))

  df = pd.DataFrame(temp, columns=['question_id', 'question_text', 'answer_text', 'answer_start', 'answer_end', 'title_id', 'context_id'])
  
  return df, context_dict

train_df, train_context_dict = preprocess_df(train)
test_df, test_context_dict = preprocess_df(test)

In [7]:
train_df

Unnamed: 0,question_id,question_text,answer_text,answer_start,answer_end,title_id,context_id
0,572fe570947a6a140053cdc4,Islamic World is another way to refer to what?,Muslim world,9,21,399,0
1,572fe570947a6a140053cdc5,"When speaking of religion, what does Ummah ref...",those who adhere to the teachings of Islam,187,229,399,0
2,572fe570947a6a140053cdc6,"When speaking about culture, what does Ummah r...",Islamic civilization,303,323,399,0
3,572fe570947a6a140053cdc7,"Community, or Nation are direct translations o...",Ummah,59,64,399,0
4,572fed4e04bcaa1900d76ecd,When did the Islamic Golden age start?,622,155,158,399,1
...,...,...,...,...,...,...,...
69702,57281099ff5b5019007d9c42,Which two wrote Cultural studies [were] ground...,Lindlof and Taylor,22,40,297,15029
69703,57281099ff5b5019007d9c43,"From a Marxist view, what did they believe had...",the economic base,1035,1052,297,15029
69704,572818dfff5b5019007d9d2a,How many ways did Raimon Panikkar believed cul...,29 ways,28,35,297,15030
69705,572818dfff5b5019007d9d2b,What is the term used to describe what Moderni...,enlightenment,398,411,297,15030


In [8]:
import random
def print_squad_sample(train_data, context_dict, line_length=120, separator_length=150):
  sample = train_data.sample(frac=1).head(1)
  context = context_dict[sample['context_id'].item()]
  print('='*separator_length)
  print('CONTEXT: ')
  print('='*separator_length)
  lines = [''.join(context[idx:idx+line_length]) for idx in range(0, len(context), line_length)]
  for l in lines:
      print(l)
  #print(context)
  print('='*separator_length)
  questions = train_data[train_data['context_id'] == sample['context_id'].item()]
  print('QUESTION:', ' '*(3*separator_length//4), 'ANSWER:')
  for idx, row in questions.iterrows():
    question = row.question_text
    answer = row.answer_text
    print(question, ' '*(3*separator_length//4-len(question)+9), answer)

print_squad_sample(train_df, train_context_dict)

CONTEXT: 
Alexandra was born days before Kerry began law school. In 1982, Julia asked Kerry for a separation while she was sufferi
ng from severe depression. They were divorced on July 25, 1988, and the marriage was formally annulled in 1997. "After 1
4 years as a political wife, I associated politics only with anger, fear and loneliness" she wrote in A Change of Heart,
 her book about depression. Thorne later married Richard Charlesworth, an architect, and moved to Bozeman, Montana, wher
e she became active in local environmental groups such as the Greater Yellowstone Coalition. Thorne supported Kerry's 20
04 presidential run. She died of cancer on April 27, 2006.
QUESTION:                                                                                                                  ANSWER:
When did Kerry and his wife separate?                                                                                      1982
When did Kerry and his wife divorce?                               

# Encoding

#Tokenizer

In [9]:
from transformers import BertTokenizer, BertModel

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

print('[[CLS], [SEP]] = {}'.format(tokenizer.convert_tokens_to_ids(['[CLS]', '[SEP]'])))

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…


[[CLS], [SEP]] = [101, 102]


In [10]:
# Split the sentence into tokens.
tokenized_text = tokenizer.tokenize(train_df['question_text'][0])

# Print out the tokens.
print(tokenized_text)

# Map the token strings to their vocabulary indeces.
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Display the words with their indeces.
for tup in zip(tokenized_text, indexed_tokens):
    print('{:<12} {:>6,}'.format(tup[0], tup[1]))

['islamic', 'world', 'is', 'another', 'way', 'to', 'refer', 'to', 'what', '?']
islamic       5,499
world         2,088
is            2,003
another       2,178
way           2,126
to            2,000
refer         6,523
to            2,000
what          2,054
?             1,029


In [11]:
def from_df_to_model_dict(df, context_dict):

  # initialize structures
  input_ids = []
  input_mask = []
  input_type_ids = []

  cut_counter = 0

  for i, row in df.iterrows():

    # print progress
    if i % 1000 == 0 and i >0:
      print(i)
      break

    # encode question and context
    encoded_question = tokenizer.encode(row['question_text'])
    encoded_context = tokenizer.encode(context_dict[row['context_id']])
    encoded_answer = tokenizer.encode(row['answer_text'])
    # concatenate input data and crop at 510 (bert max seq len = 512)
    encoded_input = encoded_question + encoded_context

    encoded_input = encoded_input[:(512-len(encoded_answer))]

    if len(encoded_input) > 512:
      cut_counter += 1

    
    # add answer_start and answer_end to encoded_input
    #encoded_input = encoded_input + tokenizer.encode([row['answer_start']] + [row['answer_end']]
    encoded_input = encoded_input + encoded_answer
                                                     
    # create mask of ones
    ones_mask = tf.ones_like(encoded_input)

    # add padding and convert to tensor
    encoded_input = tf.keras.preprocessing.sequence.pad_sequences([encoded_input], maxlen=512, padding='pre')
    encoded_input = tf.squeeze(tf.convert_to_tensor(encoded_input))

    # create input_type_ids
    type_ids = tf.concat([tf.zeros(tf.shape(encoded_input)[-1]-len(encoded_answer), dtype=tf.int32), tf.ones(len(encoded_answer), dtype=tf.int32)], axis=-1)

    # create mask of zeros
    zeros_mask = tf.zeros(tf.shape(encoded_input)[-1]-tf.shape(ones_mask)[-1], dtype=tf.int32)
    mask = tf.concat([zeros_mask, ones_mask], axis=-1)

    # append elements to lists
    input_ids.append(encoded_input)
    input_mask.append(mask)
    input_type_ids.append(type_ids)

  print("cut_counter: ", cut_counter)

  # save input data as dictionary
  inputs = {
    'input_ids': tf.convert_to_tensor(input_ids),
    'input_mask': tf.convert_to_tensor(input_mask),
    'input_type_ids': tf.convert_to_tensor(input_type_ids)
    }

  return inputs

In [12]:
train_dict = from_df_to_model_dict(train_df, train_context_dict)
test_dict = from_df_to_model_dict(test_df, test_context_dict)
train_dict

Token indices sequence length is longer than the specified maximum sequence length for this model (589 > 512). Running this sequence through the model will result in indexing errors


10000
cut_counter:  0
10000
cut_counter:  0


{'input_ids': <tf.Tensor: shape=(10000, 512), dtype=int32, numpy=
 array([[    0,     0,     0, ...,  5152,  2088,   102],
        [    0,     0,     0, ...,  1997,  7025,   102],
        [    0,     0,     0, ...,  5499, 10585,   102],
        ...,
        [    0,     0,     0, ..., 20098,  3900,   102],
        [    0,     0,     0, ...,  9474,  2813,   102],
        [    0,     0,     0, ...,  6205,  2509,   102]], dtype=int32)>,
 'input_mask': <tf.Tensor: shape=(10000, 512), dtype=int32, numpy=
 array([[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1]], dtype=int32)>,
 'input_type_ids': <tf.Tensor: shape=(10000, 512), dtype=int32, numpy=
 array([[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..

In [13]:
print(train_dict['input_ids'][0])
print(train_dict['input_mask'][0])
print(train_dict['input_type_ids'][0])

tf.Tensor(
[    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0    

In [14]:
# Set up epochs and steps
epochs = 3
batch_size = 32

train_data_size = len(train_dict['input_ids'])
test_data_size = len(test_dict['input_ids'])
train_steps_per_epoch = int(train_data_size / batch_size)
test_steps_per_epoch = int(test_data_size / batch_size)

# creates an optimizer with learning rate schedule
# optimizer = nlp.optimization.create_optimizer(
#     2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)

In [15]:
from transformers import TFBertModel
model = TFBertModel.from_pretrained('bert-base-uncased')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=536063208.0, style=ProgressStyle(descri…




Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [16]:
print(train_dict['input_type_ids'].shape)
print(train_dict['input_ids'].shape)
print(train_dict['input_mask'].shape)

print(test_dict['input_type_ids'].shape)
print(test_dict['input_ids'].shape)
print(test_dict['input_mask'].shape)

(10000, 512)
(10000, 512)
(10000, 512)
(10000, 512)
(10000, 512)
(10000, 512)


In [17]:
def train_model(train_dict, steps, batch_size=32):
  for i in range(0, steps):
    input = {'input_ids' : train_dict['input_ids'][i:i+batch_size],
            'input_mask' : train_dict['input_mask'][i:i+batch_size],
            'input_type_ids' : train_dict['input_type_ids'][i:i+batch_size]
            }
    print('Batch: ' + str(i+1) + '/' + str(steps))
    model(input, training = True)

train_model(train_dict, train_steps_per_epoch)

Batch: 1/312
Batch: 2/312
Batch: 3/312
Batch: 4/312
Batch: 5/312
Batch: 6/312
Batch: 7/312
Batch: 8/312
Batch: 9/312
Batch: 10/312
Batch: 11/312
Batch: 12/312
Batch: 13/312
Batch: 14/312
Batch: 15/312
Batch: 16/312
Batch: 17/312
Batch: 18/312
Batch: 19/312
Batch: 20/312
Batch: 21/312
Batch: 22/312
Batch: 23/312
Batch: 24/312
Batch: 25/312
Batch: 26/312
Batch: 27/312
Batch: 28/312
Batch: 29/312
Batch: 30/312
Batch: 31/312
Batch: 32/312
Batch: 33/312
Batch: 34/312
Batch: 35/312
Batch: 36/312
Batch: 37/312
Batch: 38/312
Batch: 39/312
Batch: 40/312
Batch: 41/312
Batch: 42/312
Batch: 43/312
Batch: 44/312
Batch: 45/312
Batch: 46/312
Batch: 47/312
Batch: 48/312
Batch: 49/312
Batch: 50/312
Batch: 51/312
Batch: 52/312
Batch: 53/312
Batch: 54/312
Batch: 55/312
Batch: 56/312
Batch: 57/312
Batch: 58/312
Batch: 59/312
Batch: 60/312
Batch: 61/312
Batch: 62/312
Batch: 63/312
Batch: 64/312
Batch: 65/312
Batch: 66/312
Batch: 67/312
Batch: 68/312
Batch: 69/312
Batch: 70/312
Batch: 71/312
Batch: 72/312
B

In [20]:
def test_model(test_dict, steps, batch_size=32):
  output = []

  for i in range(0, steps):
    input = {'input_ids' : test_dict['input_ids'][i:i+batch_size],
            'input_mask' : test_dict['input_mask'][i:i+batch_size],
            'input_type_ids' : test_dict['input_type_ids'][i:i+batch_size]
            }
    print('Batch: ' + str(i+1) + '/' + str(steps))
    output.append(model(input))
  
  return output

test_answers = test_model(test_dict, test_steps_per_epoch)

Batch: 1/312
Batch: 2/312
Batch: 3/312
Batch: 4/312
Batch: 5/312
Batch: 6/312
Batch: 7/312
Batch: 8/312
Batch: 9/312
Batch: 10/312
Batch: 11/312
Batch: 12/312
Batch: 13/312
Batch: 14/312
Batch: 15/312
Batch: 16/312
Batch: 17/312
Batch: 18/312
Batch: 19/312
Batch: 20/312
Batch: 21/312
Batch: 22/312
Batch: 23/312
Batch: 24/312
Batch: 25/312
Batch: 26/312
Batch: 27/312
Batch: 28/312
Batch: 29/312
Batch: 30/312
Batch: 31/312


In [21]:
print(test_answers[0].last_hidden_state)

tf.Tensor(
[[[-0.4465253   0.48536447  0.06580014 ... -0.46521625  0.41014314
   -0.70420337]
  [-0.6818489  -0.2513821   0.5476869  ... -0.3495175   0.79528636
   -0.9575961 ]
  [-0.33148208  0.00827573  0.59227216 ... -0.5239812   0.08607539
   -1.0944345 ]
  ...
  [-1.0840538   0.35074574  0.7235429  ...  0.43597403  0.7232471
   -0.30706415]
  [-0.08872701 -1.2204639   1.1156192  ... -0.00800674 -0.06154624
   -1.140662  ]
  [ 0.55041736  0.35058394  0.09902531 ... -0.11558585 -0.48601955
   -0.3842366 ]]

 [[-0.44596645  0.5361833   0.12666115 ... -0.4712901   0.39068177
   -0.7523664 ]
  [-0.51936555 -0.13520418  0.48739904 ... -0.37786257  0.79161364
   -0.98738134]
  [-0.2422797   0.05043562  0.59685683 ... -0.52779245  0.10194599
   -1.0443174 ]
  ...
  [ 0.19997582  0.23308192 -0.20614423 ...  0.37549567 -0.14534032
    0.47882754]
  [-0.29459804  0.00491316 -0.3172179  ... -0.82158023  0.09964563
   -0.937211  ]
  [ 0.55980796  0.37440133  0.11636388 ... -0.08858828 -0.49803

In [22]:
idx = test_answers[0].last_hidden_state[0]
idx

<tf.Tensor: shape=(512, 768), dtype=float32, numpy=
array([[-0.4465253 ,  0.48536447,  0.06580014, ..., -0.46521625,
         0.41014314, -0.70420337],
       [-0.6818489 , -0.2513821 ,  0.5476869 , ..., -0.3495175 ,
         0.79528636, -0.9575961 ],
       [-0.33148208,  0.00827573,  0.59227216, ..., -0.5239812 ,
         0.08607539, -1.0944345 ],
       ...,
       [-1.0840538 ,  0.35074574,  0.7235429 , ...,  0.43597403,
         0.7232471 , -0.30706415],
       [-0.08872701, -1.2204639 ,  1.1156192 , ..., -0.00800674,
        -0.06154624, -1.140662  ],
       [ 0.55041736,  0.35058394,  0.09902531, ..., -0.11558585,
        -0.48601955, -0.3842366 ]], dtype=float32)>

In [23]:
tokenizer.decode(np.argmax(idx, axis=1))

'[unused200] [unused617] [unused303] [unused303] [unused303] [unused303] [unused268] [unused268] [unused268] [unused303] [unused268] [unused268] [unused268] [unused303] [unused268] [unused268] [unused268] [unused268] [unused303] [unused303] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused303] [unused268] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused303] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [unused268] [un

In [24]:
int_indexes = []
for id in idx:
  int_indexes.append([id[-2], id[-1]])

In [25]:
idx[0]

<tf.Tensor: shape=(768,), dtype=float32, numpy=
array([-4.46525306e-01,  4.85364467e-01,  6.58001378e-02,  4.71078724e-01,
       -4.78263587e-01, -1.06993869e-01,  1.12684917e+00, -3.90133917e-01,
        1.38233513e-01,  1.15629986e-01, -1.96294591e-01, -6.29772484e-01,
       -2.84878433e-01,  5.34518957e-01,  5.14786899e-01,  4.44629073e-01,
        1.03844553e-01,  6.91295564e-01, -2.27408230e-01,  9.72810313e-02,
        4.71048623e-01, -7.89988190e-02,  7.51327097e-01,  1.58075243e-01,
       -6.11185990e-02,  1.17476657e-02, -2.52782345e-01, -1.48210025e+00,
       -7.44268894e-01, -1.95111185e-01, -5.63248217e-01,  7.19994545e-01,
       -1.90814175e-02, -5.12300432e-01,  4.11216706e-01, -2.07561985e-01,
       -4.91175473e-01, -2.02412933e-01,  8.61362278e-01, -1.97490662e-01,
       -1.84415698e-01, -4.09926832e-01,  5.78411400e-01, -8.86907354e-02,
       -3.24657738e-01, -1.69748947e-01, -3.83694077e+00,  1.88484922e-01,
       -4.50488269e-01, -9.69166040e-01, -1.16077825

In [26]:
tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(idx[0]))

'[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [unused0] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [unused0] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [unused0] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [unused0] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [unused0] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [UNK] [PAD] [PAD] [PAD] [PAD] [PAD] [unused0] [PAD] [PAD] [PA