In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

In [2]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
tf.__version__

'2.10.0'

In [4]:
from datasets import load_dataset, load_from_disk
data = load_dataset('conll2003')

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset conll2003 (C:/Users/sajit/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98)
100%|██████████| 3/3 [00:00<00:00, 585.69it/s]


In [5]:
data['train'].features['ner_tags'].feature.names

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

In [6]:
oov_token = '[UNK]'
tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token=oov_token, lower=True)
tokenizer.fit_on_texts([' '.join(x) for x in data['train']['tokens']])
len(tokenizer.word_index)


17680

In [7]:
def tokenize_and_align_labels(examples, label_all_tokens=True):
  tokenized_inputs = [tokenizer.texts_to_sequences(token) for token in examples['tokens']]
  new_tokenized_inputs = []
  labels = []
  word_ids_list = []
  for i,tokenized_input in enumerate(tokenized_inputs):
    ner_tags = examples['ner_tags'][i]
    label_ids = []
    word_ids = []
    tokenized_sentence = []
    for j,tokenized_words in enumerate(tokenized_input):
      if tokenized_words:
        tokenized_sentence.extend(tokenized_words)
        word_ids.extend([j]*len(tokenized_words))
        label_ids.append(ner_tags[j])
        for k in range(len(tokenized_words)-1):
          label_ids.append(ner_tags[j] if label_all_tokens else -1)
    labels.append(label_ids)
    word_ids_list.append(word_ids)
    new_tokenized_inputs.append(tokenized_sentence)
  return {'input_ids': new_tokenized_inputs, 'word_ids': word_ids_list, 'labels': labels}


In [8]:
def input_and_label_pad_sequence(examples, maxlen=100):
  return { 'input_ids': tf.keras.preprocessing.sequence.pad_sequences(examples['input_ids'],maxlen = maxlen, padding='post', truncating='post'),
           'labels': tf.keras.preprocessing.sequence.pad_sequences(examples['labels'],maxlen = maxlen, padding='post', truncating='post',value=-1 )}

In [9]:
def create_padding_mask(examples):
    seq = 1 - (np.array(examples['input_ids'])==0)
    return {"attention_mask": seq}

In [10]:
maxlen = 100

data = data.map(tokenize_and_align_labels, batched=True).map(input_and_label_pad_sequence, batched=True).map(create_padding_mask, batched=True)

                                                                    

In [11]:
data

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'input_ids', 'word_ids', 'labels', 'attention_mask'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'input_ids', 'word_ids', 'labels', 'attention_mask'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'input_ids', 'word_ids', 'labels', 'attention_mask'],
        num_rows: 3453
    })
})

In [11]:
train = tf.data.Dataset.from_tensor_slices((data['train']['input_ids'],data['train']['attention_mask'],data['train']['labels'])).map(lambda x,y,z: (tf.concat((x,y),axis=-1),z)).shuffle(10000).batch(64)
validation = tf.data.Dataset.from_tensor_slices((data['validation']['input_ids'],data['validation']['attention_mask'],data['validation']['labels'])).map(lambda x,y,z: (tf.concat((x,y),axis=-1),z)).batch(64)
test = tf.data.Dataset.from_tensor_slices((data['test']['input_ids'],data['test']['attention_mask'])).map(lambda x,y: tf.concat((x,y),axis=-1)).batch(64)

In [21]:
transformer.predict(test)

NameError: name 'transformer' is not defined

In [13]:
# @tf.keras.utils.register_keras_serializable()
def positional_encoding(positions, d_model):

    position = np.arange(positions)[:, np.newaxis]
    k = np.arange(d_model)[np.newaxis, :]
    i = k // 2

    # initialize a matrix angle_rads of all the angles
    angle_rates = 1 / np.power(10000, (2 * i) / np.float32(d_model))
    angle_rads = position * angle_rates

    # apply sin to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # apply cos to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    pos_encoding = angle_rads[np.newaxis, ...]

    return tf.cast(pos_encoding, dtype=tf.float32)

In [18]:
a = tf.constant([1,2,3])
a//2

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 1, 1])>

In [14]:
# @tf.keras.utils.register_keras_serializable()
def create_padding_mask(decoder_token_ids):

    seq = 1 - tf.cast(tf.math.equal(decoder_token_ids, 0), tf.float32)
    return seq[:, tf.newaxis, :]

In [12]:
@tf.keras.utils.register_keras_serializable()
def FullyConnected(embedding_dim, fully_connected_dim):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(fully_connected_dim, activation='relu'),
      tf.keras.layers.Dense(embedding_dim)
  ])

In [19]:
tf.keras.utils.get_custom_objects().clear()

In [20]:
@tf.keras.utils.register_keras_serializable()
class EncoderLayer(tf.keras.layers.Layer):

  def __init__(self, embedding_dim, num_heads, fully_connected_dim,
               dropout_rate=0.1, layernorm_eps=1e-6):

    super(EncoderLayer, self).__init__()

    self.mha = tf.keras.layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=embedding_dim,
        dropout=dropout_rate
    )

    self.ffn = FullyConnected(
        embedding_dim=embedding_dim,
        fully_connected_dim=fully_connected_dim
    )

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=layernorm_eps)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=layernorm_eps)

    self.dropout_ffn = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x, training, mask=None):

    self_mha_output = self.mha(x,x,x,mask)
    skip_x_attention = self.layernorm1(x+self_mha_output)
    ffn_output = self.ffn(skip_x_attention)
    ffn_output = self.dropout_ffn(ffn_output, training=training)
    encoder_layer_out = self.layernorm2(skip_x_attention+ffn_output)

    return encoder_layer_out
  
  def get_config(self):
    return {"mha": self.mha,
            "ffn": self.ffn,
            "layernorm1": self.layernorm1,
            "layernorm2": self.layernorm2,
            "dropout_ffn": self.dropout_ffn}

<function __main__.EncoderLayer.get_config(self)>

In [25]:
# @tf.keras.utils.register_keras_serializable()
class Encoder(tf.keras.layers.Layer):

  def __init__(self, num_layers, embedding_dim, num_heads, fully_connected_dim, vocab_size,
               maximum_position_encoding, dropout_rate=0.1, layernorm_eps=1e-6):
    super(Encoder, self).__init__()

    self.embedding_dim = embedding_dim

    self.num_layers = num_layers

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.pos_encoding = tf.keras.layers.Embedding(maximum_position_encoding,
                                            self.embedding_dim)

    self.enc_layers = [EncoderLayer(embedding_dim=embedding_dim,
                                    num_heads=num_heads,
                                    fully_connected_dim=fully_connected_dim,
                                    dropout_rate=dropout_rate,
                                    layernorm_eps=layernorm_eps
                                    )
                      for _ in range(num_layers)]

    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x, training, mask):
    seq_len = tf.shape(x)[1]

    x = self.embedding(x)

    positions = tf.range(start=0, limit=seq_len, delta=1)

    positional_encoding = self.pos_encoding(positions)

    x = x + positional_encoding

    x = self.dropout(x, training=training)

    for i in range(self.num_layers):
      x = self.enc_layers[i](x, training, mask)

    return x

In [26]:
# @tf.keras.utils.register_keras_serializable()
class NERModel(tf.keras.Model):

  def __init__(self, num_tags, num_layers, embedding_dim, num_heads, fully_connected_dim,
               vocab_size, max_positional_encoding,dropout_rate=0.1,
               layernorm_eps=1e-6):

    super(NERModel, self).__init__()

    self.encoder = Encoder(     num_layers=num_layers,
                                embedding_dim=embedding_dim,
                                num_heads=num_heads,
                                fully_connected_dim=fully_connected_dim,
                                vocab_size=vocab_size,
                                maximum_position_encoding=max_positional_encoding,
                                dropout_rate=dropout_rate,
                                layernorm_eps=layernorm_eps)
    self.dropout1 = tf.keras.layers.Dropout(rate=dropout_rate)
    self.dropout2 = tf.keras.layers.Dropout(rate=dropout_rate)
    self.ffn = tf.keras.layers.Dense(fully_connected_dim, activation='relu')
    self.ffn_final = tf.keras.layers.Dense(num_tags,activation='softmax')

  def call(self, x, training):
    seq_length = tf.shape(x)[-1]//2
    mask = x[:,seq_length:]
    mask = mask[:,tf.newaxis,:]
    x = x[:,:seq_length]
    x = self.encoder(x,training, mask)
    x = self.dropout1(x, training = training)
    x = self.ffn(x)
    x = self.dropout2(x, training=training)
    x = self.ffn_final(x)
    return x




In [24]:
data['train'].features['ner_tags']

Sequence(feature=ClassLabel(names=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC'], id=None), length=-1, id=None)

In [20]:
len(tokenizer.word_index)

17680

In [41]:
dummy = tf.keras.models.load_model('model')





ValueError: Unable to create a Keras model from SavedModel at model. This SavedModel was exported with `tf.saved_model.save`, and lacks the Keras metadata file. Please save your Keras model by calling `model.save` or `tf.keras.models.save_model`. Note that you can still load this SavedModel with `tf.saved_model.load`.

In [40]:
transformer.save('model', 'tf')



INFO:tensorflow:Assets written to: model\assets


INFO:tensorflow:Assets written to: model\assets


NotImplementedError: Learning rate schedule must override get_config

In [15]:
num_layers = 2
num_tags = 9
vocab_size = len(tokenizer.word_index)+1
embedding_dim = 100
fully_connected_dim = 100
num_heads = 3
positional_encoding_length = 150

# transformer = NERModel(
#     num_tags,
#     num_layers,
#     embedding_dim,
#     num_heads,
#     fully_connected_dim,
#     vocab_size,
#     positional_encoding_length,
# )


In [22]:
encoder_layer = EncoderLayer(embedding_dim=embedding_dim, num_heads=num_heads, fully_connected_dim=fully_connected_dim)
encoder_layer.build()


TypeError: build() missing 1 required positional argument: 'input_shape'

In [31]:
class CustomNonPaddingTokenLoss(tf.keras.losses.Loss):
    def __init__(self, name="custom_ner_loss"):
        super().__init__(name=name)
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False, reduction='none')

    def call(self, y_true, y_pred):
        mask = tf.math.logical_not(tf.math.equal(y_true, -1))
        mask = tf.cast(mask, dtype=tf.int32)

        loss = self.loss_object(y_true*mask, y_pred)

        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask

        return tf.reduce_sum(loss)/tf.reduce_sum(mask)




In [32]:
def acc(y_true, y_pred):
    mask = tf.math.logical_not(tf.math.equal(y_true, -1))
    mask = tf.cast(mask,dtype=tf.float32)
    pred = tf.cast(tf.argmax(y_pred,axis=-1),dtype=tf.float32)
    correct = tf.cast(tf.equal(y_true,pred),dtype=tf.float32)
    correct*=mask
    return tf.reduce_sum(correct)/tf.reduce_sum(mask)


In [33]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = tf.cast(d_model, dtype=tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, dtype=tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

learning_rate = CustomSchedule(embedding_dim)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [34]:
transformer.compile(optimizer=optimizer, loss=[CustomNonPaddingTokenLoss()], metrics=[acc])

In [35]:
transformer.fit(train,
                validation_data=(validation),
                epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x154e3fdd548>

In [36]:
transformer.variables

[<tf.Variable 'ner_model/encoder/embedding/embeddings:0' shape=(17681, 100) dtype=float32, numpy=
 array([[-0.02629833,  0.01534941, -0.035016  , ..., -0.01422877,
         -0.03357284,  0.04740101],
        [-0.0356002 ,  0.0200103 , -0.02456339, ..., -0.02154188,
          0.02853814,  0.01406108],
        [-0.02267813, -0.02439498,  0.01109215, ...,  0.02795291,
         -0.03260009, -0.04064712],
        ...,
        [ 0.02365335, -0.00689765,  0.05581006, ..., -0.00050443,
         -0.03156359, -0.05429408],
        [ 0.037636  , -0.03537535, -0.00998073, ...,  0.03594239,
         -0.00322863,  0.03882029],
        [-0.01710856, -0.03856236,  0.02623395, ..., -0.02852531,
         -0.01995139, -0.04506009]], dtype=float32)>,
 <tf.Variable 'ner_model/encoder/embedding_1/embeddings:0' shape=(150, 100) dtype=float32, numpy=
 array([[ 0.03082116,  0.00633969, -0.00461204, ...,  0.01809022,
         -0.00633068, -0.00925122],
        [ 0.00080076, -0.00333675, -0.01760201, ..., -0.010

In [98]:
transformer(test)

ValueError: Exception encountered when calling layer "ner_model_4" "                 f"(type NERModel).

Attempt to convert a value (<BatchDataset element_spec=TensorSpec(shape=(None, 200), dtype=tf.int32, name=None)>) with an unsupported type (<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>) to a Tensor.

Call arguments received by layer "ner_model_4" "                 f"(type NERModel):
  • x=<BatchDataset element_spec=TensorSpec(shape=(None, 200), dtype=tf.int32, name=None)>
  • training=None

In [41]:
example = data['test'][22]
example['tokens'],example['ner_tags'],example['word_ids'],example['labels'][:len(example['word_ids'])]

(['RUGBY',
  'UNION',
  '-',
  'CUTTITTA',
  'BACK',
  'FOR',
  'ITALY',
  'AFTER',
  'A',
  'YEAR',
  '.'],
 [3, 4, 0, 1, 0, 0, 5, 0, 0, 0, 0],
 [0, 1, 3, 4, 5, 6, 7, 8, 9],
 [3, 4, 1, 0, 0, 5, 0, 0, 0])

In [42]:
[np.argmax(x) for x in transformer.predict(test)[22]]



[3,
 4,
 0,
 0,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 7,
 7,
 7,
 3,
 7,
 0,
 0,
 0,
 0,
 0,
 0]

In [34]:
label_list = data['train'].features['ner_tags'].feature.names

In [37]:
label_list

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

In [75]:
from sklearn.metrics import precision_score, recall_score, f1_score 

def compute_metrics(labels, pred_logits):

    pred_logits = np.argmax(pred_logits, axis=-1)

    predictions = []
    for prediction, label in zip(pred_logits, labels):
        for eval_pred, l in zip(prediction, label):
            if l!= -1:
                predictions.append(eval_pred)

    true_labels = []
    for prediction, label in zip(pred_logits, labels):
        for eval_pred, l in zip(prediction, label):
            if l!= -1:
                true_labels.append(l)

    precision = precision_score(true_labels, predictions, average='macro')
    recall = recall_score(true_labels, predictions, average='macro')
    f1 = f1_score(true_labels, predictions, average='macro')
    
    return {'precision': precision,
            'recall': recall,
            'f1_score': f1 }


In [74]:
labels = data['test']['labels']
pred_logits = transformer.predict(test)

compute_metrics((pred_logits,labels))



{'precision': 0.7354912726458616,
 'recall': 0.6560956424169582,
 'f1_score': 0.6889829013166076}