In [65]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [66]:
import pandas as pd
file_path = '/content/drive/MyDrive/GTL_dataset/data.tsv'

In [67]:
sentences = []
pos_tags = []
ner_tags = []

# Temporary storage for words, POS, and NER tags of a sentence.
temp_sentence = []
temp_pos_tags = []
temp_ner_tags = []

with open(file_path, 'r') as file:
    for line in file:
        line = line.strip()
        # Check if the line is a sentence or a word
        # In the given dataset, five sentences had one tab separation and rest had no tab separation
        tab_count = line.count('\t')
        if tab_count == 0 or tab_count == 1:

            # If it's a sentence (zero or one tabs), finalize the previous sentence
            if temp_sentence:
                sentences.append(temp_sentence)
                pos_tags.append(temp_pos_tags)
                ner_tags.append(temp_ner_tags)
                temp_sentence = []
                temp_pos_tags = []
                temp_ner_tags = []
        else:
            # If it's a word with tags, split by tabs
            parts = line.split('\t')
            temp_sentence.append(parts[0])
            if len(parts) > 1:
                temp_pos_tags.append(parts[1])
            if len(parts) > 2:
                temp_ner_tags.append(parts[2])

# Append the last sentence if the loop ends without a sentence boundary
if temp_sentence:
    sentences.append(temp_sentence)
    pos_tags.append(temp_pos_tags)
    ner_tags.append(temp_ner_tags)

# Convert sentences, pos_tags, and ner_tags to a DataFrame for better inspection
processed_data = pd.DataFrame({
    'Sentence': [" ".join(sentence) for sentence in sentences],
    'POS_Tags': pos_tags,
    'NER_Tags': ner_tags
})

# Display the processed data
print(processed_data.head())
processed_data.POS_Tags
processed_data.NER_Tags.head()


                                            Sentence  \
0  শনিবার (২৭ আগস্ট) রাতে পটুয়াখালী সদর থানার ভা...   
1  বায়ুদূষণ ও স্মার্ট ফোন ছেলেমেয়ে উভয়ের প্রজনন ক...   
2  ছাত্র রাজনীতির বর্তমান অবস্থার শুরু হয়েছিলো ...   
3  শাকিল রাজধানীর ৩০০ ফিট, দিয়াবাড়ি ও পূর্বাচল ...   
4  সম্প্রতি ক্লাবের নবীন ব্যবস্থাপনা প্রশিক্ষণার্...   

                                            POS_Tags  \
0  [NNP, PUNCT, NNP, NNC, NNP, NNC, NNC, ADJ, NNC...   
1  [NNC, CONJ, NNC, NNC, NNC, PRO, NNC, NNC, NNC,...   
2   [NNC, NNC, ADJ, NNC, NNC, VF, NNC, NNP, NNC, PP]   
3  [NNP, NNC, QF, NNC, NNP, CONJ, NNP, NNC, NNC, ...   
4  [ADV, NNC, ADJ, NNC, NNC, CONJ, NNC, NNC, PP, ...   

                                            NER_Tags  
0  [B-D&T, B-OTH, B-D&T, B-D&T, B-GPE, I-GPE, I-G...  
1  [B-OTH, B-OTH, B-OTH, B-OTH, B-PER, B-OTH, B-O...  
2  [B-OTH, B-OTH, B-OTH, B-OTH, B-OTH, B-OTH, B-P...  
3  [B-PER, B-OTH, B-LOC, I-LOC, B-LOC, B-OTH, B-L...  
4  [B-OTH, B-ORG, B-OTH, B-OTH, B-PER, B-OTH, B-P..

Unnamed: 0,NER_Tags
0,"[B-D&T, B-OTH, B-D&T, B-D&T, B-GPE, I-GPE, I-G..."
1,"[B-OTH, B-OTH, B-OTH, B-OTH, B-PER, B-OTH, B-O..."
2,"[B-OTH, B-OTH, B-OTH, B-OTH, B-OTH, B-OTH, B-P..."
3,"[B-PER, B-OTH, B-LOC, I-LOC, B-LOC, B-OTH, B-L..."
4,"[B-OTH, B-ORG, B-OTH, B-OTH, B-PER, B-OTH, B-P..."


In [68]:
flat_pos_labels = [label for sublist in pos_tags for label in sublist]
flat_ner_labels = [label for sublist in ner_tags for label in sublist]

unique_pos_classes = set(flat_pos_labels)
pos_classes = len(unique_pos_classes)
print(f"POS classes: {unique_pos_classes}, Number of POS classes: {pos_classes}")

# Find unique NER classes
unique_ner_classes = set(flat_ner_labels)
ner_classes = len(unique_ner_classes)
print(f"NER classes: {unique_ner_classes}, Number of NER classes: {ner_classes}")


POS classes: {'VNF', 'PUNCT', 'ADJ', 'VF', 'PRO', 'OTH', 'INTJ', 'NNP', 'ADV', 'NNC', 'PP', 'CONJ', 'PART', 'QF', 'DET'}, Number of POS classes: 15
NER classes: {'B-ORG', 'I-MISC', 'I-LOC', 'B-NUM', 'I-ORG', 'B-D&T', 'B-MISC', 'B-OTH', 'I-NUM', 'I-EVENT', 'B-T&T', 'I-UNIT', 'B-LOC', 'B-GPE', 'I-D&T', 'B-UNIT', 'I-T&T', 'B-PER', 'I-PER', 'B-EVENT', 'I-GPE'}, Number of NER classes: 21


In [70]:
from transformers import BertTokenizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Tokenize the sentences
inputs = tokenizer(sentences, padding=True, truncation=True, max_length=92, is_split_into_words=True, return_tensors='np')  # Use 'np' to return numpy arrays

max_token_id = np.max(inputs['input_ids'])
vocab_size = tokenizer.vocab_size

assert max_token_id < vocab_size, f"Token ID {max_token_id} exceeds the vocab size of {vocab_size}. Check your tokenization process."


# Convert tags to label encoding
ner_label_encoder = LabelEncoder()
pos_label_encoder = LabelEncoder()

# Flatten the list of tags, fit and transform
flat_ner_tags = [item for sublist in ner_tags for item in sublist]
flat_pos_tags = [item for sublist in pos_tags for item in sublist]

ner_label_encoder.fit(flat_ner_tags)
pos_label_encoder.fit(flat_pos_tags)

encoded_ner_tags = [ner_label_encoder.transform(tag_list) for tag_list in ner_tags]
encoded_pos_tags = [pos_label_encoder.transform(tag_list) for tag_list in pos_tags]

# Pad the encoded tags to match input length
ner_tags_padded = tf.keras.preprocessing.sequence.pad_sequences(encoded_ner_tags, maxlen=92, padding='post', truncating='post')
pos_tags_padded = tf.keras.preprocessing.sequence.pad_sequences(encoded_pos_tags, maxlen=92, padding='post', truncating='post')

# Convert to numpy arrays to ensure compatibility with scikit-learn
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Split the data into train, validation, and test sets
X_train, X_temp, y_train_ner, y_temp_ner, y_train_pos, y_temp_pos = train_test_split(
    input_ids, ner_tags_padded, pos_tags_padded, test_size=0.3, random_state=42)

X_val, X_test, y_val_ner, y_test_ner, y_val_pos, y_test_pos = train_test_split(
    X_temp, y_temp_ner, y_temp_pos, test_size=0.5, random_state=42)

# Split attention_mask as well to match X_train, X_val, and X_test
attention_mask_train, attention_mask_temp = train_test_split(attention_mask, test_size=0.3, random_state=42)
attention_mask_val, attention_mask_test = train_test_split(attention_mask_temp, test_size=0.5, random_state=42)

# Convert to tf.Tensor to feed into the model
X_train = tf.convert_to_tensor(X_train)
X_val = tf.convert_to_tensor(X_val)
X_test = tf.convert_to_tensor(X_test)

y_train_ner = tf.convert_to_tensor(y_train_ner)
y_val_ner = tf.convert_to_tensor(y_val_ner)
y_test_ner = tf.convert_to_tensor(y_test_ner)

y_train_pos = tf.convert_to_tensor(y_train_pos)
y_val_pos = tf.convert_to_tensor(y_val_pos)
y_test_pos = tf.convert_to_tensor(y_test_pos)

attention_mask_train = tf.convert_to_tensor(attention_mask_train)
attention_mask_val = tf.convert_to_tensor(attention_mask_val)
attention_mask_test = tf.convert_to_tensor(attention_mask_test)


In [71]:
def pad_sequences(sequences, maxlen):
    return tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen, padding='post', truncating='post')

# Define maximum sequence length
max_seq_length = 92

# Pad sequences and labels
X_train_padded = pad_sequences(X_train, maxlen=max_seq_length)
y_train_ner_padded = pad_sequences(y_train_ner, maxlen=max_seq_length)
y_train_pos_padded = pad_sequences(y_train_pos, maxlen=max_seq_length)

X_val_padded = pad_sequences(X_val, maxlen=max_seq_length)
y_val_ner_padded = pad_sequences(y_val_ner, maxlen=max_seq_length)
y_val_pos_padded = pad_sequences(y_val_pos, maxlen=max_seq_length)

# Convert to tensors
X_train_tensor = tf.convert_to_tensor(X_train_padded)
y_train_ner_tensor = tf.convert_to_tensor(y_train_ner_padded)
y_train_pos_tensor = tf.convert_to_tensor(y_train_pos_padded)

X_val_tensor = tf.convert_to_tensor(X_val_padded)
y_val_ner_tensor = tf.convert_to_tensor(y_val_ner_padded)
y_val_pos_tensor = tf.convert_to_tensor(y_val_pos_padded)

# Example with attention masks (all ones for simplicity; adjust as needed)
attention_mask_train = np.ones_like(X_train_tensor)
attention_mask_val = np.ones_like(X_val_tensor)


In [72]:
print('Input IDs shape:', input_ids.shape)  # Should be (batch_size, sequence_length)
print('Attention Mask shape:', attention_mask.shape)
print(max_token_id)
vocab_size


Input IDs shape: (7002, 92)
Attention Mask shape: (7002, 92)
111240


119547

In [73]:
from transformers import TFBertModel
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
import tensorflow as tf


In [74]:
class BertMultiTaskModel(Model):
    def __init__(self, hidden_dim, ner_classes, pos_classes, **kwargs):
        super(BertMultiTaskModel, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')
        self.hidden_dim = hidden_dim
        self.ner_classes = ner_classes
        self.pos_classes = pos_classes
        self.dense = Dense(hidden_dim, activation='relu')
        self.ner_output = Dense(ner_classes, activation='softmax', name='ner_output')
        self.pos_output = Dense(pos_classes, activation='softmax', name='pos_output')

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        # BERT embeddings
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state

        # Apply dense layer and separate outputs
        x = self.dense(sequence_output)
        ner_logits = self.ner_output(x)
        pos_logits = self.pos_output(x)

        return {'ner_output': ner_logits, 'pos_output': pos_logits}

    def get_config(self):
        return {
            'hidden_dim': self.hidden_dim,
            'ner_classes': self.ner_classes,
            'pos_classes': self.pos_classes
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)
    def build_from_config(self, config):
        self.__init__(**config)
        self.build((None, 92))  # Example input shape; adjust as needed

hidden_dim = 256
class BertMultiTaskModel(Model):
    def __init__(self, hidden_dim, ner_classes, pos_classes, **kwargs):
        super(BertMultiTaskModel, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')
        self.hidden_dim = hidden_dim
        self.ner_classes = ner_classes
        self.pos_classes = pos_classes
        self.dense = Dense(hidden_dim, activation='relu')
        self.ner_output = Dense(ner_classes, activation='softmax', name='ner_output')
        self.pos_output = Dense(pos_classes, activation='softmax', name='pos_output')

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        # BERT embeddings
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state

        # Apply dense layer and separate outputs
        x = self.dense(sequence_output)
        ner_logits = self.ner_output(x)
        pos_logits = self.pos_output(x)

        return {'ner_output': ner_logits, 'pos_output': pos_logits}

    def get_config(self):
        return {
            'hidden_dim': self.hidden_dim,
            'ner_classes': self.ner_classes,
            'pos_classes': self.pos_classes
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)
    def build_from_config(self, config):
        self.__init__(**config)
        self.build((None, 92))  # Example input shape; adjust as needed

hidden_dim = 256
ner_classes = 21  # Number of NER classes
pos_classes = 15  # Number of POS classes
learning_rate = 0.0001

input_ids = Input(shape=(92,), dtype=tf.int32, name='input_ids')
attention_mask = Input(shape=(92,), dtype=tf.int32, name='attention_mask')

model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)
model({'input_ids': input_ids, 'attention_mask': attention_mask})  # This will build the model

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
    metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'}
)
model.summary()
learning_rate = 0.0001

input_ids = Input(shape=(92,), dtype=tf.int32, name='input_ids')
attention_mask = Input(shape=(92,), dtype=tf.int32, name='attention_mask')

model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)
model({'input_ids': input_ids, 'attention_mask': attention_mask})  # This will build the model

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
    metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'}
)
model.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [76]:
# Train the model
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_loss',           # Metric to monitor
    patience=3,                   # Number of epochs with no improvement to wait
    restore_best_weights=True,    # Restore model weights from the epoch with the best value of the monitored quantity
    verbose=1                     # Verbosity mode
)

history = model.fit(
    {'input_ids': X_train_tensor, 'attention_mask': attention_mask_train},
    {'ner_output': y_train_ner_tensor, 'pos_output': y_train_pos_tensor},
    validation_data=(
        {'input_ids': X_val_tensor, 'attention_mask': attention_mask_val},
        {'ner_output': y_val_ner_tensor, 'pos_output': y_val_pos_tensor}
    ),
    epochs=30,
    batch_size=32,
    callbacks=[early_stopping]
)

Epoch 1/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 261ms/step - loss: 0.6302 - ner_output_accuracy: 0.9178 - pos_output_accuracy: 0.9002 - val_loss: 0.6137 - val_ner_output_accuracy: 0.9192 - val_pos_output_accuracy: 0.9008
Epoch 2/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 183ms/step - loss: 0.5973 - ner_output_accuracy: 0.9228 - pos_output_accuracy: 0.9031 - val_loss: 0.5880 - val_ner_output_accuracy: 0.9248 - val_pos_output_accuracy: 0.9025
Epoch 3/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 185ms/step - loss: 0.5693 - ner_output_accuracy: 0.9262 - pos_output_accuracy: 0.9065 - val_loss: 0.5699 - val_ner_output_accuracy: 0.9274 - val_pos_output_accuracy: 0.9048
Epoch 4/30
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 185ms/step - loss: 0.5519 - ner_output_accuracy: 0.9288 - pos_output_accuracy: 0.9075 - val_loss: 0.5574 - val_ner_output_accuracy: 0.9295 - val_pos_output_accuracy: 0.9

In [77]:
# If you have test data, prepare and evaluate similarly
# Example test data preparation
X_test_padded = pad_sequences(X_test, maxlen=max_seq_length)
y_test_ner_padded = pad_sequences(y_test_ner, maxlen=max_seq_length)
y_test_pos_padded = pad_sequences(y_test_pos, maxlen=max_seq_length)

X_test_tensor = tf.convert_to_tensor(X_test_padded)
y_test_ner_tensor = tf.convert_to_tensor(y_test_ner_padded)
y_test_pos_tensor = tf.convert_to_tensor(y_test_pos_padded)

attention_mask_test = np.ones_like(X_test_tensor)

# Evaluate the model
evaluation = model.evaluate(
    {'input_ids': X_test_tensor, 'attention_mask': attention_mask_test},
    {'ner_output': y_test_ner_tensor, 'pos_output': y_test_pos_tensor}
)

print(f"Evaluation results: {evaluation}")




[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 356ms/step - loss: 0.4737 - ner_output_accuracy: 0.9410 - pos_output_accuracy: 0.9155
Evaluation results: [0.47288796305656433, 0.9408223628997803, 0.9157014489173889]


In [78]:

# Make predictions
predictions = model.predict({'input_ids': X_test_tensor, 'attention_mask': attention_mask_test})

# Get the predicted labels
pred_ner = np.argmax(predictions['ner_output'], axis=-1)
pred_pos = np.argmax(predictions['pos_output'], axis=-1)


[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 387ms/step


In [79]:
from sklearn.metrics import confusion_matrix

# Flatten the true and predicted labels
y_true_ner = np.concatenate([y_test_ner.numpy().flatten()])
y_pred_ner = np.concatenate([pred_ner.flatten()])

# Compute confusion matrix
confusion_mat_ner = confusion_matrix(y_true_ner, y_pred_ner)
print("Confusion Matrix (NER):\n", confusion_mat_ner)

# Flatten the true and predicted labels
y_true_pos = np.concatenate([y_test_pos.numpy().flatten()])
y_pred_pos = np.concatenate([pred_pos.flatten()])

# Compute confusion matrix
confusion_mat_pos = confusion_matrix(y_true_pos, y_pred_pos)
print("Confusion Matrix (POS):\n", confusion_mat_pos)



Confusion Matrix (NER):
 [[84028     0     0     0     0     0     0  1107     1     0     0     7
      0     0     0     0     0     1     6     0     0]
 [   18     0     0     0     0     0     1   123     1     0     0     0
      0     0     0     0     0     1     2     0     0]
 [   30     0     2     0     0     0     1   234     3     0     0     2
      0     0     0     0     0     1     2     0     0]
 [    7     0     0     0     0     0     0   111     0     0     0     0
      0     0     0     0     0     1     2     0     0]
 [    4     0     0     0     0     0     0    57     0     0     0     0
      0     0     0     0     0     0     0     0     0]
 [   43     0     0     0     0     0     1   303     0     0     0     3
      0     0     0     0     0     0     0     0     0]
 [   59     0     0     0     0     0     1   330     3     0     0     1
      0     0     0     0     0     2     4     0     0]
 [ 1267     0     2     0     0     4     3  6814    23   

In [80]:
from sklearn.metrics import classification_report

# Compute classification report
report_ner = classification_report(y_true_ner, y_pred_ner)
print("Classification Report (NER):\n", report_ner)


# Compute classification report
report_pos = classification_report(y_true_pos, y_pred_pos)
print("Classification Report (POS):\n", report_pos)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report (NER):
               precision    recall  f1-score   support

           0       0.98      0.99      0.98     85150
           1       0.00      0.00      0.00       146
           2       0.50      0.01      0.01       275
           3       0.00      0.00      0.00       121
           4       0.00      0.00      0.00        61
           5       0.00      0.00      0.00       350
           6       0.11      0.00      0.00       400
           7       0.64      0.84      0.72      8149
           8       0.32      0.02      0.04       895
           9       0.00      0.00      0.00        15
          10       1.00      0.02      0.05        85
          11       0.64      0.32      0.42       192
          12       0.00      0.00      0.00        67
          13       0.00      0.00      0.00        23
          14       0.00      0.00      0.00        63
          15       0.00      0.00      0.00        11
          16       0.00      0.00      0.00        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [81]:
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score

# For NER
f1_ner = f1_score(y_true_ner, y_pred_ner, average='weighted', zero_division=0)
accuracy_ner = accuracy_score(y_true_ner, y_pred_ner)
recall_ner = recall_score(y_true_ner, y_pred_ner, average='weighted', zero_division=0)
precision_ner = precision_score(y_true_ner, y_pred_ner, average='weighted', zero_division=0)

print("NER Metrics:")
print("F1 Score:", f1_ner)
print("Accuracy:", accuracy_ner)
print("Recall:", recall_ner)
print("Precision:", precision_ner)

# For POS
f1_pos = f1_score(y_true_pos, y_pred_pos, average='weighted', zero_division=0)
accuracy_pos = accuracy_score(y_true_pos, y_pred_pos)
recall_pos = recall_score(y_true_pos, y_pred_pos, average='weighted', zero_division=0)
precision_pos = precision_score(y_true_pos, y_pred_pos, average='weighted', zero_division=0)

print("POS Metrics:")
print("F1 Score:", f1_pos)
print("Accuracy:", accuracy_pos)
print("Recall:", recall_pos)
print("Precision:", precision_pos)


NER Metrics:
F1 Score: 0.9291583265097709
Accuracy: 0.9408224051627849
Recall: 0.9408224051627849
Precision: 0.926507689972078
POS Metrics:
F1 Score: 0.9019121154940104
Accuracy: 0.9157014023910975
Recall: 0.9157014023910975
Precision: 0.9051857188454433


In [82]:
# Save the trained model
model.save('file_path/saved_model.keras')

In [87]:
hidden_dim = 256
ner_classes = 21  # Number of NER classes
pos_classes = 15
loaded_model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)
loaded_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
    metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'}
)
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/GTL_dataset/saved_model.keras', custom_objects={'BertMultiTaskModel': BertMultiTaskModel})
loaded_model.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

TypeError: BertMultiTaskModel.__init__() missing 3 required positional arguments: 'hidden_dim', 'ner_classes', and 'pos_classes'

In [86]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import tensorflow as tf
from transformers import BertTokenizer
import numpy as np

app = FastAPI()

# Load the model and tokenizer
model_path = "path/to/your/saved/model"  # Update with the path to your saved model
model = tf.keras.models.load_model(model_path, custom_objects={'BertMultiTaskModel': BertMultiTaskModel})
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Define the input data model
class InferenceRequest(BaseModel):
    sentences: list[str]

# Define health check endpoint
@app.get("/health")
async def health_check():
    return {"status": "healthy"}

# Define prediction endpoint
@app.post("/predict")
async def predict(request: InferenceRequest):
    sentences = request.sentences

    # Tokenize the sentences
    inputs = tokenizer(sentences, padding=True, truncation=True, max_length=92, return_tensors='tf')
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Perform inference
    predictions = model({
        'input_ids': input_ids,
        'attention_mask': attention_mask
    })

    # Extract predictions
    ner_predictions = np.argmax(predictions['ner_output'], axis=-1).tolist()
    pos_predictions = np.argmax(predictions['pos_output'], axis=-1).tolist()

    return {
        "ner_predictions": ner_predictions,
        "pos_predictions": pos_predictions
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


ModuleNotFoundError: No module named 'fastapi'

In [17]:
import tensorflow as tf
from transformers import TFBertModel, BertTokenizer

class BertMultiTaskModel(tf.keras.Model):
    def __init__(self, hidden_dim, ner_classes, pos_classes):
        super(BertMultiTaskModel, self).__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')
        self.hidden_dim = hidden_dim
        self.ner_classes = ner_classes
        self.pos_classes = pos_classes
        self.dense = tf.keras.layers.Dense(hidden_dim, activation='relu')
        self.ner_output = tf.keras.layers.Dense(ner_classes, activation='softmax', name='ner_output')
        self.pos_output = tf.keras.layers.Dense(pos_classes, activation='softmax', name='pos_output')

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        x = self.dense(sequence_output)
        ner_logits = self.ner_output(x)
        pos_logits = self.pos_output(x)
        return {'ner_output': ner_logits, 'pos_output': pos_logits}

    def get_config(self):
        return {
            'hidden_dim': self.hidden_dim,
            'ner_classes': self.ner_classes,
            'pos_classes': self.pos_classes
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)

# Initialize and compile the model
hidden_dim = 256
ner_classes = 21
pos_classes = 15
model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
    metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'}
)

# Save the model
model.save('/content/drive/MyDrive/GTL_dataset/mod_save.weights.h5')

# To load the model, first reinitialize the architecture
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/GTL_dataset/mod_save.weights.h5', custom_objects={'BertMultiTaskModel': BertMultiTaskModel})
loaded_model.summary()
# Now you can use `loaded_model` for inference or further training


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [14]:
from transformers import TFBertModel, BertTokenizer
import tensorflow as tf

# Define the model
class BertMultiTaskModel(tf.keras.Model):
    def __init__(self, hidden_dim, ner_classes, pos_classes):
        super(BertMultiTaskModel, self).__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')
        self.hidden_dim = hidden_dim
        self.ner_classes = ner_classes
        self.pos_classes = pos_classes
        self.dense = tf.keras.layers.Dense(hidden_dim, activation='relu')
        self.ner_output = tf.keras.layers.Dense(ner_classes, activation='softmax', name='ner_output')
        self.pos_output = tf.keras.layers.Dense(pos_classes, activation='softmax', name='pos_output')

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        x = self.dense(sequence_output)
        ner_logits = self.ner_output(x)
        pos_logits = self.pos_output(x)
        return {'ner_output': ner_logits, 'pos_output': pos_logits}

    def get_config(self):
        return {
            'hidden_dim': self.hidden_dim,
            'ner_classes': self.ner_classes,
            'pos_classes': self.pos_classes
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)

# Initialize model
hidden_dim = 256
ner_classes = 10  # Example number of NER classes
pos_classes = 20  # Example number of POS classes
input_ids = Input(shape=(92,), dtype=tf.int32, name='input_ids')
attention_mask = Input(shape=(92,), dtype=tf.int32, name='attention_mask')

upld_model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)
upld_model({'input_ids': input_ids, 'attention_mask': attention_mask})  # This will build the model

# Compile the model
upld_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
    metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'}
)

# Load weights (if you have pre-trained weights)
upld_model.load_weights('/content/drive/MyDrive/GTL_dataset/model_save.weights.h5')
upld_model.summary()



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: A total of 2 objects could not be loaded. Example error message for object <Dense name=ner_output, built=True>:

Layer 'ner_output' expected 2 variables, but received 0 variables during loading. Expected: ['kernel', 'bias']

List of objects that could not be loaded:
[<Dense name=ner_output, built=True>, <Dense name=pos_output, built=True>]

In [12]:

hidden_dim = 256
ner_classes = 21
pos_classes = 15

upld_model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)
#input_shape = (92,)  # Assuming your input shape is (batch_size, 92)
#model.build(input_shape=(None, input_shape[0]))

# Compile the model (if needed)

upld_model.load_weights('/content/drive/MyDrive/GTL_dataset/model_save.weights.h5')
upld_model.summary()


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [36]:
# Example sentence for inference
test_sentence = ["আমি স্কুলে যাচ্ছি।"]  # Replace with your sentence

# Tokenize and preprocess the sentence
inputs = tokenizer(test_sentence, padding=True, truncation=True, max_length=50, return_tensors='tf')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
input_ids = inputs(shape=(92,), dtype=tf.int32, name='input_ids')
attention_mask = inputs(shape=(92,), dtype=tf.int32, name='attention_mask')

inputs = {
    'input_ids': input_ids,             # Replace with actual input_ids tensor
    'attention_mask': attention_mask    # Replace with actual attention_mask tensor
}

# Call the model with the inputs dictionary
outputs = model(inputs)
outputs


TypeError: 'BatchEncoding' object is not callable

In [34]:
# Perform inference
ner_prediction, pos_prediction = upld_model.predict([input_ids, attention_mask])

# Convert predictions to labels (assuming softmax output)
ner_labels = tf.argmax(ner_prediction, axis=-1)
pos_labels = tf.argmax(pos_prediction, axis=-1)

print("NER Prediction:", ner_labels)
print("POS Prediction:", pos_labels)


TypeError: Exception encountered when calling BertMultiTaskModel.call().

[1mtuple indices must be integers or slices, not str[0m

Arguments received by BertMultiTaskModel.call():
  • inputs=('tf.Tensor(shape=(1, 12), dtype=int32)', 'tf.Tensor(shape=(1, 12), dtype=int32)')

In [17]:
model = tf.keras.models.load_model('/content/drive/MyDrive/GTL_dataset/simple_model.h5', custom_objects={'BertMultiTaskModel': BertMultiTaskModel})


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Layer count mismatch when loading weights from file. Model expected 0 layers, found 3 saved layers.

In [15]:
from tensorflow.keras.utils import CustomObjectScope
from tensorflow.keras.models import load_model

model = BertMultiTaskModel(hidden_dim, ner_classes, pos_classes)

# Register the custom model class
with CustomObjectScope({'BertMultiTaskModel': BertMultiTaskModel}):
    loaded_model = load_model('/content/drive/MyDrive/GTL_dataset/simple_model.h5')


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Layer count mismatch when loading weights from file. Model expected 0 layers, found 3 saved layers.

In [None]:
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import custom_object_scope
from tensorflow.keras.models import load_model

class SimpleMultiTaskModel(Model):
    def __init__(self, hidden_dim, ner_classes, pos_classes, **kwargs):
        super(SimpleMultiTaskModel, self).__init__(**kwargs)
        self.hidden_dim = hidden_dim
        self.ner_classes = ner_classes
        self.pos_classes = pos_classes
        # Define layers
        self.dense_ner = Dense(ner_classes, activation='softmax', name='ner_output')
        self.dense_pos = Dense(pos_classes, activation='softmax', name='pos_output')

    def call(self, inputs):
        x = inputs
        ner_output = self.dense_ner(x)
        pos_output = self.dense_pos(x)
        return {'ner_output': ner_output, 'pos_output': pos_output}

    def get_config(self):
        config = super(SimpleMultiTaskModel, self).get_config()
        config.update({
            'hidden_dim': self.hidden_dim,
            'ner_classes': self.ner_classes,
            'pos_classes': self.pos_classes,
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

# Define input shape and create a model instance
input_shape = (128,)  # Adjust based on your input shape
inputs = Input(shape=input_shape)
model = SimpleMultiTaskModel(hidden_dim=256, ner_classes=10, pos_classes=20)
outputs = model(inputs)
model = Model(inputs, outputs)

# Compile the model
model.compile(optimizer='adam',
              loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
              metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'})

# Build the model explicitly by passing a dummy input
model.build((None, *input_shape))

# Save the model
model.save('/content/drive/MyDrive/GTL_dataset/simple_model.h5')

# Load the model with custom objects
with custom_object_scope({'SimpleMultiTaskModel': SimpleMultiTaskModel}):
    loaded_model = load_model('/content/drive/MyDrive/GTL_dataset/simple_model.h5')

# Verify the model is loaded correctly
loaded_model.summary()


In [None]:
# Example sentence for inference
test_sentence = ["আমি স্কুলে যাচ্ছি।"]  # Replace with your sentence

# Tokenize and preprocess the sentence
inputs = tokenizer(test_sentence, padding=True, truncation=True, max_length=50, return_tensors='tf')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']


In [None]:
# Perform inference
ner_prediction, pos_prediction = loaded_model.predict([input_ids, attention_mask])

# Convert predictions to labels (assuming softmax output)
ner_labels = tf.argmax(ner_prediction, axis=-1)
pos_labels = tf.argmax(pos_prediction, axis=-1)

print("NER Prediction:", ner_labels)
print("POS Prediction:", pos_labels)


In [None]:
# Example mapping dictionaries (assuming you have these)
ner_tags = {0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG', 5: 'B-LOC', 6: 'I-LOC'}
pos_label_map = {0: 'NOUN', 1: 'VERB', 2: 'ADJ', 3: 'ADV', 4: 'PRON', 5: 'DET', 6: 'ADP'}

# Convert predictions to human-readable labels
ner_prediction_labels = [ner_label_map[label] for label in ner_labels.numpy()[0]]
pos_prediction_labels = [pos_label_map[label] for label in pos_labels.numpy()[0]]

print("NER Prediction Labels:", ner_prediction_labels)
print("POS Prediction Labels:", pos_prediction_labels)


In [None]:
!pip install transformers tensorflow tensorflow-addons


In [None]:
import tensorflow as tf
from transformers import TFBertModel, BertTokenizer
import tensorflow_addons as tfa
from tensorflow.keras import layers, Model

# Load pre-trained BERT model and tokenizer
bert_model = TFBertModel.from_pretrained('bert-base-multilingual-cased')
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Input layers
input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")

# BERT model
bert_output = bert_model(input_ids, attention_mask=attention_mask)
sequence_output = bert_output.last_hidden_state

# NER and POS CRF layers
ner_crf = tfa.layers.CRF(num_classes=ner_classes, name="ner_crf")
pos_crf = tfa.layers.CRF(num_classes=pos_classes, name="pos_crf")

# CRF outputs
ner_output = ner_crf(sequence_output)
pos_output = pos_crf(sequence_output)

# Model definition
model = Model(inputs=[input_ids, attention_mask], outputs=[ner_output, pos_output])

# Compile model with loss and optimizer
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
              loss={'ner_crf': ner_crf.loss, 'pos_crf': pos_crf.loss},
              metrics={'ner_crf': 'accuracy', 'pos_crf': 'accuracy'})

model.summary()


In [None]:
# Example sentences and tags
#sentences = ["আমি স্কুলে যাই।", "সে একটি বড় বই পড়ছে।"]
#ner_tags = [[0, 1, 2, 3, 0], [0, 1, 2, 3, 4, 0]]
#pos_tags = [[0, 1, 2, 3, 0], [0, 1, 2, 3, 4, 0]]

# Tokenize sentences
inputs = tokenizer(sentences, padding=True, truncation=True, max_length=50, return_tensors='tf')

input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Convert tags to tensors and pad
ner_tags = tf.keras.preprocessing.sequence.pad_sequences(ner_tags, maxlen=50, padding='post')
pos_tags = tf.keras.preprocessing.sequence.pad_sequences(pos_tags, maxlen=50, padding='post')

# Convert to tensors
ner_tags = tf.convert_to_tensor(ner_tags)
pos_tags = tf.convert_to_tensor(pos_tags)


In [None]:
history = model.fit(
    x={'input_ids': input_ids, 'attention_mask': attention_mask},
    y={'ner_crf': ner_tags, 'pos_crf': pos_tags},
    batch_size=32,
    epochs=3,
    validation_split=0.2
)

In [None]:
# Evaluate model on validation set
val_loss, val_ner_acc, val_pos_acc = model.evaluate(
    x={'input_ids': val_input_ids, 'attention_mask': val_attention_mask},
    y={'ner_crf': val_ner_tags, 'pos_crf': val_pos_tags}
)
print(f"Validation NER Accuracy: {val_ner_acc}")
print(f"Validation POS Accuracy: {val_pos_acc}")


In [None]:
!pip install tensorflow-addons==0.17.0
!pip install tensorflow==2.10.0


Collecting tensorflow==2.10.0
  Downloading tensorflow-2.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.1 kB)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.10.0)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.11,>=2.10.0 (from tensorflow==2.10.0)
  Downloading keras-2.10.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras-preprocessing>=1.1.1 (from tensorflow==2.10.0)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting protobuf<3.20,>=3.9.2 (from tensorflow==2.10.0)
  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (787 bytes)
Collecting tensorboard<2.11,>=2.10 (from tensorflow==2.10.0)
  Downloading tensorboard-2.10.1-py3-none-any.whl.metadata (1.9 kB)
Collecting tensorflow-estimator<2.11,>=2.10.0 (from tensorflow==2.10.0)
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting google-auth-oaut

In [None]:
!pip install transformers scikit-learn


In [None]:
# Define the multi-task model
class MultiTaskModel(tf.keras.Model):
    def __init__(self, ner_classes, pos_classes):
        super(MultiTaskModel, self).__init__()
        self.bert = bert_model
        self.dense = tf.keras.layers.Dense(hidden_dim, activation='relu')
        self.ner_crf = tfa.layers.CRF(ner_classes)
        self.pos_crf = tfa.layers.CRF(pos_classes)

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        features = self.dense(sequence_output)

        ner_output, _ = self.ner_crf(features)
        pos_output, _ = self.pos_crf(features)

        return {'ner_output': ner_output, 'pos_output': pos_output}

    def get_loss(self, ner_labels, pos_labels, ner_pred, pos_pred):
        # Calculate the CRF loss using the built-in CRF loss method
        ner_loss = -self.ner_crf.log_likelihood(ner_labels, ner_pred, sequence_lengths=tf.reduce_sum(ner_labels != 0, axis=1))
        pos_loss = -self.pos_crf.log_likelihood(pos_labels, pos_pred, sequence_lengths=tf.reduce_sum(pos_labels != 0, axis=1))
        return tf.reduce_mean(ner_loss + pos_loss)

# Instantiate the model
model = MultiTaskModel(ner_classes, pos_classes)

# Define a custom loss function that uses model's get_loss method
def custom_loss(y_true, y_pred):
    ner_labels = y_true['ner_output']
    pos_labels = y_true['pos_output']
    ner_pred = y_pred['ner_output']
    pos_pred = y_pred['pos_output']
    return model.get_loss(ner_labels, pos_labels, ner_pred, pos_pred)

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss=custom_loss,
              metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'})

In [None]:
# Assuming X_train, X_val, X_test, y_train_ner, y_val_ner, y_test_ner, y_train_pos, y_val_pos, y_test_pos are already defined

# Train the model
history = model.fit(
    {'input_ids': X_train, 'attention_mask': attention_mask_train},
    {'ner_crf': y_train_ner, 'pos_crf': y_train_pos},
    validation_data=(
        {'input_ids': X_val, 'attention_mask': attention_mask_val},
        {'ner_crf': y_val_ner, 'pos_crf': y_val_pos}
    ),
    epochs=5,
    batch_size=32
)

# Evaluate the model
evaluation = model.evaluate(
    {'input_ids': X_test, 'attention_mask': attention_mask_test},
    {'ner_crf': y_test_ner, 'pos_crf': y_test_pos}
)


Epoch 1/5


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_filee5hi8qsu.py", line 15, in tf__call
        (ner_output, _) = ag__.converted_call(ag__.ld(self).ner_crf, (ag__.ld(features),), None, fscope)

    ValueError: Exception encountered when calling layer "multi_task_model_4" "                 f"(type MultiTaskModel).
    
    in user code:
    
        File "<ipython-input-16-0604c038637a>", line 18, in call  *
            ner_output, _ = self.ner_crf(features)
    
        ValueError: too many values to unpack (expected 2)
    
    
    Call arguments received by layer "multi_task_model_4" "                 f"(type MultiTaskModel):
      • inputs={'input_ids': 'tf.Tensor(shape=(None, 92), dtype=int64)', 'attention_mask': 'tf.Tensor(shape=(None, 92), dtype=int64)'}


In [None]:
import tensorflow as tf
from transformers import TFBertModel

# Hyperparameters
hidden_dim = 256
ner_classes = 10  # Number of NER classes
pos_classes = 20  # Number of POS classes
learning_rate = 0.001

# Load pre-trained BERT model
bert_model = TFBertModel.from_pretrained('bert-base-multilingual-cased')

# Define a simpler multi-task model
class SimpleMultiTaskModel(tf.keras.Model):
    def __init__(self, ner_classes, pos_classes):
        super(SimpleMultiTaskModel, self).__init__()
        self.bert = bert_model
        self.dense = tf.keras.layers.Dense(hidden_dim, activation='relu')
        self.ner_output_layer = tf.keras.layers.Dense(ner_classes, activation='softmax')
        self.pos_output_layer = tf.keras.layers.Dense(pos_classes, activation='softmax')

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        features = self.dense(sequence_output)

        ner_output = self.ner_output_layer(features)
        pos_output = self.pos_output_layer(features)

        return {'ner_output': ner_output, 'pos_output': pos_output}

# Instantiate the simpler model
simple_model = SimpleMultiTaskModel(ner_classes, pos_classes)

# Compile the model with simple categorical crossentropy loss
simple_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss={'ner_output': 'sparse_categorical_crossentropy', 'pos_output': 'sparse_categorical_crossentropy'},
              metrics={'ner_output': 'accuracy', 'pos_output': 'accuracy'})

# Train the simpler model
history = simple_model.fit(
    {'input_ids': X_train, 'attention_mask': attention_mask_train},
    {'ner_output': y_train_ner, 'pos_output': y_train_pos},
    validation_data=(
        {'input_ids': X_val, 'attention_mask': attention_mask_val},
        {'ner_output': y_val_ner, 'pos_output': y_val_pos}
    ),
    epochs=5,
    batch_size=32
)

# Evaluate the simpler model
evaluation = simple_model.evaluate(
    {'input_ids': X_test, 'attention_mask': attention_mask_test},
    {'ner_output': y_test_ner, 'pos_output': y_test_pos}
)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/5




ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 998, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1092, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/compile_utils.py", line 605, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/usr/local/lib/python3.10/dist-packages/keras/utils/metrics_utils.py", line 77, in decorated
        update_op = update_state_fn(*args, **kwargs)
    File "/usr/local/lib/python3.10/dist-packages/keras/metrics/base_metric.py", line 143, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/usr/local/lib/python3.10/dist-packages/keras/metrics/base_metric.py", line 700, in update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.10/dist-packages/keras/metrics/metrics.py", line 3669, in sparse_categorical_accuracy
        matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
    File "/usr/local/lib/python3.10/dist-packages/keras/utils/metrics_utils.py", line 970, in sparse_categorical_matches
        matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx())

    ValueError: Dimensions must be equal, but are 128 and 92 for '{{node Equal}} = Equal[T=DT_FLOAT, incompatible_shape_error=true](Cast_3, Cast_4)' with input shapes: [?,128], [?,92].


In [None]:
print(processed_data.shape)
print(processed_data.columns)
processed_data.info()
processed_data.describe()

(7002, 3)
Index(['Sentence', 'POS Tags', 'NER Tags'], dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7002 entries, 0 to 7001
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Sentence  7002 non-null   object
 1   POS Tags  7002 non-null   object
 2   NER Tags  7002 non-null   object
dtypes: object(3)
memory usage: 164.2+ KB


Unnamed: 0,Sentence,POS Tags,NER Tags
count,7002,7002,7002
unique,6991,6809,4985
top,মেষ [২১ মার্চ-২০ এপ্রিল],"[ADJ, NNC, PUNCT, QF, PART, NNP, QF, NNC, QF]","[B-OTH, B-OTH, B-OTH, B-OTH, B-OTH, B-OTH]"
freq,2,20,187


In [None]:
for i in range(3):
    print(f"Sentence: {processed_data['Sentence'][i]}")
    print(f"POS Tags: {processed_data['POS Tags'][i]}")
    print(f"NER Tags: {processed_data['NER Tags'][i]}")
    print()

Sentence: শনিবার (২৭ আগস্ট) রাতে পটুয়াখালী সদর থানার ভারপ্রাপ্ত কর্মকর্তা (ওসি) মো. মনিরুজ্জামান এ তথ্য নিশ্চিত করেছেন।
POS Tags: ['NNP', 'PUNCT', 'NNP', 'NNC', 'NNP', 'NNC', 'NNC', 'ADJ', 'NNC', 'PUNCT', 'NNP', 'NNP', 'DET', 'NNC', 'ADJ', 'VF']
NER Tags: ['B-D&T', 'B-OTH', 'B-D&T', 'B-D&T', 'B-GPE', 'I-GPE', 'I-GPE', 'B-PER', 'I-PER', 'B-OTH', 'B-PER', 'I-PER', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH']

Sentence: বায়ুদূষণ ও স্মার্ট ফোন ছেলেমেয়ে উভয়ের প্রজনন ক্ষমতা হ্রাস করে দিচ্ছে।
POS Tags: ['NNC', 'CONJ', 'NNC', 'NNC', 'NNC', 'PRO', 'NNC', 'NNC', 'NNC', 'VNF', 'VF']
NER Tags: ['B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-PER', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH']

Sentence: ছাত্র রাজনীতির বর্তমান অবস্থার শুরু হয়েছিলো স্বৈরশাসক এরশাদের হাত ধরে।
POS Tags: ['NNC', 'NNC', 'ADJ', 'NNC', 'NNC', 'VF', 'NNC', 'NNP', 'NNC', 'PP']
NER Tags: ['B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-OTH', 'B-PER', 'B-PER', 'B-OTH', 'B-OTH']



In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from transformers import TFBertModel, BertTokenizer
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split


ModuleNotFoundError: No module named 'tensorflow_addons'

In [None]:
!pip install transformers scikit-learn
!pip install tensorflow==2.10.0
!pip install tensorflow-addons==0.17.0


Collecting tensorflow==2.10.0
  Downloading tensorflow-2.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.1 kB)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.10.0)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.11,>=2.10.0 (from tensorflow==2.10.0)
  Downloading keras-2.10.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras-preprocessing>=1.1.1 (from tensorflow==2.10.0)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting protobuf<3.20,>=3.9.2 (from tensorflow==2.10.0)
  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (787 bytes)
Collecting tensorboard<2.11,>=2.10 (from tensorflow==2.10.0)
  Downloading tensorboard-2.10.1-py3-none-any.whl.metadata (1.9 kB)
Collecting tensorflow-estimator<2.11,>=2.10.0 (from tensorflow==2.10.0)
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting google-auth-oaut

Collecting tensorflow-addons==0.17.0
  Downloading tensorflow_addons-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading tensorflow_addons-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.17.0
^C


In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from transformers import TFBertModel, BertTokenizer
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split


In [None]:
# Define hyperparameters
#vocab_size = 10000  # Example vocabulary size
#embedding_dim = 128
#hidden_dim = 256
#ner_classes = 10  # Number of NER classes
#pos_classes = 20  # Number of POS classes
#sequence_length = 50  # Maximum sequence length

# Load BERT model and tokenizer
bert_model_name = 'bert-base-uncased'
bert_model = TFBertModel.from_pretrained(bert_model_name)
tokenizer = BertTokenizer.from_pretrained(bert_model_name)

# Define the model
class MultiTaskModel(tf.keras.Model):
    def __init__(self, ner_classes, pos_classes):
        super(MultiTaskModel, self).__init__()
        self.bert = bert_model
        self.dense = tf.keras.layers.Dense( 256, activation='relu')
        self.ner_crf = tfa.layers.CRF(ner_classes)
        self.pos_crf = tfa.layers.CRF(pos_classes)

    def call(self, inputs, attention_mask):
        bert_output = self.bert(inputs, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        features = self.dense(sequence_output)
        ner_output = self.ner_crf(features)
        pos_output = self.pos_crf(features)
        return ner_output, pos_output

# Instantiate the model
model = MultiTaskModel(ner_classes, pos_classes)
model.compile(optimizer=Adam(learning_rate=2e-5),
              loss={'crf': lambda y_true, y_pred: ner_crf.compute_loss(y_true, y_pred),
                    'crf': lambda y_true, y_pred: pos_crf.compute_loss(y_true, y_pred)},
              metrics={'crf': lambda y_true, y_pred: ner_crf.compute_accuracy(y_true, y_pred)})


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [None]:
from sklearn.model_selection import train_test_split

X_texts = sentences
y_ner = ner_tags
y_pos = pos_tags

# Tokenize and prepare input data
inputs = tokenizer(X_texts, padding=True, truncation=True, max_length=50, return_tensors='tf')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Split data into train, validation, and test sets
X_train, X_temp, y_train_ner, y_temp_ner = train_test_split(input_ids, y_ner, test_size=0.2, random_state=42)
X_val, X_test, y_val_ner, y_test_ner = train_test_split(X_temp, y_temp_ner, test_size=0.2, random_state=42)

# Note: For simplicity, assume POS labels are split similarly
y_train_pos, y_val_pos, y_test_pos = train_test_split(y_pos, test_size=0.2, random_state=42)
y_val_pos, y_test_pos = train_test_split(y_val_pos, test_size=0.2, random_state=42)


ValueError: too many values to unpack (expected 2)

In [None]:
import tensorflow as tf
import numpy as np
from transformers import TFBertModel, BertTokenizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define hyperparameters
vocab_size = 10000  # Example vocabulary size (not directly used here since BERT handles its own vocabulary)
embedding_dim = 128  # Dimension of embedding vectors (not used here as BERT embeddings are used)
hidden_dim = 256  # Dimension of hidden layer
ner_classes = 10  # Number of NER classes
pos_classes = 20  # Number of POS classes
sequence_length = 50  # Maximum sequence length
batch_size = 8
epochs = 3
learning_rate = 2e-5

# Load BERT model and tokenizer
bert_model_name = 'bert-base-uncased'
bert_model = TFBertModel.from_pretrained(bert_model_name)
tokenizer = BertTokenizer.from_pretrained(bert_model_name)

# data preparation
X_texts = [sentence[0] for sentence in sentences]
y_ner = y_ner.numpy() if isinstance(y_ner, tf.Tensor) else y_ner
y_pos = y_pos.numpy() if isinstance(y_pos, tf.Tensor) else y_pos

# Tokenize and prepare input data
inputs = tokenizer(X_texts, padding=True, truncation=True, max_length=sequence_length, return_tensors='tf')

# Extract the input tensors from the dictionary
input_ids = inputs['input_ids'].numpy()
attention_mask = inputs['attention_mask'].numpy()

# Split data into train, validation, and test sets
X_train, X_temp, y_train_ner, y_temp_ner = train_test_split(input_ids, y_ner, test_size=0.4, random_state=42)
X_val, X_test, y_val_ner, y_test_ner = train_test_split(X_temp, y_temp_ner, test_size=0.5, random_state=42)

# Split POS labels similarly
#y_train_pos, y_val_pos, y_test_pos = train_test_split(y_pos, test_size=0.4, random_state=42)
#y_val_pos, y_test_pos = train_test_split(y_val_pos, test_size=0.5, random_state=42)

# First, split into train and temp (which will later be split into val and test)
y_train_pos, y_temp_pos = train_test_split(y_pos, test_size=0.4, random_state=42)

# Then split the temp set into validation and test sets
y_val_pos, y_test_pos = train_test_split(y_temp_pos, test_size=0.5, random_state=42)

# Define the model
class MultiTaskModel(tf.keras.Model):
    def __init__(self, ner_classes, pos_classes):
        super(MultiTaskModel, self).__init__()
        self.bert = bert_model
        self.dense = Dense(hidden_dim, activation='relu')
        self.ner_crf = tfa.layers.CRF(ner_classes)
        self.pos_crf = tfa.layers.CRF(pos_classes)

    def call(self, inputs, attention_mask):
        bert_output = self.bert(inputs, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        features = self.dense(sequence_output)
        ner_output = self.ner_crf(features)
        pos_output = self.pos_crf(features)
        return ner_output, pos_output

def crf_loss(y_true, y_pred):
    log_likelihood, transition_params = tfa.text.crf.crf_log_likelihood(
        y_pred, y_true, tf.math.count_nonzero(y_pred, axis=1)
    )
    return -tf.reduce_mean(log_likelihood)
model = MultiTaskModel(ner_classes, pos_classes)
model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss={'crf_output_1': crf_loss, 'crf_output_2': crf_loss},
              metrics={'crf_output_1': 'accuracy', 'crf_output_2': 'accuracy'})

#model.summary()

# Train the model
history = model.fit(
    {'input_ids': X_train, 'attention_mask': attention_mask[:len(X_train)]},
    {'ner_crf': y_train_ner, 'pos_crf': y_train_pos},
    validation_data=(
        {'input_ids': X_val, 'attention_mask': attention_mask[len(X_train):len(X_train) + len(X_val)]},
        {'ner_crf': y_val_ner, 'pos_crf': y_val_pos}),
    epochs=epochs,
    batch_size=batch_size
)

# Evaluate the model
eval_results = model.evaluate(
    {'input_ids': X_test, 'attention_mask': attention_mask[len(X_train) + len(X_val):]},
    {'ner_crf': y_test_ner, 'pos_crf': y_test_pos}
)

print(f"Test Loss (NER): {eval_results[1]}")
print(f"Test Accuracy (NER): {eval_results[3]}")
print(f"Test Loss (POS): {eval_results[2]}")
print(f"Test Accuracy (POS): {eval_results[4]}")

# Generate predictions
ner_predictions, pos_predictions = model.predict({'input_ids': X_test, 'attention_mask': attention_mask[len(X_train) + len(X_val):]})

# Convert logits to predicted labels
ner_pred_labels = np.argmax(ner_predictions, axis=-1)
pos_pred_labels = np.argmax(pos_predictions, axis=-1)

# Flatten the labels for confusion matrix
def flatten_labels(labels):
    return [label for sublist in labels for label in sublist]

y_test_flat_ner = flatten_labels(y_test_ner)
y_pred_flat_ner = flatten_labels(ner_pred_labels)

# Compute confusion matrix for NER
conf_matrix_ner = confusion_matrix(y_test_flat_ner, y_pred_flat_ner)

# Plot confusion matrix for NER
plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix_ner, annot=True, fmt='d', cmap='Blues', xticklabels=range(ner_classes), yticklabels=range(ner_classes))
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix for NER')
plt.show()

# Generate classification report for NER
report_ner = classification_report(y_test_flat_ner, y_pred_flat_ner, labels=range(ner_classes))
print("NER Classification Report:")
print(report_ner)

# Similarly, compute and plot confusion matrix and classification report for POS
y_test_flat_pos = flatten_labels(y_test_pos)
y_pred_flat_pos = flatten_labels(pos_pred_labels)

conf_matrix_pos = confusion_matrix(y_test_flat_pos, y_pred_flat_pos)

plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix_pos, annot=True, fmt='d', cmap='Blues', xticklabels=range(pos_classes), yticklabels=range(pos_classes))
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix for POS')
plt.show()

report_pos = classification_report(y_test_flat_pos, y_pred_flat_pos, labels=range(pos_classes))
print("POS Classification Report:")
print(report_pos)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Models passed to `fit` can only have `training` and the first argument in `call()` as positional arguments, found: ['attention_mask'].

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
from transformers import TFBertModel, BertTokenizer

# Hyperparameters
vocab_size = 10000  # Example vocabulary size
embedding_dim = 128
hidden_dim = 256
ner_classes = 10  # Number of NER classes
pos_classes = 20  # Number of POS classes
sequence_length = 50  # Maximum sequence length
learning_rate = 0.001

# Load pre-trained BERT model
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Define the multi-task model
class MultiTaskModel(tf.keras.Model):
    def __init__(self, ner_classes, pos_classes):
        super(MultiTaskModel, self).__init__()
        self.bert = bert_model
        self.dense = tf.keras.layers.Dense(hidden_dim, activation='relu')
        self.ner_crf = tfa.layers.CRF(ner_classes)
        self.pos_crf = tfa.layers.CRF(pos_classes)

    def call(self, inputs):
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state
        features = self.dense(sequence_output)
        ner_output = self.ner_crf(features)
        pos_output = self.pos_crf(features)
        return {'ner_crf': ner_output, 'pos_crf': pos_output}

# Instantiate the model
model = MultiTaskModel(ner_classes, pos_classes)

# Define loss functions for NER and POS using the CRF built-in loss
losses = {
    'ner_crf': model.ner_crf.get_loss,
    'pos_crf': model.pos_crf.get_loss
}

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss=losses,
              metrics={'ner_crf': 'accuracy', 'pos_crf': 'accuracy'})

# Example input data (replace with actual data)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
X_texts = ["Sentence one.", "Sentence two.", "Another sentence."]
inputs = tokenizer(X_texts, padding=True, truncation=True, max_length=sequence_length, return_tensors='tf')

# Example labels (replace with actual labels)
y_ner = tf.random.uniform((len(X_texts), sequence_length), maxval=ner_classes, dtype=tf.int32)
y_pos = tf.random.uniform((len(X_texts), sequence_length), maxval=pos_classes, dtype=tf.int32)

# Train the model (replace with actual training data and labels)
model.fit({'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask']},
          {'ner_crf': y_ner, 'pos_crf': y_pos},
          epochs=5, batch_size=32)

# Evaluate the model (replace with actual test data and labels)
model.evaluate({'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask']},
               {'ner_crf': y_ner, 'pos_crf': y_pos})
