In [36]:
import numpy as np
import pandas as pd
import torch 

# NER task

In [37]:
# Check the available device and use GPU if available, otherwise use CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Print the device being used
print(f'Working on {device}')

Working on cuda


## Data Processing

In [38]:
from datasets import load_dataset

# Load the CoNLL-2003 dataset using the 'datasets' library.
dataset = load_dataset('conll2003')
label_names = dataset['train'].features['ner_tags'].feature.names

dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})

In [39]:
from transformers import AutoTokenizer

# Create a tokenizer instance by loading the pre-trained model.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [40]:
def align_target(labels, word_ids):
    # Define a mapping from beginning (B-) labels to inside (I-) labels
    begin2inside = {
        1: 2,  # B-LOC -> I-LOC
        3: 4,  # B-MISC -> I-MISC
        5: 6,  # B-ORG -> I-ORG
        7: 8    # B-PER -> I-PER
    }

    # Initialize an empty list to store aligned labels and a variable to track the last word
    align_labels = []
    last_word = None

    # Iterate through the word_ids
    for word in word_ids:
        if word is None:
            label = -100  # Set label to -100 for None word_ids
        elif word != last_word:
            label = labels[word]  # Use the label corresponding to the current word_id
        else:
            label = labels[word]
            # Change B- to I- if the previous word is the same
            if label in begin2inside:
                label = begin2inside[label]  # Map B- to I-

        # Append the label to the align_labels list and update last_word
        align_labels.append(label)
        last_word = word

    return align_labels

In [41]:
def tokenize_fn(batch):
    # Tokenize the input batch
    tokenized_inputs = tokenizer(batch['tokens'], truncation=True, is_split_into_words=True)

    # Extract the labels batch from the input batch
    labels_batch = batch['ner_tags']

    # Initialize a list to store aligned targets for each example in the batch
    aligned_targets_batch = []

    # Iterate through each example and align the labels
    for i, labels in enumerate(labels_batch):
        # Extract the word_ids for the current example
        word_ids = tokenized_inputs.word_ids(i)

        # Use the align_target function to align the labels
        aligned_targets_batch.append(align_target(labels, word_ids))

    # Add the aligned labels to the tokenized inputs under the key "labels"
    tokenized_inputs["labels"] = aligned_targets_batch

    # Return the tokenized inputs, including aligned labels
    return tokenized_inputs

In [42]:
tokenized_dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset['train'].column_names)

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

In [43]:
from transformers import DataCollatorForTokenClassification

# Create a DataCollatorForTokenClassification object
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Testing data using the data collator
batch = data_collator([tokenized_dataset['train'][i] for i in range(2)])

# Display the resulting batch
# batch

## Evaluation Function

In [44]:
# Import the seqeval metric from Hugging Face's datasets library
from datasets import load_metric  

# Load the seqeval metric which can evaluate NER and other sequence tasks
metric = load_metric("seqeval")

# Function to compute evaluation metrics from model logits and true labels
def compute_metrics(logits_and_labels):
    
    # Unpack the logits and labels
    logits, labels = logits_and_labels 
    
    # Get predictions from the logits
    predictions = np.argmax(logits, axis=-1)
    
    # Remove ignored index (special tokens)
    str_labels = [
    [label_names[t] for t in label if t!=-100] for label in labels
    ]

    str_preds = [
    [label_names[p] for (p, t) in zip(prediction, label) if t != -100]
    for prediction, label in zip(predictions, labels)
    ]
    
    # Compute metrics
    results = metric.compute(predictions=str_preds, references=str_labels) 
    
    # Extract key metrics
    return {
    "precision": results["overall_precision"],
    "recall": results["overall_recall"], 
    "f1": results["overall_f1"],
    "accuracy": results["overall_accuracy"]  
    }

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


## Load Model

In [45]:
# Create mapping from label ID to label string name
id2label = {k: v for k, v in enumerate(label_names)} 

# Create reverse mapping from label name to label ID
label2id = {v: k for k, v in enumerate(label_names)}

In [46]:
# Load pretrained token classification model from Transformers 
from transformers import AutoModelForTokenClassification, BertForTokenClassification

# Initialize model object with pretrained weights
# model = AutoModelForTokenClassification.from_pretrained(
#   checkpoint,

#   # Pass in label mappings
#   id2label=id2label,  
#   label2id=label2id
# )
model = BertForTokenClassification.from_pretrained('bert-base-uncased', 
                                                   num_labels=len(id2label),
                                                   id2label=id2label,
                                                   label2id=label2id)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Freeze Layer

In [47]:
import os
import torch
from tqdm import tqdm

In [48]:
def freeze_layers(model, num_layers_to_freeze):
    """
    Freeze the first 'num_layers_to_freeze' layers of a model.

    Args:
    model (torch.nn.Module): The model whose layers are to be frozen.
    num_layers_to_freeze (int): The number of layers to freeze.

    Returns:
    None
    """
    # Check for the typical attribute in BERT-like models
    encoder_layers = model.bert.encoder.layer

    # Freeze specified number of layers in the encoder
    layer_count = 0
    for layer in encoder_layers:
        if layer_count < num_layers_to_freeze:
            for param in layer.parameters():
                param.requires_grad = False
        layer_count += 1

        # Break if we have frozen the desired number of layers
        if layer_count >= num_layers_to_freeze:
            break


In [49]:
# Freeze the first 11 layers of the model
freeze_layers(model, 11)

In [50]:
def check_if_layers_are_frozen(model):
    """
    Check if the encoder layers of a model are frozen.

    Args:
    model (torch.nn.Module): The model to check.

    Returns:
    bool: True if all encoder layers are frozen, False otherwise.
    """
    encoder_layers = model.bert.encoder.layer
    all_frozen = True
    layer_count = 0

    for layer in encoder_layers:
        layer_frozen = True
        for param in layer.parameters():
            if param.requires_grad:
                layer_frozen = False
                all_frozen = False
        print(f"Layer {layer_count + 1} is {'frozen' if layer_frozen else 'not frozen'}.")
        layer_count += 1

    if all_frozen:
        print("All layers are frozen.")
    else:
        print("Some layers are not fully frozen.")

    return all_frozen

# Now, check if the layers are frozen
check_if_layers_are_frozen(model)

Layer 1 is frozen.
Layer 2 is frozen.
Layer 3 is frozen.
Layer 4 is frozen.
Layer 5 is frozen.
Layer 6 is frozen.
Layer 7 is frozen.
Layer 8 is frozen.
Layer 9 is frozen.
Layer 10 is frozen.
Layer 11 is frozen.
Layer 12 is not frozen.
Some layers are not fully frozen.


False

## Trainer Parameter

In [51]:
# Configure training arguments using TrainigArguments class
from transformers import TrainingArguments

training_args = TrainingArguments(
  # Location to save fine-tuned model 
  output_dir = "fine_tuned_model",

  # Evaluate each epoch
  evaluation_strategy = "epoch",

  # Learning rate for Adam optimizer
  learning_rate = 5e-5, 
  
  # Batch sizes for training and evaluation
  per_device_train_batch_size = 16,
  per_device_eval_batch_size = 16,
    
  # Number of training epochs
  num_train_epochs = 50,

  # L2 weight decay regularization
  weight_decay = 0.01,
  
  save_strategy="no"
)



## Fine-Tuning

In [52]:
from transformers import TrainerCallback

class EarlyStoppingCallback(TrainerCallback):
    def __init__(self, patience=3):
        self.patience = patience
        self.best_metric = float('-inf')
        self.counter = 0

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        # Extract the evaluation metric from the state
        metric = metrics['eval_loss']  ## edit here
        # Check if the metric has improved
        # If improving, keep training
        if metric < self.best_metric:
            self.best_metric = metric
            self.counter = 0
            trainer.save_model('../NER/one_layer/') # edit here
        else:
            self.counter += 1
            # If no improvement after patience epochs, stop training
            if self.counter >= self.patience:
                control.should_training_stop = True
                print("Training stopped due to lack of improvement.")

In [53]:
from transformers import Trainer

model = BertForTokenClassification.from_pretrained('bert-base-uncased', 
                                                   num_labels=len(id2label),
                                                   id2label=id2label,
                                                   label2id=label2id)

freeze_layers(model, 11) # edit here

trainer = Trainer(
  # Model to train
  model=model, 
  
  # Training arguments
  args=training_args,

  # Training and validation datasets
  train_dataset=tokenized_dataset["train"],
  eval_dataset=tokenized_dataset["validation"],

  # Tokenizer
  tokenizer=tokenizer,

  # Custom metric function
  compute_metrics=compute_metrics,

  # Data collator
  data_collator=data_collator,
    
  callbacks=[EarlyStoppingCallback(patience=3)]
)

trainer.train()

#trainer.save_model('../ner/one_layer/')

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3171,0.093593,0.808401,0.871255,0.838652,0.973613
2,0.0825,0.070523,0.856598,0.907775,0.881445,0.980047
3,0.0498,0.065968,0.881252,0.919219,0.899835,0.982652


Training stopped due to lack of improvement.


TrainOutput(global_step=2634, training_loss=0.12119111384780641, metrics={'train_runtime': 277.1959, 'train_samples_per_second': 2532.685, 'train_steps_per_second': 158.372, 'total_flos': 1020143109346326.0, 'train_loss': 0.12119111384780641, 'epoch': 3.0})

## Gradual Unfreezing

In [None]:
model = BertForTokenClassification.from_pretrained('bert-base-uncased', 
                                                   num_labels=len(id2label),
                                                   id2label=id2label,
                                                   label2id=label2id)
freeze_layers(model, 11)

In [None]:
from transformers import TrainerCallback

class GradualUnfreezingCallback(TrainerCallback):
    def __init__(self, model, patience=2): # edit patience here
        self.model = model
        self.patience = patience
        self.best_metric = float('inf')
        self.counter = 0
        self.num_freeze = 11
                 
    def unfreeze_last_layers(self, model, num_layers_to_unfreeze):
                 
        # Initially freeze all layers
        for param in model.parameters():
            param.requires_grad = False

        # Check for the typical attribute in BERT-like models
        encoder_layers = model.bert.encoder.layer
        total_layers = len(encoder_layers)

        # Unfreeze the specified number of last layers
        layers_to_start_unfreezing = total_layers - num_layers_to_unfreeze

        for i, layer in enumerate(encoder_layers):
            if i >= layers_to_start_unfreezing:
                for param in layer.parameters():
                    param.requires_grad = True
    
    def check_if_layers_are_frozen(self, model):
        encoder_layers = model.bert.encoder.layer
        all_frozen = True
        layer_count = 0

        for layer in encoder_layers:
            layer_frozen = True
            for param in layer.parameters():
                if param.requires_grad:
                    layer_frozen = False
                    all_frozen = False
            print(f"Layer {layer_count + 1} is {'frozen' if layer_frozen else 'not frozen'}.")
            layer_count += 1

        if all_frozen:
            print("All layers are frozen.")
        else:
            print("Some layers are not fully frozen.")

        return all_frozen

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        # Extract the evaluation metric from the state
        metric = metrics['eval_loss']  ## edit here
        # Check if the metric has improved
        # If improving, keep training
        if metric < self.best_metric:
            self.best_metric = metric
            self.counter = 0
        # If not improving, unfreeze a layer or stop early
        else:
            self.counter += 1
            # If no improvement after patience epochs, stop training
            if self.counter > self.patience:
                control.should_training_stop = True
                print("Training stopped due to lack of improvement.")
            else:
                self.num_freeze -= 1
                 
                if self.num_freeze < 0:
                    control.should_training_stop = True
                    print('Stopping triggered. No improvement in validation loss on last layer')
                else:
                    self.unfreeze_last_layers(self.model,12-self.num_freeze)
                    print(f'unfreezing layer {12-self.num_freeze}')


trainer = Trainer(
  # Model to train
  model=model, 
  
  # Training arguments
  args=training_args,

  # Training and validation datasets
  train_dataset=tokenized_dataset["train"],
  eval_dataset=tokenized_dataset["validation"],

  # Tokenizer
  tokenizer=tokenizer,

  # Custom metric function
  compute_metrics=compute_metrics,

  # Data collator
  data_collator=data_collator,
    
  callbacks=[GradualUnfreezingCallback(model=model, patience=2)]
)

In [None]:
trainer.train()

#trainer.save_model('../ner/one_layer/') # edit here