In [1]:
import torch

# Confirm that the GPU is detected

assert torch.cuda.is_available()

# Get the GPU device name.
device_name = torch.cuda.get_device_name()
n_gpu = torch.cuda.device_count()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")
device = torch.device("cuda")

Found device: Tesla T4, n_gpu: 1


In [2]:
!pip install transformers
!pip install -U -q PyDrive
!pip install datasets
!pip install seqeval

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import os
import itertools
import pandas as pd
import numpy as np
from datasets import Dataset
from datasets import load_metric
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("nlpaueb/legal-bert-base-uncased", num_labels=15) # num_labels = 14 + 1


Some weights of the model checkpoint at nlpaueb/legal-bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification wer

In [4]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/Colab Notebooks/
import json

# from datasets import load_dataset, concatenate_datasets, Features, Value, ClassLabel, Sequence
# dataset = load_dataset('json', data_files='NER_JUDGEMENT_WikiAnn_Format.json')

import pandas as pd
train_df_judgement = pd.read_json('NER_TRAIN_JUDGEMENT_PREPROCESSED.json')
train_df_preamble = pd.read_json('NER_TRAIN_PREAMBLE_PREPROCESSED.json')

test_df_judgement = pd.read_json('NER_DEV_JUDGEMENT_PREPROCESSED.json')
test_df_preamble = pd.read_json('NER_DEV_PREAMBLE_PREPROCESSED.json')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks


In [5]:
df = pd.concat([train_df_judgement, train_df_preamble])
test_df = pd.concat([test_df_judgement, test_df_preamble])

In [6]:
train_dataset = Dataset.from_pandas(df)
test_dataset = Dataset.from_pandas(test_df)

In [7]:
labels_list = ["COURT", "PETITIONER", "RESPONDENT", "JUDGE", "LAWYER", "DATE", "ORG", "GPE", "STATUTE", "PROVISION", "PRECEDENT", "CASE_NUMBER", "WITNESS", "OTHER_PERSON", "OTHERS"]
label_encoding_dict = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14}
label_list_encoding_dict = {0: "OTHERS", 1: "PETITIONER", 2: "COURT", 3: "RESPONDENT", 4: "JUDGE", 5: "OTHER_PERSON", 6: "LAWYER", 7: "DATE", 8: "ORG", 9: "GPE", 10: "STATUTE", 11: "PROVISION", 12: "PRECEDENT", 13: "CASE_NUMBER", 14: "WITNESS"}

In [8]:
batch_size = 10

def tokenize_all_labels(rows):
    label_all = True
    tokenized_inputs = tokenizer(list(rows["tokens"]), truncation = True, is_split_into_words = True)
    labels = []
    for index, label in enumerate(rows["ner_tags"]):
        # print(i, label)
        previous_word_idx = None
        word_ids = tokenized_inputs.word_ids(batch_index = index)
        
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None: label_ids.append(-100)
            elif label[word_idx] == '0': label_ids.append(0)
            elif word_idx != previous_word_idx: label_ids.append(label_encoding_dict[label[word_idx]])
            else: label_ids.append(label_encoding_dict[label[word_idx]] if label_all else -100)
            previous_word_idx = word_idx
        labels.append(label_ids)
        
    tokenized_inputs["labels"] = labels
    return tokenized_inputs


train_dataset_tokenized = train_dataset.map(tokenize_all_labels, batched=True)
test_dataset_tokenized = test_dataset.map(tokenize_all_labels, batched=True)

Map:   0%|          | 0/9047 [00:00<?, ? examples/s]

Map:   0%|          | 0/867 [00:00<?, ? examples/s]

In [9]:
import warnings
warnings.filterwarnings('ignore')

tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")
model = AutoModelForTokenClassification.from_pretrained("nlpaueb/legal-bert-base-uncased", num_labels=len(labels_list))
metric = load_metric("seqeval")

training_arguments = TrainingArguments(
    "eval_indian_legal_ner",
    evaluation_strategy = "epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    weight_decay=1e-5,
)

data_collator = DataCollatorForTokenClassification(tokenizer)

def evaluate_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    actual_predictions = [[labels_list[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    actual_labels = [[labels_list[l] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    results = metric.compute(predictions=actual_predictions, references=actual_labels)
    return {"precision": results["overall_precision"], "recall": results["overall_recall"], "f1": results["overall_f1"], "accuracy": results["overall_accuracy"]}
    
trainer = Trainer(
    model,
    training_arguments,
    train_dataset = train_dataset_tokenized,
    eval_dataset = test_dataset_tokenized,
    data_collator = data_collator,
    tokenizer=tokenizer,
    compute_metrics=evaluate_metrics
)
trainer.train()
trainer.evaluate()
trainer.save_model('indian_legal_ner.model')

Some weights of the model checkpoint at nlpaueb/legal-bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initia

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2753,0.120609,0.802235,0.808559,0.805384,0.963789
2,0.1008,0.106156,0.819414,0.844144,0.831595,0.968203
3,0.0493,0.108245,0.84955,0.871171,0.860225,0.971716
