In [4]:
import torch
from transformers import LayoutLMForTokenClassification, TrainingArguments, Trainer


  from .autonotebook import tqdm as notebook_tqdm





In [5]:
from datasets import load_from_disk

train_dataset = load_from_disk('dataset/preprocessed_data/train')
test_dataset = load_from_disk('dataset/preprocessed_data/test')


In [1]:
import torch

print("CUDA Available: ", torch.cuda.is_available())
print("Number of GPUs: ", torch.cuda.device_count())
if torch.cuda.is_available():
    print("CUDA Device Name: ", torch.cuda.get_device_name(torch.cuda.current_device()))


CUDA Available:  True
Number of GPUs:  1
CUDA Device Name:  NVIDIA GeForce GTX 1650 with Max-Q Design


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
labels = ["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"]


model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=len(labels))
model.to(device)

Some weights of LayoutLMForTokenClassification were not initialized from the model checkpoint at microsoft/layoutlm-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


LayoutLMForTokenClassification(
  (layoutlm): LayoutLMModel(
    (embeddings): LayoutLMEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (x_position_embeddings): Embedding(1024, 768)
      (y_position_embeddings): Embedding(1024, 768)
      (h_position_embeddings): Embedding(1024, 768)
      (w_position_embeddings): Embedding(1024, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): LayoutLMEncoder(
      (layer): ModuleList(
        (0-11): 12 x LayoutLMLayer(
          (attention): LayoutLMAttention(
            (self): LayoutLMSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
 

In [9]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)



In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)


In [11]:
trainer.train()


  9%|▉         | 10/114 [00:32<06:56,  4.01s/it]

{'loss': 1.3372, 'grad_norm': 7.078808784484863, 'learning_rate': 1.824561403508772e-05, 'epoch': 0.26}


 18%|█▊        | 20/114 [01:10<05:43,  3.66s/it]

{'loss': 0.2312, 'grad_norm': 0.8434969782829285, 'learning_rate': 1.649122807017544e-05, 'epoch': 0.53}


 26%|██▋       | 30/114 [01:46<04:55,  3.51s/it]

{'loss': 0.0156, 'grad_norm': 0.14894023537635803, 'learning_rate': 1.4736842105263159e-05, 'epoch': 0.79}


                                                
 33%|███▎      | 38/114 [02:19<03:22,  2.67s/it]

{'eval_loss': 0.002503749681636691, 'eval_runtime': 8.3256, 'eval_samples_per_second': 6.006, 'eval_steps_per_second': 1.561, 'epoch': 1.0}


 35%|███▌      | 40/114 [02:27<06:15,  5.08s/it]

{'loss': 0.0046, 'grad_norm': 0.06458652764558792, 'learning_rate': 1.2982456140350879e-05, 'epoch': 1.05}


 44%|████▍     | 50/114 [03:09<04:02,  3.78s/it]

{'loss': 0.0028, 'grad_norm': 0.043660860508680344, 'learning_rate': 1.1228070175438597e-05, 'epoch': 1.32}


 53%|█████▎    | 60/114 [03:52<04:19,  4.81s/it]

{'loss': 0.0021, 'grad_norm': 0.03512336686253548, 'learning_rate': 9.473684210526315e-06, 'epoch': 1.58}


 61%|██████▏   | 70/114 [04:46<03:47,  5.17s/it]

{'loss': 0.0018, 'grad_norm': 0.03204615041613579, 'learning_rate': 7.719298245614036e-06, 'epoch': 1.84}


                                                
 67%|██████▋   | 76/114 [05:20<02:18,  3.65s/it]

{'eval_loss': 0.0010749995708465576, 'eval_runtime': 8.7773, 'eval_samples_per_second': 5.697, 'eval_steps_per_second': 1.481, 'epoch': 2.0}


 70%|███████   | 80/114 [05:33<02:21,  4.17s/it]

{'loss': 0.0016, 'grad_norm': 0.029229240491986275, 'learning_rate': 5.964912280701755e-06, 'epoch': 2.11}


 79%|███████▉  | 90/114 [06:09<01:24,  3.53s/it]

{'loss': 0.0015, 'grad_norm': 0.02653154917061329, 'learning_rate': 4.210526315789474e-06, 'epoch': 2.37}


 88%|████████▊ | 100/114 [06:51<00:59,  4.24s/it]

{'loss': 0.0015, 'grad_norm': 0.026035649701952934, 'learning_rate': 2.456140350877193e-06, 'epoch': 2.63}


 96%|█████████▋| 110/114 [07:35<00:18,  4.67s/it]

{'loss': 0.0014, 'grad_norm': 0.028959989547729492, 'learning_rate': 7.017543859649123e-07, 'epoch': 2.89}


                                                 
100%|██████████| 114/114 [08:03<00:00,  4.24s/it]

{'eval_loss': 0.0009335149079561234, 'eval_runtime': 8.9011, 'eval_samples_per_second': 5.617, 'eval_steps_per_second': 1.46, 'epoch': 3.0}
{'train_runtime': 483.8902, 'train_samples_per_second': 0.924, 'train_steps_per_second': 0.236, 'train_loss': 0.1405073140927574, 'epoch': 3.0}





TrainOutput(global_step=114, training_loss=0.1405073140927574, metrics={'train_runtime': 483.8902, 'train_samples_per_second': 0.924, 'train_steps_per_second': 0.236, 'total_flos': 117615921638400.0, 'train_loss': 0.1405073140927574, 'epoch': 3.0})

In [12]:
metrics = trainer.evaluate()
print(metrics)


100%|██████████| 13/13 [00:08<00:00,  1.52it/s]

{'eval_loss': 0.0009335149079561234, 'eval_runtime': 8.5938, 'eval_samples_per_second': 5.818, 'eval_steps_per_second': 1.513, 'epoch': 3.0}





In [13]:
model.save_pretrained("./fine-tuned-layoutlm")
