In [2]:
from dataset_loader import PunctuationDataset
import pandas as pd
import torch
import training_params
from tqdm import tqdm
# from seqeval.metrics import f1_score, accuracy_score
from sklearn import metrics
from transformers import AlbertForTokenClassification, AdamW, get_linear_schedule_with_warmup
import numpy as np

In [5]:
def process_data(data_csv):
    df = pd.read_csv(data_csv)
    sentences = df.groupby("sentence")["word"].apply(list).values
    labels = df.groupby("sentence")["label"].apply(list).values
    tag_values = list(set(df["label"].values))
    tag_values.append("PAD")
    encoder = {t: i for i, t in enumerate(tag_values)}
    return sentences, labels, encoder, tag_values

In [6]:
train_sentences, train_labels, train_encoder, tag_values = process_data(training_params.TRAIN_DATA)
valid_sentences, valid_labels, _, _ = process_data(training_params.VALID_DATA)

train_dataset = PunctuationDataset(texts=train_sentences, labels=train_labels,
                                   tag2idx=train_encoder)
valid_dataset = PunctuationDataset(texts=valid_sentences, labels=valid_labels,
                                   tag2idx=train_encoder)

train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=training_params.BATCH_SIZE, num_workers=4)
valid_data_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=training_params.BATCH_SIZE, num_workers=4)

model = AlbertForTokenClassification.from_pretrained('ai4bharat/indic-bert',
                                                     num_labels=len(train_encoder),
                                                     output_attentions=False,
                                                     output_hidden_states=False)

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForTokenClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias']
- This IS expected if you are initializing AlbertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and a

In [7]:
len(train_data_loader)

7607

In [8]:
len(valid_data_loader)

7607

In [20]:
checkpoint = torch.load('checkpoints/2021-04-20_18-49-21/checkpoint_best.pt')

RuntimeError: CUDA out of memory. Tried to allocate 98.00 MiB (GPU 0; 14.73 GiB total capacity; 502.76 MiB already allocated; 50.88 MiB free; 516.00 MiB reserved in total by PyTorch)

In [14]:
model

AlbertForTokenClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(200000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, b

In [15]:
checkpoint.keys()

dict_keys(['epoch', 'state_dict', 'optimizer'])

In [18]:
checkpoint['state_dict']

OrderedDict([('module.albert.embeddings.position_ids',
              tensor([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
                        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
                        28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
                        42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
                        56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
                        70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
                        84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
                        98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
                       112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
                       126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
               

In [19]:
model.load_state_dict(checkpoint['state_dict'])

RuntimeError: Error(s) in loading state_dict for AlbertForTokenClassification:
	Missing key(s) in state_dict: "albert.embeddings.position_ids", "albert.embeddings.word_embeddings.weight", "albert.embeddings.position_embeddings.weight", "albert.embeddings.token_type_embeddings.weight", "albert.embeddings.LayerNorm.weight", "albert.embeddings.LayerNorm.bias", "albert.encoder.embedding_hidden_mapping_in.weight", "albert.encoder.embedding_hidden_mapping_in.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.bias", "albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.weight", "albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias", "classifier.weight", "classifier.bias". 
	Unexpected key(s) in state_dict: "module.albert.embeddings.position_ids", "module.albert.embeddings.word_embeddings.weight", "module.albert.embeddings.position_embeddings.weight", "module.albert.embeddings.token_type_embeddings.weight", "module.albert.embeddings.LayerNorm.weight", "module.albert.embeddings.LayerNorm.bias", "module.albert.encoder.embedding_hidden_mapping_in.weight", "module.albert.encoder.embedding_hidden_mapping_in.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.query.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.key.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.value.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn.bias", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.weight", "module.albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias", "module.classifier.weight", "module.classifier.bias". 