Импортируем общий код

In [1]:
%run NER_common.ipynb

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import pytorch_transformers
import seqeval.metrics
from tqdm import tqdm_notebook

import numpy as np
from itertools import chain, islice
from collections import Counter
from collections import defaultdict
from functools import partial

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, Dataset, DataLoader
import pytorch_lightning as pl
from test_tube import Experiment
import argparse
import os.path

Для проведения эксперимента используются pytorch-lightning и test_tube. <br>
Опишем параметры эксперимента. Можно изменять их значения перед дальнейшим исполнением. Также они сохранены в директории experiment_dir/experiment_name/версия-эксперимента (test_tube делает ее автоинкремент основываясь на содержимом директории) в файле meta_tags.csv. <br>
Файл metrics.csv хранит логи метрик (каждый 100 итераций при обучении, также логируются результаты валидации).

Суть эксперимента:
Мы будем производить fine-tuning претренированной модели BERT (bert-base-cased) под текущую задачу. Поскольку токенизация у этой модели может расцепить оригинальные токены, в этом случае предсказание для первого субтокена из разбиения считается предсказанием для всего оригинального токена. Также для последующих субтокенов не считается loss.

In [4]:
experiment_config = argparse.Namespace()
experiment_config.basic_model = 'BERT'

experiment_config.replace_urls = True
experiment_config.replace_numbers = False 
experiment_config.split_hashtags = False # оставить False, не реализовано, BertTokenizer сам это сделает
experiment_config.split_nicknames = False # оставить False, не реализовано,  BertTokenizer сам это сделает

experiment_config.val_batch_size = 24
experiment_config.train_batch_size = 16
experiment_config.lr = 5e-5
experiment_config.gradient_acccumulation_steps = 2
experiment_config.gradient_clipping_norm = 5.0
experiment_config.n_epochs = 4
experiment_config.mask_additional_wordpieces = True # оставить True, не реализовано

experiment_dir = 'NER_experiments/'
experiment_config.experiment_name = 'BERT_finetune_mask'

Прочтём файлы и подсчитаем число тегов.

In [5]:
original_inputs, original_targets = read_data('data/data.txt')

In [6]:
unique_tags = count_tags(original_targets)
experiment_config.n_classes = len(unique_tags)

In [7]:
print(experiment_config.n_classes)

21


Препроцессинг токенов при помощи wordpiece токенайзера. Он применяется к каждому оригинальному токену по отдельности. Предполагается, что расщеплений хэштегов и имен не было. В случае если появились спец-токены (&lt;NUM&gt;, &lt;URL&gt;), не будем применять к ним токенайзер, но назначим им свободные слоты в словаре BERT ([unused1], [unused2]), это отображение описывается в словаре bert_specials. 

Функция также возвращает для каждого текста булеву маску, где True значения соответствуют местам, в которых подсчитываются предсказания для оригинальных токенов, как описано выше.

In [8]:
def bert_preprocessing(inputs, targets, bert_tokenizer, bert_specials=None):
    new_inputs = []
    new_targets = []
    masks = []
#     bert_tokenizer = pytorch_transformers.BertTokenizer.from_pretrained('bert-base-uncased')
    for text, tags in tqdm_notebook(zip(inputs, targets), total=len(inputs)):
        new_tokens, new_tags = [],[]
        mask = []
        for token, tag in zip(text, tags):
            if bert_specials and token in bert_specials:
                token_pieces = [bert_specials[token]]
            else:
                token_pieces = bert_tokenizer.tokenize(token)
            new_tokens.extend(token_pieces)
            new_tags.extend(split_tag(tag, len(token_pieces)))
            if experiment_config.mask_additional_wordpieces:
                mask.extend([True] + [False] * (len(token_pieces) - 1))
            else:
                mask.extend([True] * len(token_pieces))
        new_inputs.append(new_tokens)
        new_targets.append(new_tags)
        masks.append(mask)
    return new_inputs, new_targets, masks

Трансформация токенов, тегов и маски для модели Bert в индексы в словарях. Текст дополняется токенами [CLS] и [MASK], соответственно дополняются теги и маска.

In [9]:
def bert_numericalize_targets(targets, target_vocab):
    target_ids = target_vocab.numericalize(targets)
    return [[0] + ids + [0] for ids in target_ids]

def bert_numericalize_mask(masks):
    return [[False] + mask + [False] for mask in masks]

def bert_numericalize_inputs(inputs, bert_tokenizer):
    result = []
    for input_tokens in inputs:
        ids = [bert_tokenizer._convert_token_to_id(tok) for tok in input_tokens]
        ids = bert_tokenizer.add_special_tokens_single_sentence(ids)
        result.append(ids)
    return result

Объединим эти функции.

In [10]:
def bert_numericalize(inputs, targets, masks, bert_tokenizer, target_vocab):
    input_ids = bert_numericalize_inputs(inputs, bert_tokenizer)
    target_ids = bert_numericalize_targets(targets, target_vocab)
    mask_ids = bert_numericalize_mask(masks)
    return input_ids, target_ids, mask_ids

Теперь зададим Dataset и функцию создания мини-батча из тензоров.
<b>Забыл унаследоваться от torch.utils.data.Dataset</b>

In [11]:
class BertDataset:
    def __init__(self, input_ids, target_ids, mask_ids):
        self.inputs = input_ids
        self.targets = target_ids
        self.masks = mask_ids
        assert len(input_ids) == len(target_ids) == len(self.masks)
        
    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, idx):
        return (self.inputs[idx], self.targets[idx], self.masks[idx])
    
    @staticmethod
    def collate(examples):
        inputs, targets, masks = [],[],[]
        for inp, tgt, msk in examples:
            inputs.append(torch.tensor(inp, dtype=torch.long))
            targets.append(torch.tensor(tgt, dtype=torch.long))
            masks.append(torch.tensor(msk, dtype=torch.bool))
            
        input_tensor = torch.nn.utils.rnn.pad_sequence(inputs, batch_first=True)
        target_tensor = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True)
        mask_tensor = torch.nn.utils.rnn.pad_sequence(masks, batch_first=True)
#         print(input_tensor.size(), target_tensor.size(), mask_tensor.size())
        return input_tensor, target_tensor, mask_tensor

In [12]:
bert_tokenizer = pytorch_transformers.BertTokenizer.from_pretrained('bert-base-cased')

The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.


In [13]:
bert_specials = {'<URL>': '[unused1]', '<NUM>': '[unused2]'}

Назначим 

In [14]:
target_vocab = Vocab.from_id2word(unique_tags, unk_index=None, n_specials=0)

In [15]:
inputs, targets, _ = basic_preprocessing(original_inputs, original_targets,
                                      replace_urls=experiment_config.replace_urls,
                                      replace_numbers=experiment_config.replace_numbers,
                                      split_hashtags=experiment_config.split_hashtags,
                                      split_mentions=experiment_config.split_nicknames)

In [16]:
bert_tokenized_inputs, bert_tokenized_targets, bert_masks = bert_preprocessing(inputs, targets, bert_tokenizer, bert_specials)

HBox(children=(IntProgress(value=0, max=7243), HTML(value='')))




<b>Прокомментировать</b>

In [17]:
print(original_inputs[0])

['Man', 'i', 'hate', 'when', 'people', 'carry', 'ragedy', 'luggage', '..', 'ima', 'just', 'rip', 'it', 'up', 'more', 'with', 'the', 'belt', 'loader', '#itaintmines']


In [18]:
print(bert_tokenized_inputs[0])

['Man', 'i', 'hate', 'when', 'people', 'carry', 'rage', '##dy', 'luggage', '.', '.', 'im', '##a', 'just', 'rip', 'it', 'up', 'more', 'with', 'the', 'belt', 'load', '##er', '#', 'it', '##ain', '##t', '##mine', '##s']


In [19]:
print(bert_tokenized_targets[0])

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


In [20]:
print(bert_masks[0])

[True, True, True, True, True, True, True, False, True, True, False, True, False, True, True, True, True, True, True, True, True, True, False, True, False, False, False, False, False]


In [21]:
split_names = ['train', 'val', 'test']

In [22]:
original_inputs_split, original_targets_split = split_to_dicts([original_inputs, original_targets],
                                                                 (train_indices, val_indices, test_indices), 
                                                                 split_names)

In [23]:
bert_inputs_split, bert_targets_split, bert_masks_split = split_to_dicts([bert_tokenized_inputs, bert_tokenized_targets, bert_masks],
                                                                 (train_indices, val_indices, test_indices), 
                                                                 split_names)

In [24]:
bert_numericalized_inputs, bert_numericalized_targets, bert_numericalized_masks = bert_numericalize(bert_tokenized_inputs, 
                                                                                                    bert_tokenized_targets,
                                                                                                    bert_masks, bert_tokenizer, target_vocab)

In [25]:
print(bert_numericalized_masks[0])

[False, True, True, True, True, True, True, True, False, True, True, False, True, False, True, True, True, True, True, True, True, True, True, False, True, False, False, False, False, False, False]


In [26]:
bert_input_ids_spl, bert_target_ids_spl, bert_mask_ids_spl = split_to_dicts([bert_numericalized_inputs, bert_numericalized_targets, bert_numericalized_masks],
                                                                            (train_indices, val_indices, test_indices), split_names)

In [27]:
train_dataset = BertDataset(*[spl['train'] for spl in (bert_input_ids_spl, bert_target_ids_spl, bert_mask_ids_spl)])

In [28]:
val_dataset = BertDataset(*[spl['val'] for spl in (bert_input_ids_spl, bert_target_ids_spl, bert_mask_ids_spl)])

In [29]:
test_dataset = BertDataset(*[spl['test'] for spl in (bert_input_ids_spl, bert_target_ids_spl, bert_mask_ids_spl)])

<b>Теперь можно построить модель

In [30]:
def masked_crossentropy_loss(logits, targets, masked):
    loss_values = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), reduction='none')
    loss_values[masked.view(-1)] = 0
    return loss_values.mean()

In [31]:
class BERTForNER(nn.Module):
    def __init__(self, exp_config):
        super().__init__()
        self.model = pytorch_transformers.BertForTokenClassification.from_pretrained('bert-base-cased', 
                                                                                     num_labels=exp_config.n_classes)   
    def forward(self, inputs):
        attention_mask = (inputs != 0).type(torch.float32)
        return self.model(inputs, attention_mask=attention_mask)[0]

In [32]:
def bert_compute_lengths_by_padding(inputs):
    return ((inputs != 0).sum(dim=-1) - 2).tolist()

def bert_predict_tags_with_mask(model, inputs, tag_mask, target_vocab):
    model.eval()
    result = []
    with torch.no_grad():
        logits = model(inputs)
        seqs = logits.argmax(dim=-1)
        for i,pred in enumerate(seqs):
            pred = pred[tag_mask[i]].tolist()
            result.append(target_vocab.transform_ids(pred))
    return result

def bert_predict_tags_for_loader(model, loader, target_vocab, use_mask=True, device='cuda'):        
    result = []
    for batch in loader:
        if use_mask:
            inputs,_,mask = [x.to(device) for x in batch]
            result.extend(bert_predict_tags_with_mask(model, inputs, mask, target_vocab))
        else:
            raise ValueError("Not implemented yet")
    return result

In [33]:
class LightningBERTMasking(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.model = BERTForNER(config)
        self.config = config
        self.lr = config.lr
        self.train_batch_size = config.train_batch_size
        self.val_batch_size = config.val_batch_size

    def forward(self, inputs):
        return self.model(inputs)
    
    def compute_loss_on_batch(self, batch):
        inputs, targets, mask = batch
        logits = self(inputs)
        
        loss_mask = ~mask
        loss = masked_crossentropy_loss(logits, targets, loss_mask)
        return loss
    
    def training_step(self, batch, batch_nb):
        # REQUIRED
        loss = self.compute_loss_on_batch(batch)
        return {'loss': loss}

    def validation_step(self, batch, batch_nb):
        # OPTIONAL
        loss = self.compute_loss_on_batch(batch)
        inputs, targets, mask = batch
        predicted_tags = bert_predict_tags_with_mask(self, inputs, mask, target_vocab)
        
        return {'val_loss': loss, 'tags': predicted_tags}

    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.tensor([x['val_loss'] for x in outputs]).mean()
        predictions = list(chain.from_iterable(x['tags'] for x in outputs))
        f1_score = seqeval.metrics.f1_score(original_targets_split['val'], predictions)
        
        print(seqeval.metrics.classification_report(original_targets_split['val'], predictions))
        metrics = {'avg_val_loss': avg_loss.item(), 'f1': f1_score}
        metrics_to_write = dict(metrics, epoch=self.trainer.current_epoch+1)
#         metrics.update(self.trainer.tng_tqdm_dic)
        
#         scalar_metrics = self.trainer.__metrics_to_scalars(
#                     metrics, blacklist=self.trainer.__log_vals_blacklist())
        
        assert self.experiment
        self.experiment.log(metrics_to_write)
        self.experiment.save()
        
        return metrics

    def configure_optimizers(self):
        # REQUIRED
        # can return multiple optimizers and learning_rate schedulers
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    @pl.data_loader
    def tng_dataloader(self):
        # REQUIRED
        assert isinstance(train_dataset, BertDataset)
        return DataLoader(train_dataset, batch_size=self.train_batch_size, shuffle=True, collate_fn=BertDataset.collate)

    @pl.data_loader
    def val_dataloader(self):
        # OPTIONAL
        assert isinstance(val_dataset, BertDataset)
        return DataLoader(val_dataset, batch_size=self.val_batch_size, shuffle=False, collate_fn=BertDataset.collate)

In [34]:
def test_bert_predict():
    bert = BERTForNER(experiment_config)
    dl = DataLoader(train_dataset, batch_size=4, collate_fn=BertDataset.collate)
    for batch in dl:
#         print(batch)
        res = bert_predict_tags_with_mask(bert, batch[0], batch[2], target_vocab)
        print(res)
        break

test_bert_predict()

[['I-facility', 'I-facility', 'I-company', 'I-person', 'I-facility', 'B-other', 'I-company', 'I-facility', 'I-company', 'I-person', 'B-geo-loc', 'B-other', 'B-other', 'B-other', 'B-other'], ['I-other', 'B-product', 'I-facility', 'B-product', 'I-person', 'I-person', 'B-product', 'I-facility', 'B-product'], ['B-musicartist', 'I-geo-loc', 'I-facility', 'I-facility', 'B-geo-loc', 'I-facility', 'I-company', 'I-facility', 'I-facility', 'I-facility', 'I-facility', 'B-geo-loc', 'I-facility', 'I-facility', 'I-facility', 'I-facility', 'I-facility', 'B-product', 'I-movie', 'B-product', 'B-other', 'B-product'], ['B-musicartist', 'B-other', 'I-person', 'I-facility', 'B-product', 'B-product', 'I-facility', 'I-facility', 'I-facility', 'I-product', 'B-geo-loc', 'I-facility', 'I-facility', 'I-facility', 'B-product', 'I-facility', 'I-facility', 'I-facility', 'I-person']]


## Запуск эксперимента


In [35]:
# import gc
# del pl_bert
# gc.collect()
# torch.cuda.empty_cache()

In [36]:
!nvidia-smi

Sun Sep  1 15:03:03 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:08:00.0 Off |                  N/A |
| 52%   65C    P0    68W / 250W |     10MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [37]:
exp = Experiment(save_dir=experiment_dir, name=experiment_config.experiment_name)

In [38]:
print(exp.version)

4


In [39]:
exp.argparse(experiment_config)
exp.save()

In [40]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    filepath= f'{experiment_dir}/{experiment_config.experiment_name}/version_{exp.version}/checkpoint',
    save_best_only=True,
    verbose=True,
    monitor='f1',
    mode='max'
)

early_stop = pl.callbacks.EarlyStopping(
        monitor='f1',
        patience=5,
        verbose=True,
        mode='max'
)

In [41]:
pl_bert = LightningBERTMasking(experiment_config)

In [42]:
print(len(pl_bert.tng_dataloader))

325


In [43]:
print(len(pl_bert.val_dataloader))

36


In [44]:
trainer = pl.Trainer(experiment=exp,
                     max_nb_epochs=experiment_config.n_epochs,
                     gpus=[0],
                     gradient_clip=experiment_config.gradient_clipping_norm,
                     early_stop_callback=early_stop,
                     accumulate_grad_batches=experiment_config.gradient_acccumulation_steps,
                     add_log_row_interval=100,
                     checkpoint_callback=checkpoint_callback)

VISIBLE GPUS: '0'
gpu available: True, used: True


In [None]:
trainer.fit(pl_bert)

  0%|          | 0/5 [00:00<?, ?it/s]

                                            Name                        Type  \
0                                          model                  BERTForNER   
1                                    model.model  BertForTokenClassification   
2                               model.model.bert                   BertModel   
3                    model.model.bert.embeddings              BertEmbeddings   
4    model.model.bert.embeddings.word_embeddings                   Embedding   
..                                           ...                         ...   
215                      model.model.bert.pooler                  BertPooler   
216                model.model.bert.pooler.dense                      Linear   
217           model.model.bert.pooler.activation                        Tanh   
218                          model.model.dropout                     Dropout   
219                       model.model.classifier                      Linear   

        Params  
0    108326421  
1    

  0%|          | 0/361 [00:00<01:29,  4.04it/s]

             precision    recall  f1-score   support

        loc       0.01      0.01      0.01       156
    product       0.00      0.00      0.00        38
    company       0.00      0.00      0.00        84
     person       0.00      0.00      0.00       131
      other       0.02      0.02      0.02       110
musicartist       0.00      0.00      0.00        33
   facility       0.00      0.00      0.00        58
     tvshow       0.00      0.00      0.00        11
 sportsteam       0.00      0.00      0.00        21
      movie       0.00      0.00      0.00        17

  micro avg       0.00      0.01      0.00       659
  macro avg       0.01      0.01      0.01       659



100%|██████████| 361/361 [00:47<00:00, 11.48it/s, avg_val_loss=0.0698, batch_nb=324, epoch=0, f1=0.367, gpu=0, loss=0.091, v_nb=4]

             precision    recall  f1-score   support

        loc       0.35      0.68      0.46       156
    product       0.00      0.00      0.00        38
    company       0.44      0.48      0.46        84
     person       0.59      0.73      0.65       131
      other       0.07      0.06      0.07       110
musicartist       0.00      0.00      0.00        33
   facility       0.03      0.02      0.02        58
     tvshow       0.00      0.00      0.00        11
 sportsteam       0.24      0.19      0.21        21
      movie       0.00      0.00      0.00        17

  micro avg       0.35      0.38      0.37       659
  macro avg       0.28      0.38      0.32       659

save callback...

Epoch 00001: f1 improved from -inf to 0.36720, saving model to NER_experiments//BERT_finetune_mask/version_4/checkpoint/_ckpt_epoch_1.ckpt


100%|██████████| 361/361 [00:47<00:00, 11.58it/s, avg_val_loss=0.0523, batch_nb=324, epoch=1, f1=0.59, gpu=0, loss=0.052, v_nb=4] 

             precision    recall  f1-score   support

        loc       0.70      0.73      0.72       156
    product       0.22      0.32      0.26        38
    company       0.59      0.69      0.64        84
     person       0.66      0.80      0.72       131
      other       0.44      0.49      0.47       110
musicartist       0.50      0.36      0.42        33
   facility       0.48      0.66      0.55        58
     tvshow       0.00      0.00      0.00        11
 sportsteam       0.60      0.71      0.65        21
      movie       0.00      0.00      0.00        17

  micro avg       0.56      0.62      0.59       659
  macro avg       0.55      0.62      0.58       659

save callback...

Epoch 00002: f1 improved from 0.36720 to 0.59045, saving model to NER_experiments//BERT_finetune_mask/version_4/checkpoint/_ckpt_epoch_2.ckpt


100%|██████████| 361/361 [00:47<00:00, 10.92it/s, avg_val_loss=0.0501, batch_nb=324, epoch=2, f1=0.603, gpu=0, loss=0.031, v_nb=4]

             precision    recall  f1-score   support

        loc       0.71      0.74      0.73       156
    product       0.16      0.32      0.21        38
    company       0.73      0.63      0.68        84
     person       0.87      0.68      0.76       131
      other       0.48      0.52      0.50       110
musicartist       0.55      0.52      0.53        33
   facility       0.55      0.72      0.63        58
     tvshow       0.00      0.00      0.00        11
 sportsteam       0.58      0.71      0.64        21
      movie       0.23      0.35      0.28        17

  micro avg       0.59      0.62      0.60       659
  macro avg       0.62      0.62      0.61       659

save callback...

Epoch 00003: f1 improved from 0.59045 to 0.60296, saving model to NER_experiments//BERT_finetune_mask/version_4/checkpoint/_ckpt_epoch_3.ckpt


 48%|████▊     | 173/361 [00:22<00:23,  7.84it/s, avg_val_loss=0.0501, batch_nb=171, epoch=3, f1=0.603, gpu=0, loss=0.019, v_nb=4]

### Inference

In [None]:
test_predictions = bert_predict_tags_for_loader(pl_bert, DataLoader(test_dataset, collate_fn=BertDataset.collate, batch_size=24), target_vocab)

In [None]:
print(test_predictions[2], original_targets_split['test'][2])

In [None]:
print(seqeval.metrics.classification_report(original_targets_split['test'][2:3], test_predictions[2:3]))

In [None]:
def bert_predict_for_tokens(model, tokens, device='cuda'):
    fake_targets = [['O' for _ in text] for text in tokens]
    preproc_tokens, preproc_targets, _ = basic_preprocessing(tokens, fake_targets, 
                                                             replace_urls=experiment_config.replace_urls,
                                                              replace_numbers=experiment_config.replace_numbers,
                                                              split_hashtags=experiment_config.split_hashtags,
                                                              split_mentions=experiment_config.split_nicknames)
    preproc_tokens, preproc_targets, preproc_mask = bert_preprocessing(preproc_tokens, preproc_targets, bert_tokenizer, bert_specials)
    input_ids, target_ids, mask_ids = bert_numericalize(preproc_tokens, preproc_targets, preproc_mask, bert_tokenizer, target_vocab)
    model.eval()
    batch = BertDataset.collate(zip(input_ids, target_ids, mask_ids))
    inputs,_,mask = batch
    with torch.no_grad():
        result = bert_predict_tags_with_mask(model, inputs.to(device), mask.to(device), target_vocab)
    return result


In [None]:
!pip3 install nltk

In [None]:
text_example = "Satellite imagery this morning of now Category 5 Hurricane Dorian approaching the Abaco Islands in the northern Bahamas. For the latest on Dorian visit http://hurricanes.gov"

In [None]:
import nltk

In [None]:
print(text_example)

In [None]:
tokens_example = nltk.tokenize.TweetTokenizer().tokenize(text_example)
print(tokens_example)

In [None]:
prediction = bert_predict_for_tokens(pl_bert, [tokens_example])[0]

In [None]:
for token, tag in zip(tokens_example, prediction):
    print(token, tag)

In [None]:
from pytorch_lightning.root_module.model_saving import load_hparams_from_tags_csv

In [None]:
hparams = load_hparams_from_tags_csv('NER_experiments/BERT_finetune_mask/version_2/meta_tags.csv')

In [None]:
print(hparams)

In [None]:
print(experiment_config)