In [1]:
import os

import hydra
from omegaconf import OmegaConf
import torch
import pytorch_lightning as pl
from pytorch_lightning.plugins import DDPPlugin
from transformers import BertJapaneseTokenizer

2021-10-20 21:40:25.410168: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-20 21:40:25.410196: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# HierBERT

In [2]:
abs_data_path = os.path.abspath("data/nested_sample/")

with hydra.initialize(config_path='config'):
    cfg = hydra.compose(
        config_name="defaults.yaml",
        overrides=[
            "experiment=predict",
            "name=ave_pooled_base-v2",
            "model=HierBERT",
            "data=wereWolf_sample",
            f"data.dir={abs_data_path}/",
            "trainer.gpus=[6]",
            "model.tokenizer.pretrained_model=cl-tohoku/bert-base-japanese-v2",
            "model.sent_level_BERT_config.hidden_size=768",
            "model.sent_level_BERT_config.num_hidden_layers=12",
            "model.sent_level_BERT_config.num_attention_heads=12",
        ]
    )

print(OmegaConf.to_yaml(cfg, resolve=True))

data_module = hydra.utils.instantiate(
    cfg.model.data_module,
    data_dir=cfg.data.dir,
    tokenizer=cfg.model.tokenizer,
    _recursive_=False,
)

model = hydra.utils.instantiate(
        cfg.model.model,
        pretrained_model=cfg.model.tokenizer.pretrained_model,
        sent_level_BERT_config=cfg.model.sent_level_BERT_config,
        optim=cfg.optim,
        _recursive_=False,
)

#tb_logger = pl.loggers.TensorBoardLogger(".", "", "", log_graph=True, default_hp_metric=False)

trainer = pl.Trainer(
    **OmegaConf.to_container(cfg.trainer),
#    callbacks=[tb_logger],
    plugins=DDPPlugin(),
)



model:
  name: HierBERT
  model:
    num_labels: 2
    _target_: src.model.HierBERT.HierchicalBERT
    use_ave_pooled_output: true
    output_attentions: true
    is_japanese: true
  tokenizer:
    _target_: src.tokenizer.tokenizer_HierBERT.HierBertTokenizer
    sent_length: 256
    doc_length: 256
    pretrained_model: cl-tohoku/bert-base-japanese-v2
  data_module:
    _target_: src.model.HierBERTDataModule.CreateHierBertDataModule
    batch_size: 64
  sent_level_BERT_config:
    _target_: transformers.BertConfig
    hidden_size: 768
    num_hidden_layers: 12
    num_attention_heads: 12
data:
  name: wereWolf_sample
  dir: /disk/ssd14tb/haoki/Documents/vscode-workplaces/lie_detector/data/nested_sample/
optim:
  name: AdamW
  optimizer:
    _target_: torch.optim.AdamW
    lr: 0.001
    weight_decay: 0.01
experiment: predict
name: ave_pooled_base-v2
message: null
trainer:
  accumulate_grad_batches: 1
  benchmark: true
  deterministic: true
  fast_dev_run: false
  gpus:
  - 6
  max_epoch

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v2 were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification

In [3]:
best_epoch = 2
ckpt_path = f'outputs/{cfg.data.name}/{cfg.model.name}/baseline/{cfg.name}/checkpoints/epoch={best_epoch}.ckpt'
print(ckpt_path)
outputs = trainer.predict(model=model, datamodule=data_module, ckpt_path=ckpt_path)

initializing ddp: GLOBAL_RANK: 0, MEMBER: 1/1
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All DDP processes registered. Starting ddp with 1 processes
----------------------------------------------------------------------------------------------------



outputs/wereWolf_sample/HierBERT/baseline/ave_pooled_base-v2/checkpoints/epoch=2.ckpt


A100-PCIE-40GB with CUDA capability sm_80 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the A100-PCIE-40GB GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


Predicting: 0it [00:00, ?it/s]

In [4]:
logits = torch.cat([p['logits'] for p in outputs], dim=0)
word_attentions = torch.cat([torch.stack(p['word_attentions']).permute(1, 0, 2) for p in outputs])
sent_attentions = torch.cat([p['sent_attentions'] for p in outputs])
input_ids = torch.cat([p['input_ids'] for p in outputs])
labels = torch.cat([p['labels'] for p in outputs])

tokenizer = BertJapaneseTokenizer.from_pretrained(f'{cfg.model.tokenizer.pretrained_model}')



## Word attention

In [7]:
from src.visualization.plot_attention import plot_word_attentions

ploted_doc = []
for _input_ids, _word_attentions in zip(input_ids, word_attentions):
    tokens = [tokenizer.convert_ids_to_tokens(ids) for ids in _input_ids]
    ploted_doc.append(plot_word_attentions(doc=tokens, weights_list=_word_attentions, threshold=0.01, size=3))

ヒートマップを作成中...: 256it [00:00, 1632.30it/s]
ヒートマップを作成中...: 256it [00:00, 989.50it/s]
ヒートマップを作成中...: 256it [00:00, 2373.89it/s]
ヒートマップを作成中...: 256it [00:00, 1664.56it/s]
ヒートマップを作成中...: 256it [00:00, 991.89it/s]
ヒートマップを作成中...: 256it [00:00, 1004.11it/s]


In [None]:
from IPython.display import HTML, display
display(HTML(ploted_doc[5]))

## Sentence attention

In [9]:
from src.visualization.plot_attention import plot_sent_attention

ploted_doc = []
for _input_ids, _sent_attentions in zip(input_ids, sent_attentions):
    tokens = [tokenizer.convert_ids_to_tokens(ids) for ids in _input_ids]
    ploted_doc.append(plot_sent_attention(doc=tokens, weights_list=_sent_attentions, threshold=0.01, size=3))

TypeError: plot_sent_attention() got an unexpected keyword argument 'weights_list'