In [1]:
import torch
import json
from os import walk
from os import makedirs
from tqdm import tqdm
from argparse import ArgumentParser
from transformers import BertJapaneseTokenizer
from torch.nn import Softmax
from torch import nn
import torch.nn.functional as F

In [10]:
class BertClassifier(nn.Module):
    """Linear 2-way head on top of the fine-tuned BERT checkpoint.

    The checkpoint is called with hidden states and attentions enabled; the
    last layer's vector at sequence position 0 is fed to a fresh
    ``nn.Linear(768, 2)`` head.
    """

    def __init__(self):
        super(BertClassifier, self).__init__()
        # torch.load unpickles an arbitrary Python object: only point this at
        # checkpoints from a trusted source.
        self.bert = torch.load('../savepoint/bert-fm/bert-fm-42.pt')
        self.linear = nn.Linear(768, 2)
        nn.init.normal_(self.linear.weight, std=0.02)
        # NOTE(review): the second positional argument of normal_ is the mean,
        # so the bias is drawn with mean 0 and the default std of 1.0 --
        # confirm whether a zero bias was intended.
        nn.init.normal_(self.linear.bias, 0)

    def forward(self, input_ids):
        """Classify a batch of token ids.

        Args:
            input_ids: integer tensor passed to the wrapped model as
                ``input_ids`` (assumed (batch, seq_len) -- TODO confirm).

        Returns:
            Tuple ``(log_probs, attentions)``: log-softmax over the two
            classes for every row, and the wrapped model's attentions.
        """
        # Use the argument, not a notebook-level global, so the module works
        # on whatever batch the caller passes in.
        out = self.bert(
            input_ids=input_ids,
            return_dict=True,
            output_hidden_states=True,
            output_attentions=True,
        )
        # Last layer, first position of every sequence.
        vec = out.hidden_states[-1][:, 0, :]
        vec = vec.view(-1, 768)
        logits = self.linear(vec)
        # Explicit class axis; the implicit-dim form is deprecated.
        return F.log_softmax(logits, dim=-1), out.attentions

In [2]:
# Load the test split, pick one work and collect the text of its paragraphs.
works_path = "../tsv/first-match-scatter/42/test.json"
# Explicit UTF-8: without it open() uses the platform's locale encoding,
# which can fail on non-ASCII text.
with open(works_path, "r", encoding="utf-8") as f:
    works = json.load(f)
work = works[100]
work_iter = [w['paragraph'] for w in work['contents']]

In [3]:
# Tokenizer for the pretrained Japanese BERT vocabulary.
# NOTE(review): return_tensors / padding / max_length look like encode-time
# options; confirm they have any effect when given to from_pretrained (the
# encode call in the next cell passes its own max_length=512).
tokenizer = BertJapaneseTokenizer.from_pretrained(
    "cl-tohoku/bert-base-japanese-whole-word-masking",
    return_tensors='pt',
    padding='max_length',
    max_length=1024,
)

In [4]:
# Encode every paragraph to exactly 512 ids (special tokens added, longer
# inputs truncated, shorter ones padded).
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
batch = tokenizer.batch_encode_plus(
    work_iter,
    padding='max_length',
    max_length=512,
    truncation=True,
    add_special_tokens=True,
)
batch_ids = batch['input_ids']



In [5]:
# Build the id matrix directly on the GPU and display its shape.
tensor_ids = torch.tensor(batch_ids, device='cuda')
tensor_ids.shape

torch.Size([5, 512])

In [6]:
# Load the saved object, take the model it exposes under `.module`
# (presumably a DataParallel wrapper -- confirm) and move it to the GPU.
# torch.load unpickles arbitrary objects: trusted checkpoints only.
bert = torch.load(f'../savepoint/bert-fm/bert-fm-42.pt').module.to('cuda')

In [7]:
# Show the pooler and the classification head of the loaded model.
print(bert.bert.pooler, bert.classifier, sep='\n')

BertPooler(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (activation): Tanh()
)
Linear(in_features=768, out_features=2, bias=True)


In [8]:
# Forward pass that also returns the hidden states of every layer.
out = bert(input_ids=tensor_ids, output_hidden_states=True)

In [9]:
# Keep the classifier logits and the final layer's hidden states.
logits, last_hidden = out.logits, out.hidden_states[-1]

In [10]:
# Apply the model's own pooler to the last hidden states and show the shape.
pooled = bert.bert.pooler(last_hidden)
pooled.shape

torch.Size([5, 768])

In [11]:
# Recompute the pooled vectors and create a stand-alone 768 -> 2 linear layer.
pooled = bert.bert.pooler(last_hidden)
cls_linear = nn.Linear(768, 2).to('cuda')
nn.init.normal_(cls_linear.weight, std=0.0001)
# NOTE(review): 0.01 is the *mean* here (std stays at its default 1.0), which
# matches the large bias values shown below -- confirm std=0.01 was not meant.
nn.init.normal_(cls_linear.bias, mean=0.01)

Parameter containing:
tensor([ 0.3860, -1.4263], device='cuda:0', requires_grad=True)

In [12]:
# Point the stand-alone layer at the fine-tuned head's own parameter objects.
cls_linear.weight, cls_linear.bias = bert.classifier.weight, bert.classifier.bias

In [13]:
# Shape of the classification head's weight matrix.
bert.classifier.weight.shape

torch.Size([2, 768])

In [14]:
# Apply the stand-alone layer to the pooled vectors (compare with `logits`).
cls_linear(pooled)

tensor([[ 0.0503,  0.3246],
        [-0.1981,  0.5081],
        [-0.0189,  0.2033],
        [ 0.1545,  0.2322],
        [ 0.5509, -0.2223]], device='cuda:0', grad_fn=<AddmmBackward>)

In [15]:
# Model logits, pooled vectors, and the head applied by hand, one after another.
print(logits, pooled, bert.classifier(pooled), sep='\n')

tensor([[ 0.0503,  0.3246],
        [-0.1981,  0.5081],
        [-0.0189,  0.2033],
        [ 0.1545,  0.2322],
        [ 0.5509, -0.2223]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([[ 0.7014, -0.5432, -0.6199,  ...,  0.5750,  0.0863, -0.4331],
        [ 0.7077, -0.4064, -0.5185,  ...,  0.5127,  0.0337, -0.1763],
        [ 0.6667, -0.6674, -0.7178,  ...,  0.6313,  0.1256, -0.2005],
        [ 0.7409, -0.5881, -0.6273,  ...,  0.5767,  0.1062, -0.2970],
        [ 0.7268, -0.6956, -0.7196,  ...,  0.5405,  0.1342, -0.4827]],
       device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.0503,  0.3246],
        [-0.1981,  0.5081],
        [-0.0189,  0.2033],
        [ 0.1545,  0.2322],
        [ 0.5509, -0.2223]], device='cuda:0', grad_fn=<AddmmBackward>)


In [21]:
# First pooled feature of every row, read straight from `pooled` so this cell
# does not depend on `pool_lst`, which is only assigned further down.
pooled[:, 0].tolist()

[0.7013938426971436,
 0.7077224254608154,
 0.6667032241821289,
 0.7409121990203857,
 0.7267847657203674]

In [31]:
# Nested Python-list copy of the pooled vectors (one inner list per row).
pool_lst = pooled.tolist()

In [32]:
### CLS AVERAGE ###
# Element-wise mean of the pooled vectors, then classify that mean vector.
# Sizes come from the data instead of the literals 768 and 5, so the cell
# works for any number of rows or hidden width.
sum_lst = [sum(column) for column in zip(*pool_lst)]
avg_lst = [total / len(pool_lst) for total in sum_lst]
t_avg_lst = torch.tensor(avg_lst).to('cuda')
bert.classifier(t_avg_lst)

tensor([0.1077, 0.2092], device='cuda:0', grad_fn=<AddBackward0>)

In [36]:
### CLS GRAND ###
# Lay all pooled vectors end to end in one flat list, then move it to the GPU.
grand_lst = []
for lst in pool_lst:
    grand_lst.extend(lst)
t_grand_lst = torch.tensor(grand_lst).to('cuda')

In [53]:
# Tile each row of the head's weight matrix once per pooled vector so it lines
# up with the concatenated input. The repeat count follows len(pool_lst)
# instead of a hard-coded 5.
bert_w_lst = bert.classifier.weight.tolist()
grand_w_lst_l = bert_w_lst[0] * len(pool_lst)
grand_w_lst_r = bert_w_lst[1] * len(pool_lst)
grand_w_lst = [grand_w_lst_l, grand_w_lst_r]
t_grand_w_lst = torch.tensor(grand_w_lst).to('cuda')

torch.Size([2, 3840])

In [57]:
### BERT CLASSIFIER ###
# Linear layer over the concatenated vectors, using the tiled head weights and
# the head's bias. Layer sizes are read from the weight tensor rather than the
# literal 3840; the former random init was dropped because both parameters are
# replaced immediately below.
gnd_linear = nn.Linear(t_grand_w_lst.size(1), t_grand_w_lst.size(0)).to('cuda')
gnd_linear.weight = torch.nn.Parameter(t_grand_w_lst)
gnd_linear.bias = bert.classifier.bias
gnd_linear(t_grand_lst)

tensor([-0.6479, -0.1303], device='cuda:0', grad_fn=<AddBackward0>)

In [87]:
# Bias, then weight, of the fine-tuned classification head.
print(bert.classifier.bias, bert.classifier.weight, sep='\n')

Parameter containing:
tensor([-0.0001,  0.0001], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-0.0007, -0.0107,  0.0009,  ...,  0.0212,  0.0404,  0.0134],
        [-0.0114,  0.0242, -0.0184,  ...,  0.0114,  0.0350, -0.0189]],
       device='cuda:0', requires_grad=True)


In [88]:
# Bias, then weight, of the stand-alone linear layer.
print(cls_linear.bias, cls_linear.weight, sep='\n')

Parameter containing:
tensor([-0.0167, -0.0170], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-0.0116,  0.0290, -0.0314,  ..., -0.0281,  0.0172,  0.0240],
        [-0.0246,  0.0103, -0.0301,  ...,  0.0190, -0.0186, -0.0153]],
       device='cuda:0', requires_grad=True)


In [40]:
# Copy the pooled vectors to host memory as nested lists; show one row's length.
pool_lst = pooled.cpu().tolist()
len(pool_lst[0])

768

In [13]:
# Instantiate the wrapper module and place it on the GPU.
cls = BertClassifier().cuda()

In [None]:
# Run the wrapper module on the padded batch and print whatever it returns.
print(cls(tensor_ids))

In [14]:
# Plain forward pass: print the first output entry, its row-wise softmax,
# and finally display the type of the returned object.
out_n = bert(tensor_ids)
softmax = nn.Softmax(dim=1)
print(out_n[0], softmax(out_n[0]), sep='\n')
type(out_n)

tensor([[ 0.0503,  0.3246],
        [-0.1981,  0.5081],
        [-0.0189,  0.2033],
        [ 0.1545,  0.2322],
        [ 0.5509, -0.2223]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([[0.4318, 0.5682],
        [0.3305, 0.6695],
        [0.4447, 0.5553],
        [0.4806, 0.5194],
        [0.6842, 0.3158]], device='cuda:0', grad_fn=<SoftmaxBackward>)


transformers.modeling_outputs.SequenceClassifierOutput

In [None]:
# Forward pass returning hidden states and attentions as well (rebinds `out`).
out = bert(
    input_ids=tensor_ids,
    return_dict=True,
    output_hidden_states=True,
    output_attentions=True,
)


In [None]:
# Take the position-0 vector of the last layer for every sequence and report
# the shapes involved.
hidden = out.hidden_states
pooled_output = hidden[-1][:, 0, :]
print('last hs', hidden[-1].shape)
print('po', pooled_output.shape)
pooled_output

In [None]:
# Fresh 768 -> 2 linear layer on the GPU with small random weights.
linear = nn.Linear(768, 2).to('cuda')
nn.init.normal_(linear.weight, std=0.02)
# NOTE(review): only the mean is given, so std is the default 1.0 -- confirm
# that a zero bias was not the intention.
nn.init.normal_(linear.bias, mean=0)

In [None]:
# Feed the position-0 vectors through the fresh linear layer and show both the
# raw scores and their row-wise softmax.
# The former `pooled = pooled_output.view(-1, 768)` was never used and
# overwrote the earlier `pooled` (the pooler output) with a different
# quantity, so it was removed together with the commented-out code.
linout = linear(pooled_output)

print(linout)
print(F.softmax(linout, dim=1))

In [None]:
# Concatenate all position-0 vectors into one flat vector and push it through
# a linear layer sized to match.
pooled_list = pooled_output.tolist()
grand_list = []
for vec in pooled_list:
    grand_list += vec
print(len(grand_list))
# The input must live on the same device as the layer; the CPU tensor used
# before makes the call below fail with a device-mismatch error.
t_grand = torch.tensor(grand_list).to('cuda')
# in_features follows the data instead of the literal 3840.
g_linear = nn.Linear(len(grand_list), 2).to('cuda')
nn.init.normal_(g_linear.weight, std=0.02)
# NOTE(review): positional 0 is the mean; std stays at the default 1.0.
nn.init.normal_(g_linear.bias, 0)

g_linout = g_linear(t_grand)

In [None]:
from torch import nn
import torch.nn.functional as F
from transformers import BertModel

class BertClassifier(nn.Module):
    """Linear 9-way head on top of the fine-tuned BERT checkpoint.

    NOTE(review): this redefines the ``BertClassifier`` declared earlier in
    the notebook (which has 2 outputs); whichever cell ran last wins.
    """

    def __init__(self):
        super(BertClassifier, self).__init__()
        # torch.load unpickles an arbitrary Python object: only point this at
        # checkpoints from a trusted source.
        self.bert = torch.load('../savepoint/bert-fm/bert-fm-42.pt')
        self.linear = nn.Linear(768, 9)
        nn.init.normal_(self.linear.weight, std=0.02)
        # NOTE(review): positional 0 is the mean; std stays at the default 1.0.
        nn.init.normal_(self.linear.bias, 0)

    # `forward` must be a method of the class. It used to be indented inside
    # __init__, which left the module without a forward implementation.
    def forward(self, input_ids):
        """Classify a batch of token ids.

        Args:
            input_ids: integer tensor passed to the wrapped model as
                ``input_ids`` (assumed (batch, seq_len) -- TODO confirm).

        Returns:
            Tuple ``(log_probs, attentions)``: log-softmax over the nine
            classes for every row, and the wrapped model's attentions.
        """
        # Read fields by name instead of unpacking a fixed-length tuple; the
        # number of returned entries depends on the model class and flags.
        out = self.bert(
            input_ids=input_ids,
            return_dict=True,
            output_hidden_states=True,
            output_attentions=True,
        )
        # Last layer, first position of every sequence.
        vec = out.hidden_states[-1][:, 0, :]
        vec = vec.view(-1, 768)
        logits = self.linear(vec)
        # Explicit class axis; the implicit-dim form is deprecated.
        return F.log_softmax(logits, dim=-1), out.attentions

# Instantiate the wrapper; its constructor loads the checkpoint from disk.
classifier = BertClassifier()

In [17]:
from transformers import BertModel
# Pretrained BertModel loaded by hub name; a later cell prints its type next
# to the type of `bert`.
bt = BertModel.from_pretrained('cl-tohoku/bert-base-japanese-whole-word-masking')


Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


transformers.models.bert.modeling_bert.BertModel

In [24]:
# Show the classes of the pretrained encoder and of the fine-tuned model.
print(type(bt), type(bert), sep='\n')

<class 'transformers.models.bert.modeling_bert.BertModel'>
<class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'>


## TEST

In [None]:
# Single hard-coded sample text (tokens separated by spaces) used by the cells below.
paragraph = "能力 を 持っ てる 霊能 者 に しか でき ない の よ ほー だ から こそ 、 浄霊 の ため の 御 札 なんて いう の が 、 高名 な 人 謹製 って なる と 、 一 つ 凄い 値段 し て 取引 さ れ ちゃう の よ 結局 金 だ な 、 おい 地獄 の 沙汰 も 金 次第 って いう でしょ だ から 祝詞 と か 、 御 札 と か 、 誰 で も 徳 の ある 人物 から 力 を 借り受ける こと が できる ね 、 技術 が それ なり に 発展 し てる の よ 少し で も 多く の 彷徨 える 魂 を 救う ため に 少し で も 才能 が ある 人 が 、 祀ら れ た 存在 から 力 を 借りれる よう に 俺 で も できる の か できる ん じゃ ない 修行 次第 と か だろう けど 適当 だ なぁ まだ いい わ よ 、 モグリ と か だ と 、 そう いう の 考え ず に 除霊 消 霊 、 今 だけ を 解決 し て 後 で 災い に なる こと しでかす やつ が いる ん だ から 消 霊 … … 気 に し ない で 、 あまり 愉快 な 話 で も ない し そう か 、 それ なら 聞か ない で お こう あー で も ない 、 こー で も ない と いい ながら 、 日報 と で も いう べき か 、 今回 の レポート に つい て 少し ずつ 書い て いく 隣 の 相沢 さん は 、 スマホ だっ た タブレット で 打ち込ん で いる が 、 こちら は 手書き で ある 黒沢 君 、 スクロール の 方 手配 し て おく 例え 今後 やら ない と し て も 、 こう いう の 持っ て い て も 損 は ない わ よ あー … … それ じゃあ お 願い でき ます か 今回 は こっち で 料金 は 持っ て おく わ 凄い 怖い の です が 青田 買い よ 、"

In [None]:
# Fresh tokenizer (library defaults) and a fresh copy of the checkpoint.
# Unlike the earlier load, `bert` is bound to the loaded object as-is: no
# `.module` unwrapping and no explicit move to the GPU.
tokenizer = BertJapaneseTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
bert = torch.load(f'../savepoint/bert-fm/bert-fm-42.pt')


In [None]:
from torch.nn import DataParallel
# Wrap the loaded object for multi-GPU execution.
# NOTE(review): an earlier cell reads `.module` from this same checkpoint, so
# it may already be wrapped -- confirm a second wrapper is intended.
model = DataParallel(bert)

In [None]:
# Display the loaded object's module structure.
bert

In [None]:
# Turn the sample text into a (1, seq_len) id tensor.
tokenized = tokenizer.tokenize(paragraph)
# Encode with special tokens and a 512-token cap, matching the batch encoding
# used earlier. The plain tokenize -> convert_tokens_to_ids path adds no
# special tokens and no length limit, although later cells read position 0.
tokens = tokenizer.encode(paragraph, add_special_tokens=True, truncation=True, max_length=512)
wakati_ids = torch.tensor([tokens])

In [None]:
# Show the id tensor followed by its shape.
print(wakati_ids, wakati_ids.size(), sep='\n')

In [None]:
# NOTE(review): `tensor_tokens` is not assigned anywhere in the visible
# notebook, so unless it is defined elsewhere this cell fails on a fresh
# kernel -- presumably `wakati_ids` was meant; confirm.
output = bert(tensor_tokens)
print(output)

In [None]:
# Forward pass on the sample text with hidden states and attentions returned.
o = bert(
    input_ids=wakati_ids,
    return_dict=True,
    output_hidden_states=True,
    output_attentions=True,
)

In [None]:
# Display the full output object of the previous forward pass.
o

In [None]:
# The model output cannot be unpacked into (hidden, pooled): the notebook
# shows it is a sequence-classification output. Take the last hidden states
# and run the model's own pooler on them, as the earlier cells do.
_out = bert(input_ids=wakati_ids, output_hidden_states=True, return_dict=True)
hid = _out.hidden_states[-1]
# `bert` may be a wrapper exposing the real model under `.module`.
pool = getattr(bert, 'module', bert).bert.pooler(hid)

In [None]:
# List the field names present in the output object.
print(o.keys())

In [None]:
### Is this it?
o.hidden_states[0].size()

In [None]:
# NOTE(review): this points at a `.bin` file while the other cells load a
# `.pt` file, and `from_pt` looks like the flag used by the TensorFlow model
# classes -- confirm this call is valid for BertModel.
BertModel.from_pretrained('../savepoint/bert-fm/bert-fm-42.bin', from_pt=True)

In [None]:
# NOTE(review): `tensor_tokens` is not assigned anywhere in the visible
# notebook; this cell presumably depends on a cell that no longer exists.
print(tensor_tokens.Text[0])

In [None]:
# Display the module structure of `model`.
model

In [None]:
# Keep only the attentions from a forward pass on the sample text.
att = bert(input_ids=wakati_ids, output_attentions=True).attentions

In [None]:
# Display the whole output object of the same call.
bert(input_ids=wakati_ids, output_attentions=True)

In [None]:
from transformers import BertModel
# Rebinds `model`, which an earlier cell set to a DataParallel wrapper.
# NOTE(review): the other cells read this same .pt path with torch.load;
# confirm that from_pretrained accepts it.
model = BertModel.from_pretrained('../savepoint/bert-fm/bert-fm-42.pt')

In [None]:
# Pooled output of `model` for the sample text.
pool = model(input_ids=wakati_ids, output_attentions=True).pooler_output

In [None]:
# Shape of the pooled output.
pool.shape