# model.py
# coding: UTF-8
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, AutoModelForMaskedLM


class Config(object):
    """Configuration parameters."""
    def __init__(self, args):
        self.model_name = 'bert'
        self.rank = -1
        self.local_rank = -1
        self.train_path = args.data_dir + '/train_triple.jsonl'    # training set
        self.test_path = args.data_dir + '/OpenBG-CSK_test.jsonl'  # test set
        self.save_path = args.output_dir                           # training outputs
        self.bert_path = args.model_dir
        self.test_batch = args.test_batch
        self.a_new_tokens = args.a_new_tokens
        self.b_new_tokens = args.b_new_tokens
        self.new_tokens = 4 * self.a_new_tokens + 2 * self.b_new_tokens
        self.lamda = args.lamda
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device
        self.num_workers = 1
        self.num_classes = 2                     # number of classes
        self.num_epochs = args.epochs            # number of epochs
        self.batch_size = args.batch_size        # mini-batch size
        self.learning_rate = args.learning_rate  # learning rate
        self.weight_decay = args.weight_decay
        self.dropout = args.dropout
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path, do_lower_case=True)
        self.hidden_size = args.hidden_size
        self.max_length = args.max_length
        self.model = args.model
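
# A minimal sketch of how Config is expected to be constructed; the argparse
# flags mirror the attributes read above, and the default values here are
# illustrative assumptions, not the project's settings.
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--data_dir', default='./data')
#   parser.add_argument('--output_dir', default='./output')
#   parser.add_argument('--model_dir', default='bert-base-chinese')
#   parser.add_argument('--test_batch', type=int, default=32)
#   parser.add_argument('--a_new_tokens', type=int, default=1)
#   parser.add_argument('--b_new_tokens', type=int, default=1)
#   parser.add_argument('--lamda', type=float, default=0.5)
#   parser.add_argument('--epochs', type=int, default=3)
#   parser.add_argument('--batch_size', type=int, default=8)
#   parser.add_argument('--learning_rate', type=float, default=2e-5)
#   parser.add_argument('--weight_decay', type=float, default=0.01)
#   parser.add_argument('--dropout', type=float, default=0.1)
#   parser.add_argument('--hidden_size', type=int, default=768)
#   parser.add_argument('--max_length', type=int, default=64)
#   parser.add_argument('--model', default='pmi')
#   config = Config(parser.parse_args())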


class KGBERT(nn.Module):
    """KG-BERT-style triple scorer: mean-pooled BERT output -> sigmoid score."""
    def __init__(self, config):
        super(KGBERT, self).__init__()
        self.num_labels = 1
        self.bert = BertModel.from_pretrained(config.bert_path)
        self.dropout = nn.Dropout(config.dropout)
        self.dense_1 = nn.Linear(config.hidden_size, 1)

    def forward(self, input_ids, attention_mask, type_ids, position_ids):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask,
                            token_type_ids=type_ids, position_ids=position_ids)
        sequence_output = outputs[0]                     # [batch, seq_len, hidden]
        sequence_output = self.dropout(sequence_output)
        product_output = torch.mean(sequence_output, 1)  # mean-pool over tokens
        x = self.dense_1(product_output)
        x = torch.sigmoid(x).squeeze(-1)                 # [batch] scores in (0, 1)
        return x
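
# A minimal usage sketch for KGBERT, assuming a Config as above; the input
# sentence and position ids are illustrative.
#
#   model = KGBERT(config).to(config.device)
#   enc = config.tokenizer('head relation tail', padding='max_length',
#                          truncation=True, max_length=config.max_length,
#                          return_tensors='pt')
#   positions = torch.arange(config.max_length).unsqueeze(0)
#   score = model(enc['input_ids'], enc['attention_mask'],
#                 enc['token_type_ids'], positions)  # [1] value in (0, 1)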


class PMI(nn.Module):
    """Scores a triple by an NPMI-style contrast between conditional and
    marginal masked-LM probabilities of the head and tail entities."""
    def __init__(self, config):
        super(PMI, self).__init__()
        self.device = config.device
        self.config = config
        self.bert = AutoModelForMaskedLM.from_pretrained(config.bert_path).to(self.device)
        # for param in self.bert.parameters():
        #     param.requires_grad = False
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(config.hidden_size, config.hidden_size),
            torch.nn.ReLU(),
            nn.Dropout(p=config.dropout),
            torch.nn.Linear(config.hidden_size, config.hidden_size),
        )
        # LSTM + MLP reparameterization of the learnable prompt-token embeddings.
        self.lstm_head = torch.nn.LSTM(input_size=config.hidden_size,
                                       hidden_size=config.hidden_size // 2,
                                       num_layers=2,
                                       dropout=config.dropout,
                                       bidirectional=True,
                                       batch_first=True)
        self.seq_indices = torch.LongTensor(list(range(config.new_tokens))).to(config.device)
        self.extra_token_embeddings = nn.Embedding(config.new_tokens, config.hidden_size).to(config.device)
        self.embeddings = self.bert.get_input_embeddings()
        self.lamda = torch.nn.Parameter(torch.tensor([config.lamda]))
        self.scale_a = torch.nn.Parameter(torch.tensor([0.66]))
        self.scale_b = torch.nn.Parameter(torch.tensor([0.66]))
        self.relu = torch.nn.ReLU()

    def _fill_prompt(self, masked, prompt_embeddings):
        """Embed `masked` and overwrite each prompt-placeholder position
        (vocab id 105) with the corresponding learned prompt embedding."""
        emb = self.embeddings(masked)
        for bid in range(masked.shape[0]):
            k = 0
            for cid in range(masked[bid].shape[0]):
                if masked[bid][cid] == 105:  # prompt placeholder id
                    emb[bid, cid, :] = prompt_embeddings[bid][k]
                    k += 1
                    if k >= self.config.new_tokens:
                        break
        return emb

    def forward(self, sent, masked_head, masked_tail, masked_both, attention_mask):
        # Learned prompt embeddings: [new_tokens, hidden] after the LSTM + MLP.
        new_token_embeddings = self.extra_token_embeddings(self.seq_indices).unsqueeze(0)
        new_token_embeddings = self.mlp(self.lstm_head(new_token_embeddings)[0]).squeeze()
        # Broadcast to the batch: [batch, new_tokens, hidden].
        new_token_embeddings = new_token_embeddings.expand(min(self.config.batch_size, len(sent)), -1, -1)
        # Input embeddings with prompt tokens injected: [batch, seq_len, hidden].
        tail_embedding = self._fill_prompt(masked_tail, new_token_embeddings)
        head_embedding = self._fill_prompt(masked_head, new_token_embeddings)
        both_embedding = self._fill_prompt(masked_both, new_token_embeddings)
        # Conditionals: p(t|h,r) and p(h|t,r).
        logprob_tail_conditional = self.predict(sent, tail_embedding, masked_tail, attention_mask)
        logprob_head_conditional = self.predict(sent, head_embedding, masked_head, attention_mask)
        # Marginals: p(t|r) and p(h|r).
        logprob_tail_marginal = self.predict(sent, both_embedding, masked_tail, attention_mask)
        logprob_head_marginal = self.predict(sent, both_embedding, masked_head, attention_mask)
        # Averaged approximations of NPMI(t, h | r) and NPMI(h, t | r).
        mutual_inf_0 = (logprob_tail_conditional - self.scale_a * logprob_tail_marginal) / (
                -logprob_tail_conditional - self.scale_b * logprob_head_marginal)
        # Necessity ("Nec") direction.
        mutual_inf_1 = (logprob_head_conditional - self.scale_a * logprob_head_marginal) / (
                -logprob_head_conditional - self.scale_b * logprob_tail_marginal)
        # Combine the two directions, weighted by the learnable lamda.
        mutual_inf = mutual_inf_0 * self.lamda + mutual_inf_1 * (1 - self.lamda)
        return mutual_inf
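
    # For reference: normalized PMI is typically defined as
    #   NPMI(t, h | r) = (log p(t|h,r) - log p(t|r)) / (-log p(t, h|r)).
    # The ratios above follow this shape, with the joint term -log p(t,h|r)
    # approximated from the available conditionals and marginals, and the
    # learnable scales scale_a / scale_b absorbing the approximation.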

    def predict(self, sent, embeddings, masked, attention_mask):
        """Sum the masked-LM log-probabilities of the gold tokens at every
        [MASK] position (vocab id 103) in each masked sequence."""
        pred = self.bert(inputs_embeds=embeddings, attention_mask=attention_mask).logits
        pred = pred.log_softmax(2)  # [batch, seq_len, vocab]
        logprob = torch.zeros(min(self.config.batch_size, len(sent))).to(self.device)
        for i, m in enumerate(masked):
            masked_ids = [idx for idx, token in enumerate(m) if token == 103]  # [MASK]
            for idx in masked_ids:
                logprob[i] += pred[i, idx, sent[i][idx]]
            # logprob[i] = logprob[i] / len(masked_ids)  # optional length normalization
        return logprob
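
# A minimal end-to-end sketch of PMI scoring. The collate step that builds the
# four id tensors is assumed: `sent` holds the gold token ids, and the masked
# variants replace head/tail spans with [MASK] (id 103) plus the prompt
# placeholders (id 105) that _fill_prompt swaps for learned embeddings.
#
#   model = PMI(config).to(config.device)
#   scores = model(sent, masked_head, masked_tail, masked_both, attention_mask)
#   # scores: [batch] NPMI-style values; higher means the triple is more plausible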