In [13]:
import numpy as np
import pandas as pd
import re
import pickle
import ast
from zhon.hanzi import non_stops, stops
import torch
import torch.nn.functional as F
from transformers import RobertaTokenizer , RobertaModel , RobertaForSequenceClassification
from transformers import BertTokenizer , BertConfig , BertModel ,BertForSequenceClassification
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch import nn

In [14]:
def eval(pred, ans):
    if bool(pred) is not bool(ans):
        return 0
    elif not pred and not ans:
        return 1
    else:
        pred = set(pred)
        ans = set(ans)
        interaction_len = len(pred & ans)
        if interaction_len == 0:
            return 0

        pred_len = len(pred)
        ans_len = len(ans)
        return 2 / (pred_len / interaction_len + ans_len / interaction_len)


def eval_all(pred_list, ans_list):
    assert len(pred_list) == len(ans_list)
    return sum(eval(p, a) for p, a in zip(pred_list, ans_list)) / len(pred_list)

In [15]:
def combine_sentence(sentences, max_len):
        li = []
        string = ""
        for k in range(len(sentences)):
            sentence = sentences[k]
            if len(string) + len(sentence) < max_len:
                string = string + sentence
            else:
                #             原本是空的代表sentences太常
                if string == "":
                    n = max_len
                    tmp_li = [sentence[i : i + n] for i in range(0, len(sentence), n)]
                    string = tmp_li.pop(-1)
                    li = li + tmp_li
                else:
                    li.append(string)
                    string = sentence
        if string != "":
            li.append(string)
        return li

def cut_sent(para):
    para = re.sub("([。！？\?])([^”’])", r"\1\n\2", para)
    para = re.sub("(\.{6})([^”’])", r"\1\n\2", para)
    para = re.sub("(\…{2})([^”’])", r"\1\n\2", para)
    para = re.sub("([。！？\?][”’])([^，。！？\?])", r"\1\n\2", para)
    return para.split("\n")

def clean_string(content):
#     cc = OpenCC('t2s')
    content = content.replace('\n','。').replace('\t','，').replace('!', '！').replace('?', '？')# erease white space cause English name error
    content = re.sub("[+\.\/_,$%●▼►^*(+\"\']+|[+——~@#￥%……&*（）★]", "",content)
    content = re.sub(r"[%s]+" %stops, "。",content)
#     content = cc.convert(content)
    return content

In [16]:

def qa_binary_split_data(df):
    tokenizer = BertTokenizer.from_pretrained(lm_path)
    # tokenizer = RobertaTokenizer.from_pretrained(lm_path)


    train_x = []
    train_y = []
    train_input_ids = []
    train_token_types = []
    train_attention_mask = []


    for index , row in df.iterrows():
        news = row['full_content']
        ckip_names = ast.literal_eval(row['ckip_names'])
        names  = ast.literal_eval(row['name'])

        if len(names) == 0 :
            continue

        content = clean_string(news)
        max_length = 500

        split_content = cut_sent(content)
        chunks = combine_sentence(split_content, max_length)

        for chunk in chunks:
            for ckip_name in ckip_names:
                if len(chunk) >= max_length:
                    print("error !!!! lenth > 500")
                    continue
                if ckip_name not in chunk:
                    continue

                input_ids = tokenizer.encode(ckip_name, chunk)
                if len(input_ids) > 512:
                    continue
                sep_index = input_ids.index(tokenizer.sep_token_id)
                num_seg_a = sep_index + 1
                num_seg_b = len(input_ids) - num_seg_a
                segment_ids = [0] * num_seg_a + [1] * num_seg_b

                input_mask = [1] * len(input_ids)

                while len(input_ids) < 512:
                    input_ids.append(0)
                    input_mask.append(0)
                    segment_ids.append(0)
                
                if ckip_name in names:
                    train_y.append(1)
                else:
                    train_y.append(0)

                train_input_ids.append(input_ids)
                train_token_types.append(segment_ids)
                train_attention_mask.append(input_mask)
                train_x.append((ckip_name,chunk))

    train_input_ids = np.array(train_input_ids)
    train_token_types = np.array(train_token_types)
    train_attention_mask = np.array(train_attention_mask)
    train_y = np.array(train_y)

    print(len(train_x))
    print(train_input_ids.shape)
    print(train_token_types.shape)
    print(train_attention_mask.shape)
    print(train_y.shape)

    return train_x , train_input_ids , train_token_types , train_attention_mask , train_y
                
                

In [17]:
class TrainDataset(Dataset):
    def __init__(self, input_ids, token_type_ids, attention_mask, y , x):
        self.input_ids = input_ids
        self.token_type_ids = token_type_ids
        self.attention_mask = attention_mask
        self.y = y
        self.x = x

    def __getitem__(self, idx):
        inputid = self.input_ids[idx]
        tokentype = self.token_type_ids[idx]
        attentionmask = self.attention_mask[idx]
        x = self.x[idx]
        y = self.y[idx]
        return inputid, tokentype, attentionmask,  y , x

    def __len__(self):
        return len(self.input_ids)



In [18]:
def get_test_acc(model , dataloader):

    model.eval()
    with torch.no_grad():
        total = 0
        correct = 0
        for data in dataloader:
            tokens_tensors ,  segments_tensors , masks_tensors , labels = [t.to(device) for t in data[:-1]]

            name , chunk = data[-1]

            outputs = model(input_ids=tokens_tensors, 
                            token_type_ids=segments_tensors, 
                            attention_mask=masks_tensors,
                            labels = labels)
            
            pred = outputs[1]

            total += len(tokens_tensors)
            pred = torch.argmax(pred,dim=-1)

            correct += (pred == labels).sum().item()
    return correct/total




In [7]:
# dataset = 1
for dataset in range(1,4):
    dataset_base_path = './dataset/dataset'


    lm_path = './chinese_roberta_wwm/'
    train_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_train.csv')
    test_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_test.csv')

    print(train_df.shape)
    print(test_df.shape)


    train_x , train_input_ids , train_token_types , train_attention_mask , train_y  = qa_binary_split_data(train_df)
    test_x , test_input_ids , test_token_types , test_attention_mask , test_y  = qa_binary_split_data(test_df)


    from transformers import BertForSequenceClassification
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = 'cpu'
    print("device:", device)
    print('dataset', dataset)

    num_labels = 2

    model = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
    model.to(device)
#     model.bert.init_weights()
    model.train()

    BATCH_SIZE = 10
    trainset = TrainDataset(
        train_input_ids, train_token_types, train_attention_mask, train_y,train_x
    )
    trainloader = DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True)

    testset = TrainDataset(
        test_input_ids, test_token_types, test_attention_mask, test_y ,test_x
    )
    testloader = DataLoader(testset, batch_size=BATCH_SIZE,shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)

    EPOCHS = 4
    for epoch in range(EPOCHS):
        running_loss = 0.0
        total = 0
        correct = 0
        for data in trainloader:
            tokens_tensors ,  segments_tensors , masks_tensors , labels = [t.to(device) for t in data[:-1]]

            name , chunk = data[-1]

            optimizer.zero_grad()
            outputs = model(input_ids=tokens_tensors, 
                            token_type_ids=segments_tensors, 
                            attention_mask=masks_tensors,
                            labels = labels)
            
            loss = outputs[0]
            pred = outputs[1]

            total += len(tokens_tensors)
            pred = torch.argmax(pred,dim=-1)

            correct += (pred == labels).sum().item()


            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            
        

        print('epoch:',epoch)
        print('loss:', running_loss)
        print('train_acc:',correct/total)
        print('test_acc:',get_test_acc(model,testloader))
        checkpoint_path = './QAModel/' + str(dataset) + '/roberta_init2_name_qa_split_epoch' + str(epoch) + '.pkl'
        torch.save(model.state_dict(),checkpoint_path)
    print('=====================================')



(4426, 4)
(491, 4)
3235
(3235, 512)
(3235, 512)
(3235, 512)
(3235,)
296
(296, 512)
(296, 512)
(296, 512)
(296,)
device: cuda:0
dataset 1


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

epoch: 0
loss: 144.62282383069396
train_acc: 0.7678516228748068
test_acc: 0.902027027027027
epoch: 1
loss: 59.964468009304255
train_acc: 0.9323029366306028
test_acc: 0.9459459459459459
epoch: 2
loss: 29.156971398973837
train_acc: 0.9697063369397217
test_acc: 0.9425675675675675
epoch: 3
loss: 17.036331634852104
train_acc: 0.9836166924265842
test_acc: 0.9391891891891891
(4426, 4)
(491, 4)
3148
(3148, 512)
(3148, 512)
(3148, 512)
(3148,)
383
(383, 512)
(383, 512)
(383, 512)
(383,)
device: cuda:0
dataset 2


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

epoch: 0
loss: 138.6571156270802
train_acc: 0.7814485387547649
test_acc: 0.9086161879895561
epoch: 1
loss: 54.66567703336477
train_acc: 0.9399618805590851
test_acc: 0.8877284595300261
epoch: 2
loss: 27.783681378001347
train_acc: 0.974587039390089
test_acc: 0.9190600522193212
epoch: 3
loss: 15.845037960505579
train_acc: 0.9879288437102922
test_acc: 0.9477806788511749
(4426, 4)
(491, 4)
3210
(3210, 512)
(3210, 512)
(3210, 512)
(3210,)
321
(321, 512)
(321, 512)
(321, 512)
(321,)
device: cuda:0
dataset 3


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

epoch: 0
loss: 129.80026596039534
train_acc: 0.795638629283489
test_acc: 0.8909657320872274
epoch: 1
loss: 49.4812869252637
train_acc: 0.946417445482866
test_acc: 0.9283489096573209
epoch: 2
loss: 26.64020235463977
train_acc: 0.9757009345794393
test_acc: 0.9221183800623053
epoch: 3
loss: 14.970211139996536
train_acc: 0.985981308411215
test_acc: 0.9065420560747663


In [19]:
def check_pred_name_is_real_ans(pred_name_list, news , checkpoint , lm_path):
    num_labels = 2
    model = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
    model.load_state_dict(torch.load(checkpoint))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    tokenizer = BertTokenizer.from_pretrained(lm_path)

    class Testset(Dataset):
        def __init__(self, input_ids, token_type_ids, attention_mask, names):
            self.input_ids = input_ids
            self.token_type_ids = token_type_ids
            self.attention_mask = attention_mask
            self.names = names

        def __getitem__(self, idx):
            inputid = self.input_ids[idx]
            tokentype = self.token_type_ids[idx]
            attentionmask = self.attention_mask[idx]
            name = self.names[idx]
            return inputid, tokentype, attentionmask, name

        def __len__(self):
            return len(self.input_ids)

    def combine_sentence(sentences, max_len):
        li = []
        string = ""
        for k in range(len(sentences)):
            sentence = sentences[k]
            if len(string) + len(sentence) < max_len:
                string = string + sentence
            else:
                #             原本是空的代表sentences太常
                if string == "":
                    n = max_len
                    tmp_li = [sentence[i : i + n] for i in range(0, len(sentence), n)]
                    string = tmp_li.pop(-1)
                    li = li + tmp_li
                else:
                    li.append(string)
                    string = sentence
        if string != "":
            li.append(string)
        return li

    train_input_ids = []
    train_token_types = []
    train_attention_mask = []
    testing_name = []

    content = clean_string(news)

    max_length = 500

    split_content = cut_sent(content)
    chunks = combine_sentence(split_content, max_length)

    for chunk in chunks:
        for name in pred_name_list:
            if len(chunk) >= max_length:
                print("error !!!! lenth > 500")
                continue
            if name not in chunk:
                continue

            input_ids = tokenizer.encode(name, chunk)
            if len(input_ids) > 512:
                continue
            sep_index = input_ids.index(tokenizer.sep_token_id)
            num_seg_a = sep_index + 1
            num_seg_b = len(input_ids) - num_seg_a
            segment_ids = [0] * num_seg_a + [1] * num_seg_b

            input_mask = [1] * len(input_ids)

            while len(input_ids) < 512:
                input_ids.append(0)
                input_mask.append(0)
                segment_ids.append(0)

            train_input_ids.append(input_ids)
            train_token_types.append(segment_ids)
            train_attention_mask.append(input_mask)
            testing_name.append(name)

    train_input_ids = np.array(train_input_ids)
    train_token_types = np.array(train_token_types)
    train_attention_mask = np.array(train_attention_mask)
    testing_name = np.array(testing_name)

    BATCH_SIZE = train_input_ids.shape[0]
    testset = Testset(
        train_input_ids, train_token_types, train_attention_mask, testing_name
    )
    testloader = DataLoader(testset, batch_size=BATCH_SIZE)

    with torch.no_grad():
        for data in testloader:
            tokens_tensors, segments_tensors, masks_tensors = [
                t.to(device) for t in data[:-1]
            ]
            name = data[-1]
            outputs = model(
                input_ids=tokens_tensors,
                token_type_ids=segments_tensors,
                attention_mask=masks_tensors,
            )
            pred = torch.softmax(outputs[0], dim=-1)
            pred = torch.argmax(pred, dim=-1)
            pred = pred.cpu().detach().numpy()
            pred_name_list = np.array(name)
            return list(pred_name_list[pred > 0])

In [21]:
dataset = 3
dataset_base_path = './dataset/dataset'

model_path = './QAModel/'+ str(dataset) +'/'


# rbt_checkpoint = model_path +  'roberta_name_qa_split_epoch1.pkl'
# # rbtl3_checkpoint = model_path + 'rbtl3_name_qa_split_epoch0.pkl'
# # bert_checkpoint = model_path + 'bert_name_qa_split_epoch2.pkl'

rbt0_checkpoint = model_path + 'roberta_init0_name_qa_split_epoch2.pkl'
rbt1_checkpoint = model_path + 'roberta_init1_name_qa_split_epoch2.pkl'
rbt2_checkpoint = model_path + 'roberta_init2_name_qa_split_epoch1.pkl'





rbt_lm_path = './chinese_roberta_wwm/'
# rbtl3_lm_path = './rbtl3_pretrain'
# bert_lm_path = './bert_wwm_pretrain_tbrain/'


train_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_train.csv')
test_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_test.csv')

print(train_df.shape)
print(test_df.shape)


ans = []
rbt0_pred = []
rbt1_pred = []
rbt2_pred = []
vote_pred = []
vote_all_pred = []
for index,row in test_df.iterrows():
    news = row['full_content']
    ckip_names = ast.literal_eval(row['ckip_names'])
    names = ast.literal_eval(row['name'])


    if len(names) == 0:
        continue

    ans.append(names)


    rbt0_result = check_pred_name_is_real_ans(ckip_names, news , rbt0_checkpoint , rbt_lm_path)
    rbt0_result = list(set(rbt0_result))
    rbt0_pred.append(rbt0_result)
    
    rbt1_result = check_pred_name_is_real_ans(ckip_names, news , rbt1_checkpoint , rbt_lm_path)
    rbt1_result = list(set(rbt1_result))
    rbt1_pred.append(rbt1_result)
    
    rbt2_result = check_pred_name_is_real_ans(ckip_names, news , rbt2_checkpoint , rbt_lm_path)
    rbt2_result = list(set(rbt2_result))
    rbt2_pred.append(rbt2_result)




    tmp = []
    tmp_all = []
    for name in list(set(rbt0_result + rbt1_result + rbt2_result )):
        vote = 0
        vote += name in rbt0_result
        vote += name in rbt1_result
        vote += name in rbt2_result
        if vote >=2:
            tmp.append(name)
        if vote == 3:
            tmp_all.append(name)
            
    vote_pred.append(tmp)
    vote_all_pred.append(tmp_all)
    
    print('------------')
    print('ans:',names)
    print('vote:',tmp)
    print('vote_all:',tmp_all)
            





    # ensemble_or.append(rbtl3_result or rbt_result)
    # ensemble_and.append(rbtl3_result and rbt_result)


print('dataset:',dataset)
print('rbt0: %.4f' % eval_all(rbt0_pred,ans))
print('rbt1: %.4f' % eval_all(rbt1_pred,ans))
print('rbt2: %.4f' % eval_all(rbt2_pred,ans))

# print('rbtl3:',eval_all(rbtl3_pred,ans))
# print('bert:',eval_all(bert_pred,ans))
print('vote: %.4f' % eval_all(vote_pred,ans))
print('vote_all: %.4f' % eval_all(vote_all_pred,ans))
# print('or:',eval_all(ensemble_or,ans))
# print('and:',eval_all(ensemble_and,ans))



(4426, 4)
(491, 4)


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['李孟謙', '于曉艷', '于曉燕']
vote: ['于曉燕', '李孟謙', '于曉艷']
vote_all: ['于曉燕', '李孟謙', '于曉艷']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['崔培明']
vote: ['崔培明']
vote_all: ['崔培明']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['蔡賜爵', '畢鈞輝']
vote: ['蔡賜爵', '畢鈞輝']
vote_all: ['蔡賜爵', '畢鈞輝']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['陳韋霖']
vote: ['陳韋霖']
vote_all: ['陳韋霖']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['林欣月']
vote: ['林欣月']
vote_all: ['林欣月']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['黃薪哲']
vote: ['余信憲', '黃薪哲']
vote_all: ['黃薪哲']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['洪勝明']
vote: ['洪勝明']
vote_all: ['洪勝明']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['陳耀東', '葉麗珍', '葉麗貞', '趙鈞震']
vote: ['趙鈞震', '葉麗貞', '陳耀東', '葉麗珍']
vote_all: ['趙鈞震', '陳耀東', '葉麗珍']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['曾盛麟', '陳清裕', '曾美菁', '林愈得']
vote: ['曾盛麟', '曾美菁', '陳清裕', '林愈得']
vote_all: ['曾盛麟', '曾美菁']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['葉美麗', '郭雅雯', '賴麗團', '林勇任']
vote: ['郭雅雯', '林勇任', '葉美麗', '賴麗團']
vote_all: ['郭雅雯', '林勇任', '葉美麗', '賴麗團']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['林銘宏', '蔡維峻']
vote: ['蔡維峻', '林銘宏']
vote_all: ['蔡維峻', '林銘宏']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['張承平']
vote: ['張承平']
vote_all: ['張承平']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['楊嘉仁']
vote: ['楊嘉仁']
vote_all: ['楊嘉仁']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['楊昇穎', '周宗賢']
vote: ['周宗賢', '楊昇穎']
vote_all: ['周宗賢', '楊昇穎']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['卓國華']
vote: []
vote_all: []


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['楊富巖', '戴盛世']
vote: ['戴盛世', '楊富巖']
vote_all: ['戴盛世', '楊富巖']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['許祈文']
vote: ['許祈文']
vote_all: ['許祈文']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['李維凱']
vote: ['李維凱']
vote_all: ['李維凱']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['歐炳辰', '林清井', '劉奕發']
vote: ['歐炳辰', '劉奕發', '林清井']
vote_all: ['歐炳辰', '劉奕發', '林清井']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['陳發貴']
vote: ['陳發貴']
vote_all: ['陳發貴']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['王延順']
vote: ['王延順', '陳致銘', '陳俊佑']
vote_all: ['王延順', '陳致銘', '陳俊佑']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['孔朝']
vote: []
vote_all: []


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['詹昭書']
vote: []
vote_all: []


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['楊善淵']
vote: ['楊善淵']
vote_all: ['楊善淵']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['孔繁仁', '楊富巖', '戴盛世']
vote: ['戴盛世', '孔繁仁', '楊富巖']
vote_all: ['戴盛世', '孔繁仁', '楊富巖']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['林繼蘇', '徐詩彥']
vote: ['林繼蘇', '徐詩彥']
vote_all: ['林繼蘇', '徐詩彥']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['陳麗珍']
vote: ['陳麗珍']
vote_all: ['陳麗珍']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['何培才']
vote: ['何培才']
vote_all: ['何培才']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['王隆昌', '吳淑珍']
vote: ['王隆昌']
vote_all: ['王隆昌']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['楊正平', '蔡思庭']
vote: ['楊正平', '蔡思庭']
vote_all: ['楊正平', '蔡思庭']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['葉鯤璟']
vote: ['葉鯤璟', '葉鯤璟今']
vote_all: ['葉鯤璟']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['洪丞俊', '黃丹怡']
vote: ['俊丞俊', '洪丞俊']
vote_all: ['俊丞俊', '洪丞俊']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['王春甡', '王柏森']
vote: ['范筱梵', '江智詮', '王柏森', '江智銓', '王春甡']
vote_all: ['范筱梵', '江智詮', '王柏森', '江智銓']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['吳金虎', '吳銀嵐']
vote: ['張安樂', '吳金虎', '吳銀嵐']
vote_all: ['吳金虎', '吳銀嵐']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['蒲念慈']
vote: ['蒲念慈']
vote_all: ['蒲念慈']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['紀雅玲']
vote: ['紀雅玲', '林睿君']
vote_all: ['林睿君']


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

------------
ans: ['李黃錦燕']
vote: ['黃錦燕', '李黃錦燕']
vote_all: ['黃錦燕', '李黃錦燕']
dataset: 3
rbt0: 0.8080
rbt1: 0.8255
rbt2: 0.8650
vote: 0.8299
vote_all: 0.8160


In [17]:
print('vote_all: %.4f' % eval_all(['2'],['123']))

vote_all: 0.5000


In [20]:
def qa_name_binary_ensemble(pred_name_list, news):
    num_labels = 2
    lm_path = './chinese_roberta_wwm/'
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    model_path = './QAModel/1/'

    rbt0_checkpoint = model_path + 'roberta_init0_all_data_name_qa_split_epoch0.pkl'
    rbt1_checkpoint = model_path + 'roberta_init1_all_data_name_qa_split_epoch2.pkl'
    rbt2_checkpoint = model_path + 'roberta_init2_all_data_name_qa_split_epoch2.pkl'

    
    
    model0 = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
    model0.load_state_dict(torch.load(rbt0_checkpoint))
    model0.to(device)
    model0.eval()
    
    model1 = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
    model1.load_state_dict(torch.load(rbt1_checkpoint))
    model1.to(device)
    model1.eval()
    
    model2 = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
    model2.load_state_dict(torch.load(rbt2_checkpoint))
    model2.to(device)
    model2.eval()
    
    
    

    tokenizer = BertTokenizer.from_pretrained(lm_path)

    class Testset(Dataset):
        def __init__(self, input_ids, token_type_ids, attention_mask, names):
            self.input_ids = input_ids
            self.token_type_ids = token_type_ids
            self.attention_mask = attention_mask
            self.names = names

        def __getitem__(self, idx):
            inputid = self.input_ids[idx]
            tokentype = self.token_type_ids[idx]
            attentionmask = self.attention_mask[idx]
            name = self.names[idx]
            return inputid, tokentype, attentionmask, name

        def __len__(self):
            return len(self.input_ids)

    def combine_sentence(sentences, max_len):
        li = []
        string = ""
        for k in range(len(sentences)):
            sentence = sentences[k]
            if len(string) + len(sentence) < max_len:
                string = string + sentence
            else:
                #             原本是空的代表sentences太常
                if string == "":
                    n = max_len
                    tmp_li = [sentence[i : i + n] for i in range(0, len(sentence), n)]
                    string = tmp_li.pop(-1)
                    li = li + tmp_li
                else:
                    li.append(string)
                    string = sentence
        if string != "":
            li.append(string)
        return li

    train_input_ids = []
    train_token_types = []
    train_attention_mask = []
    testing_name = []

    content = clean_string(news)

    max_length = 500

    split_content = cut_sent(content)
    chunks = combine_sentence(split_content, max_length)

    for chunk in chunks:
        for name in pred_name_list:
            if len(chunk) >= max_length:
                print("error !!!! lenth > 500")
                continue
            if name not in chunk:
                continue

            input_ids = tokenizer.encode(name, chunk)
            if len(input_ids) > 512:
                continue
            sep_index = input_ids.index(tokenizer.sep_token_id)
            num_seg_a = sep_index + 1
            num_seg_b = len(input_ids) - num_seg_a
            segment_ids = [0] * num_seg_a + [1] * num_seg_b

            input_mask = [1] * len(input_ids)

            while len(input_ids) < 512:
                input_ids.append(0)
                input_mask.append(0)
                segment_ids.append(0)

            train_input_ids.append(input_ids)
            train_token_types.append(segment_ids)
            train_attention_mask.append(input_mask)
            testing_name.append(name)

    train_input_ids = np.array(train_input_ids)
    train_token_types = np.array(train_token_types)
    train_attention_mask = np.array(train_attention_mask)
    testing_name = np.array(testing_name)

    BATCH_SIZE = train_input_ids.shape[0]
    testset = Testset(
        train_input_ids, train_token_types, train_attention_mask, testing_name
    )
    testloader = DataLoader(testset, batch_size=BATCH_SIZE)

    with torch.no_grad():
        for data in testloader:
            tokens_tensors, segments_tensors, masks_tensors = [
                t.to(device) for t in data[:-1]
            ]
            name = data[-1]
            pred_name_list = np.array(name)
            
            outputs0 = model0(
                input_ids=tokens_tensors,
                token_type_ids=segments_tensors,
                attention_mask=masks_tensors,
            )
            pred0 = torch.softmax(outputs0[0], dim=-1)
            pred0 = torch.argmax(pred0, dim=-1)
            pred0 = pred0.cpu().detach().numpy()
            ans0 = list(pred_name_list[pred0 > 0])
            
            outputs1 = model1(
                input_ids=tokens_tensors,
                token_type_ids=segments_tensors,
                attention_mask=masks_tensors,
            )
            pred1 = torch.softmax(outputs1[0], dim=-1)
            pred1 = torch.argmax(pred1, dim=-1)
            pred1 = pred1.cpu().detach().numpy()
            ans1 = list(pred_name_list[pred1 > 0])
            
            
            outputs2 = model2(
                input_ids=tokens_tensors,
                token_type_ids=segments_tensors,
                attention_mask=masks_tensors,
            )
            pred2 = torch.softmax(outputs2[0], dim=-1)
            pred2 = torch.argmax(pred2, dim=-1)
            pred2 = pred2.cpu().detach().numpy()
            ans2 = list(pred_name_list[pred2 > 0])
            
            
            vote_result = []
            for name in list(set(ans0 + ans1 + ans2)):
                vote = 0
                vote += name in ans0
                vote += name in ans1
                vote += name in ans2
                if vote >=2:
                    vote_result.append(name)

            
            return vote_result

In [22]:

# 這邊是每天的 validation csv 輸出code
import pandas as pd

test_df = pd.read_csv('./tbrain/2020-07-27.csv')
validation_df =  pd.DataFrame(columns=['idx', 'article','ckip_name' , 'original_output' , 'only_QA_output'])

count = 0
for index, row in test_df.iterrows():
    if(row['binary'] != 1):
        continue

    news = row['article']
    ckip_name = ast.literal_eval(row['ckip_name'])
    pred_name_list = ast.literal_eval(row['predict_name'])
    pred_name_list = sorted(list(set(pred_name_list)))
    
    only_qa_pred = qa_name_binary_ensemble(ckip_name,news)
    only_qa_pred = sorted(list(set(only_qa_pred)))

    validation_df.loc[count] = [str(index), news , str(ckip_name) ,  str(pred_name_list) , str(only_qa_pred) ]
    count += 1

validation_df.to_csv('./tbrain/2020-07-27_after_ensemble.csv',index=False)
    

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

In [7]:
dataset = 1

dataset_base_path = './dataset/dataset'


lm_path = './chinese_roberta_wwm/'
train_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_train.csv')
test_df = pd.read_csv(dataset_base_path + str(dataset) + '/tbrain_test.csv')

train_df = pd.concat([train_df,test_df])

print(train_df.shape)
print(test_df.shape)


train_x , train_input_ids , train_token_types , train_attention_mask , train_y  = qa_binary_split_data(train_df)
# test_x , test_input_ids , test_token_types , test_attention_mask , test_y  = qa_binary_split_data(test_df)




from transformers import BertForSequenceClassification
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = 'cpu'
print("device:", device)
print('dataset', dataset)

num_labels = 2

model = BertForSequenceClassification.from_pretrained(lm_path,num_labels=num_labels)
model.to(device)
model_path = './QAModel/'+ str(dataset) +'/'
checkpoint = model_path + 'roberta_init2_name_qa_split_epoch1.pkl'
model.load_state_dict(torch.load(checkpoint))
#     model.bert.init_weights()
model.train()

BATCH_SIZE = 10
trainset = TrainDataset(
    train_input_ids, train_token_types, train_attention_mask, train_y,train_x
)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE,shuffle=True)

# testset = TrainDataset(
#     test_input_ids, test_token_types, test_attention_mask, test_y ,test_x
# )
# testloader = DataLoader(testset, batch_size=BATCH_SIZE,shuffle=True)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

EPOCHS = 4
for epoch in range(EPOCHS):
    running_loss = 0.0
    total = 0
    correct = 0
    for data in trainloader:
        tokens_tensors ,  segments_tensors , masks_tensors , labels = [t.to(device) for t in data[:-1]]

        name , chunk = data[-1]

        optimizer.zero_grad()
        outputs = model(input_ids=tokens_tensors, 
                        token_type_ids=segments_tensors, 
                        attention_mask=masks_tensors,
                        labels = labels)

        loss = outputs[0]
        pred = outputs[1]

        total += len(tokens_tensors)
        pred = torch.argmax(pred,dim=-1)

        correct += (pred == labels).sum().item()


        running_loss += loss.item()
        loss.backward()
        optimizer.step()



    print('epoch:',epoch)
    print('loss:', running_loss)
    print('train_acc:',correct/total)
#     print('test_acc:',get_test_acc(model,testloader))
    checkpoint_path = './QAModel/' + str(dataset) + '/roberta_init2_all_data_name_qa_split_epoch' + str(epoch) + '.pkl'
    torch.save(model.state_dict(),checkpoint_path)
print('=====================================')



(4917, 4)
(491, 4)
3531
(3531, 512)
(3531, 512)
(3531, 512)
(3531,)
device: cuda:0
dataset 1


Some weights of the model checkpoint at ./chinese_roberta_wwm/ were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoi

epoch: 0
loss: 30.690135530894622
train_acc: 0.9699801755876523
epoch: 1
loss: 18.242687261255924
train_acc: 0.9832908524497309
epoch: 2
loss: 10.620864343742142
train_acc: 0.9915038232795242
epoch: 3
loss: 6.995730837457813
train_acc: 0.994052676295667
