In [5]:
!nvidia-smi

Wed Sep 01 20:42:55 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108... WDDM  | 00000000:01:00.0  On |                  N/A |
| 19%   51C    P5    28W / 260W |    715MiB / 11264MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       

In [6]:
import pandas as pd
import numpy as np
import torch 
from torch import nn
from torch.utils.data import dataloader
from time import time
import datetime
import os
import sys

device = torch.device('cuda:0')
print(device)

cuda:0


In [7]:
def time_cost(func):
    def Wrapper(*arg, **kargs):
        t0 = time()
        res = func(*arg, **kargs)
        t1 = time()
        print(f'[{func.__name__}] cost {t1-t0:.2f}s')
        
        return res
    return Wrapper        

# dataset

In [46]:
class SimilarityDataProcessor:
    """
    处理如下类型数据集
    [q1 q2 label]

    [idx q1 q2 label]
    """

    def __init__(self, logger=print):
        self.logger = logger

    def process(self, name, path,
                sep='\t',
                has_index=False,
                batch_size=32,
                is_shuffle=True):

        data = self.read_data(name, path, sep, has_index)
        dataloader = self.create_dataloader(data, batch_size, is_shuffle)

        return data, dataloader

    def read_data(self,mode, name, path, sep, encoder='utf-8', has_index=False):
        """
        读取数据,返回 list形式的数据
        
        mode: 读取数据的方式, 
              readline 
              pandas 
        """
        self.logger(f'-'*42)
        self.logger(f'start to read: [{name}]...')
        
        if mode == 'readline':
            data = self._read_data_by_readline(path=path, 
                                         sep=sep, 
                                         encoder=encoder,
                                         has_index=has_index)
        elif mode == 'pandas':
            data = self._read_data_by_pandas(path=path, 
                                       sep=sep, 
                                       encoder=encoder)
        else:
            raise Exception('mode的值有误')
        
        self.logger(f'finish reading: [{name}]')
        
        return data
    
    
    def _read_data_by_readline(self, path, sep, encoder='utf-8', has_index=False):

        data = []
        with open(path, encoding=encoder) as f:
            line = f.readline()
            while line:
                try:
                    # 预处理
                    line = line.strip()
                    line = line.replace('\ufeff', '')

                    if has_index:
                        idx, q1, q2, label = line.split(sep)
                    else:
                        q1, q2, label = line.split(sep)
                    data.append([q1, q2, label])

                    line = f.readline()
                except Exception as e:
                    print(f'line: {line}')
                    print('-'*42)
                    print(e)
                    sys.exit()
                    
        return data
    
    
    def _read_data_by_pandas(self,path, sep, encoder='utf-8'):
        
        data = pd.read_csv(path, sep=sep, encoding=encoder)
        data = data.to_numpy().tolist()
        
        return data
        

    def create_dataloader(self, data, batch_size, is_shuffle):
        dataloader = torch.utils.data.DataLoader(data,
                                                 batch_size=batch_size,
                                                 shuffle=is_shuffle)

        return dataloader

data_processor = SimilarityDataProcessor()

## bq_corpus

In [47]:
path = './data/bq_corpus/'
bq_corpus_train = data_processor.read_data(mode='pandas',
                                           name='bq_corpus_train', 
                                           path=path+'train.csv', 
                                           sep=',',
                                           encoder='utf-8')

bq_corpus_val = data_processor.read_data(mode='pandas',
                                         name='bq_corpus_val', 
                                         path=path+'dev.csv', 
                                         sep=',',
                                         encoder='utf-8')

bq_corpus_test = data_processor.read_data(mode='pandas',
                                          name='bq_corpus_test', 
                                          path=path+'test.csv', 
                                          sep=',',
                                          encoder='utf-8')

------------------------------------------
start to read: [bq_corpus_train]...
finish reading: [bq_corpus_train]
------------------------------------------
start to read: [bq_corpus_val]...
finish reading: [bq_corpus_val]
------------------------------------------
start to read: [bq_corpus_test]...
finish reading: [bq_corpus_test]


## train,val,test

### exchange 

In [60]:
train = bq_corpus_train
train[:5]

[['用微信都6年，微信没有微粒贷功能', '4。  号码来微粒贷', 0],
 ['微信消费算吗', '还有多少钱没还', 0],
 ['交易密码忘记了找回密码绑定的手机卡也掉了', '怎么最近安全老是要改密码呢好麻烦', 0],
 ['你好 我昨天晚上申请的没有打电话给我 今天之内一定会打吗？', '什么时候可以到账', 0],
 ['“微粒贷开通"', '你好，我的微粒贷怎么没有开通呢', 0]]

In [61]:
train_change = list(zip(*train))
train_change = [train_change[1],train_change[0],train_change[2]]
train_change = list(zip(*train_change))
train_change[:5]

[('4。  号码来微粒贷', '用微信都6年，微信没有微粒贷功能', 0),
 ('还有多少钱没还', '微信消费算吗', 0),
 ('怎么最近安全老是要改密码呢好麻烦', '交易密码忘记了找回密码绑定的手机卡也掉了', 0),
 ('什么时候可以到账', '你好 我昨天晚上申请的没有打电话给我 今天之内一定会打吗？', 0),
 ('你好，我的微粒贷怎么没有开通呢', '“微粒贷开通"', 0)]

In [62]:
train = train+train_change
len(train)

200000

In [63]:
"""
val, test
"""
val = bq_corpus_val
test = bq_corpus_test

len(val), len(test)

(10000, 10000)

## dataloader

In [81]:
batch_size = 32

In [77]:
train_loader = data_processor.create_dataloader(train, 
                                                batch_size=batch_size, 
                                                is_shuffle=True)

val_loader = data_processor.create_dataloader(val, 
                                              batch_size=batch_size, 
                                              is_shuffle=False)

test_loader = data_processor.create_dataloader(test, 
                                               batch_size=batch_size, 
                                               is_shuffle=False)

# pretrain models

In [67]:
from transformers import AutoTokenizer, BertModel
path = '../a_nlp_resource/transformers/bert-base-chinese/'
tokenizer = AutoTokenizer.from_pretrained(path)
bert = BertModel.from_pretrained(path).to(device)
bert.training

False

In [68]:
x = tokenizer(["我的太阳", '我'], 
           padding='max_length', 
           truncation=True, 
           max_length=20,
           return_tensors = 'pt'
           ).to(device)
y = bert(**x)
y[0][0].shape

torch.Size([20, 768])

# model

## bert类

In [69]:
max_len = 20

### sbert

In [72]:
class SetenceBert(nn.Module):
    def __init__(self):
        global bert, tokenizer
        
        super().__init__()
        self.tokenizer = tokenizer
        self.bert = bert
        
        hidden_size = self.bert.config.hidden_size
        self.linear = nn.Linear(hidden_size*3, 2)
        
    def _BertModel_version_1(self, q):
        # 全体求mean，初版
        q = self.tokenizer(q, 
                           padding='max_length', 
                           truncation=True, 
                           max_length=max_len,
                           return_tensors = 'pt'
                           ).to(device)
        q = self.bert(**q)[0]
        
        q = torch.mean(q, dim=1)
        
        return q
    
    def _BertModel_version_2(self, q):
        # hidden output 去掉 padding 的影响
        token = self.tokenizer(q, 
                           padding='max_length', 
                           truncation=True, 
                           max_length=max_len,
                           return_tensors = 'pt'
                           ).to(device)
        q = self.bert(**token)[0] # hidden 
        
        attention_mask = torch.unsqueeze(token['attention_mask'], 2)
        q = (attention_mask*q).sum(1)/attention_mask.sum(1)
        
        return q
    
    
    def forward(self,q1,q2):
        q1 = self._BertModel_version_1(q1)
        q2 = self._BertModel_version_1(q2)
           
        diff = torch.abs(q1-q2)
        h = torch.cat((q1,q2,diff),dim=-1)
        h = self.linear(h)
        
        return h   
    
    def save(self, path='./sbert.model'):
        torch.save(self.state_dict(), path)
        
    def load(self, path='./sbert.model'):
        self.load_state_dict(torch.load(path))

In [73]:
sBert = SetenceBert().to(device)
sBert.save('init_sbert.model')

# baseline

In [142]:
class TrainProcessor():
    def __init__(self, model, train_loader, val_loader):
        self.model = model
        
        self.train = train_loader
        self.val = val_loader
        
        self._set_loss_optimizer()  
        
        self.best_val_loss = 10000  # model评测
        self.best_val_acc = 0 # model评测2
        
        self.current_step = -1
        
        self.training_time = None # 训练时间日期，每次训练时覆盖
        
        
    def _set_loss_optimizer(self):
        self.loss = nn.CrossEntropyLoss()
        print('loss initialled: cross entropy')
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=2e-5)
        print('optimizer initialled: adam')
    
    def _print_time_now(self):
        now = datetime.datetime.today()
        print(now)
    
    def _train_one_epoch(self):
        self.model.train()  # 开启训练模式
        assert self.model.training
        
        for batch in self.train:
            self.current_step += 1
            
            q1, q2, y = (list(i) for i in batch)
            y_hat = self.model(q1,q2)
            y = torch.tensor(y).to(device)
             
            self.optimizer.zero_grad()
            l = self.loss(y_hat, y)
            l.backward()
            self.optimizer.step()
            
            
            self._post_processing_per_step()
        
            
    def evaluate(self,data_loader, name):
        self.model.eval()  # 开启测试模式
        assert not self.model.training
        self._print_time_now()
         
        loss = 0
        acc_num = 0
        with torch.no_grad():
            for batch in data_loader:
                q1, q2, y = (list(i) for i in batch)
                y_hat = self.model.eval()(q1,q2)
                y = torch.tensor(y).to(device)
        
                l = self.loss(y_hat,y)
                loss += l
                acc_num += torch.sum(y_hat.argmax(dim=1)==y)
            
            acc = acc_num/(len(data_loader)*data_loader.batch_size)
            loss = loss/len(data_loader)
            print(f'{name:6s} | loss:{loss:0.4f} | acc:{acc:0.4f}')
            
            return loss, acc
        
    def predict_dataloader(self, data_loader):
        self.model.eval()  # 开启测试模式
        assert not self.model.training
        
        prediction = []
        prediction_labels = []
        with torch.no_grad():
            for batch in data_loader:
                q1, q2, y = (list(i) for i in batch)
                y_hat = self.model.eval()(q1,q2)
                y_hat = torch.softmax(y_hat, dim=1) # 归一化logit
                y_hat = y_hat.max(dim=1)
                pre = y_hat.values
                pre_label = y_hat.indices
                
                prediction += pre.tolist()
                prediction_labels += pre_label.tolist()

            
            return prediction, prediction_labels
            
              
    def trainng(self, epoch=5):
        print('start training: ')
        self.training_time = datetime.datetime.today()
        print(self.training_time)
        
        self.total_epoch = epoch
        torch.set_grad_enabled(True)
        assert torch.is_grad_enabled()
        
        t0 = time()
        for e in range(1, epoch+1):
            print('-'*42)
            self.current_epoch = e
            print(f'Epoch: {self.current_epoch}')
            
            t1 = time()
            self._train_one_epoch()
            t2 = time()
            print(f'cost:{(t2-t1)/3600:0.2f}h')
            
            self.evaluate(self.train, 'train')
            current_val_loss, current_val_acc = self.evaluate(self.val, 'val')
            
            if self.best_val_loss > current_val_loss:
                self.best_val_loss = current_val_loss
                torch.save(self.model.state_dict(), './best_val_loss_model')
                print('saved best val loss model')
                
            if self.best_val_acc < current_val_acc:
                self.best_val_acc = current_val_acc
                torch.save(self.model.state_dict(), './best_val_acc_model')
                print('saved best val acc model')
            
            self._post_processing_per_epoch()
                
        t3 = time()
        print(f'total cost: {(t3-t0)/3600:0.2f}h')
        
    def _post_processing_per_epoch(self):
        pass
    
    def _post_processing_per_step(self):
        pass
            

In [143]:
sBert.load_state_dict(torch.load('init_sbert.model'))
trainer = TrainProcessor(sBert, train_loader, val_loader)

print('-'*42)
print('using the initial model, test.')
trainer.evaluate(test_loader, 'test')

loss initialled: cross entropy
optimizer initialled: adam
------------------------------------------
using the initial model, test.
2021-08-21 20:03:40.373932
test   | loss:0.6576 | acc:0.8082


(tensor(0.6576, device='cuda:0'), tensor(0.8082, device='cuda:0'))

In [80]:
trainer.trainng()


print('-'*42)
print('load the best LOSS model, then test.')
trainer.model.load_state_dict(torch.load('./best_val_loss_model'))
trainer.evaluate(test_loader, 'test')


print('-'*42)
print('load the best ACC model, then test.')
trainer.model.load_state_dict(torch.load('./best_val_acc_model'))
trainer.evaluate(test_loader, 'test')

start training: 
2021-08-21 14:23:14.323652
------------------------------------------
Epoch: 1
cost:0.25h
2021-08-21 14:38:21.738531
train  | loss:0.1139 | acc:0.9606
2021-08-21 14:43:15.096785
val    | loss:0.4966 | acc:0.8141
saved best val loss model
saved best val acc model
------------------------------------------
Epoch: 2
cost:0.26h
2021-08-21 14:58:55.287158
train  | loss:0.0444 | acc:0.9858
2021-08-21 15:03:51.253132
val    | loss:0.6135 | acc:0.8230
saved best val acc model
------------------------------------------
Epoch: 3
cost:0.26h
2021-08-21 15:19:33.101704
train  | loss:0.0325 | acc:0.9899
2021-08-21 15:24:29.212909
val    | loss:0.8297 | acc:0.8083
------------------------------------------
Epoch: 4
cost:0.26h
2021-08-21 15:40:10.484704
train  | loss:0.0186 | acc:0.9941
2021-08-21 15:45:06.698666
val    | loss:0.8501 | acc:0.8077
------------------------------------------
Epoch: 5
cost:0.26h
2021-08-21 16:00:46.861943
train  | loss:0.0148 | acc:0.9952
2021-08-21 16:05

(tensor(0.6576, device='cuda:0'), tensor(0.8082, device='cuda:0'))

# learning rate decay

In [None]:
class TrainingProcesserMultistepLR(TrainProcessor):
    def __init__(self,*args):
        super().__init__(*args)
        
        
    def _set_loss_optimizer(self):
        self.loss = nn.CrossEntropyLoss()
        print('loss initialled: cross entropy loss')
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=2e-5)
        print('optimizer initialled: Adam')
        
        
        def multistep_v1(step):
            
            batch_size = 32
            train_num = 100000
            half_steps_one_epoch = (train_num//batch_size)//2
            
            # 基于经验，半个epoch时第一次decay
            # 后续每一个epoch，降一次decay
            # 0.5 1.5 2.5 3.5 这样
            
            
            n = (step//half_steps_one_epoch + 1)//2
            gamma = 0.1 # decay rate
            
            return gamma**n
        
        def multistep_v2(step):
            if step >= 1000:
                return 0.1
            else:
                return 1
            
        def warmup_multistep(step):
            if step<2000:
                return step/2000
            else:
                return 0.1
            
        def cosine_lr_v1(step):
            begin = 1000
            total_steps=31250
            if step <= begin:
                return 1
            else:
                # angle 从0到pi
                angle = np.pi*(step-begin)/(total_steps-begin)
                return 0.5*(1 + np.cos(angle))
            
        def warmup_cosine_v1(step):
            begin = 1000
            total_steps=31250
            if step <= begin:
                return step/begin
            else:
                # angle 从0到pi
                angle = np.pi*(step-begin)/(total_steps-begin)
                return 0.5*(1 + np.cos(angle))
            
                
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, warmup_multistep)
        
        print('scheduler initialled: multistep lr')           
    
    def _post_processing_per_step(self):
        self.scheduler.step() # 更新学习率 
        if self.current_step % 1000 == 0:
            lr = self.scheduler.get_last_lr()
            print(f'steps:{self.current_step} lr = {lr}')

In [None]:
sBert.load_state_dict(torch.load('init_sbert.model'))
trainer_multistepLR = TrainingProcesserMultistepLR(sBert, bq_corpus.train_loader, bq_corpus.dev_loader)


In [None]:
trainer_multistepLR.trainng()


print('-'*42)
print('load the best LOSS model, then test.')
trainer_multistepLR.model.load_state_dict(torch.load('./best_val_loss_model'))
trainer_multistepLR.evaluate(bq_corpus.test_loader, 'test')


print('-'*42)
print('load the best ACC model, then test.')
trainer_multistepLR.model.load_state_dict(torch.load('./best_val_acc_model'))
trainer_multistepLR.evaluate(bq_corpus.test_loader, 'test')

# adversial training

## fgm

In [None]:
class TrainProcessorFgm(TrainProcessor):
    """
    J_hat = alpha*J + alpha*J_ad  
    """
    
    def __init__(self,*args):
        super().__init__(*args)
        self.emb_name = 'bert.embeddings.word_embeddings.weight'
        self.backup = {} # 保存 embedding 参数的值用于恢复
        self.grad_backup = {} # 保存 原始grad
        
        
    def _fgm_attack(self,
                   epsilon=1.):
        """
        embedding 攻击
        
        为了代码的简便，不计算过了emb之后的vertor，直接用emb计算。
        """
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.grad_backup[n] = p.grad  # 保存原始 grad   
                
                if n == self.emb_name:
                    self.backup[n] = p.data.clone()
                    norm = torch.norm(p.grad)
                    if norm != 0 and not torch.isnan(norm):
                        r_at = epsilon*p.grad/norm
                        p.data.add_(r_at)
        
    
    def _restore(self):
        """
        更新的时候在原始值基础上更新
        """
        for n, p in self.model.named_parameters():
            if p.requires_grad and n == self.emb_name:
                assert n in self.backup
                p.data = self.backup[n]
        self.backup = {}
        self.grad_backup = {}
      
    def _cal_grad(self, alpha=0.5):
        alpha = 0.7
        assert 0<=alpha<=1
        
        for n, p in self.model.named_parameters():
            if p.requires_grad and p.grad is not None:
                if n == self.emb_name:
                    p.grad = self.grad_backup[n]  # emb层的梯度不变
                else:
                    p.grad = alpha*self.grad_backup[n] + (1-alpha)*p.grad
#                     p.grad = self.grad_backup[n] + p.grad
        
    
    def _train_one_epoch(self):
        self.model.train()  # 开启训练模式
        assert self.model.training
        
        for batch in self.train:
            q1, q2, y = (list(i) for i in batch)
            y_hat = self.model(q1,q2)
            y = torch.tensor(y).to(device)
             
            self.optimizer.zero_grad()
            l = self.loss(y_hat, y)
            l.backward(retain_graph=True) # 首先正常 back 得到 grad
            
            self._fgm_attack() # 得到对抗embedding  
            
            self.optimizer.zero_grad()
            l_adv = self.loss(y_hat, y)  # 得到攻击后的误差
            l_adv.backward() # 求攻击样本的 grad
            
            self._cal_grad() # 计算 grad 的值
            self._restore()  # 恢复embedding 的值, 顺便清空 backup，方便下一轮
            self.optimizer.step() # 梯度下降，更新参数

In [None]:
sBert.load_state_dict(torch.load('init_sbert.model'))
trainer_fgm = TrainProcessorFgm(sBert, train_loader, dev_loader, test_loader)

In [None]:
trainer_fgm.trainng()
print('-'*42)
print('load the best LOSS model, then test.')
trainer_fgm.model.load_state_dict(torch.load('./best_val_loss_model'))
trainer_fgm.evaluate(test_loader, 'test')


print('-'*42)
print('load the best ACC model, then test.')
trainer_fgm.model.load_state_dict(torch.load('./best_val_acc_model'))
trainer_fgm.evaluate(test_loader, 'test')


## fgm+learning rate decay

In [None]:
class TrainProcessorFgmCosineLr(TrainProcessor):
    """
    J_hat = alpha*J + alpha*J_ad  
    """
    
    def __init__(self,*args):
        super().__init__(*args)
        self.emb_name = 'bert.embeddings.word_embeddings.weight'
        self.backup = {} # 保存 embedding 参数的值用于恢复
        self.grad_backup = {} # 保存 原始grad
        
        
    def _fgm_attack(self,
                   epsilon=1.):
        """
        embedding 攻击
        
        为了代码的简便，不计算过了emb之后的vertor，直接用emb计算。
        """
        for n, p in self.model.named_parameters():
            if p.requires_grad:
                self.grad_backup[n] = p.grad  # 保存原始 grad   
                
                if n == self.emb_name:
                    self.backup[n] = p.data.clone()
                    norm = torch.norm(p.grad)
                    if norm != 0 and not torch.isnan(norm):
                        r_at = epsilon*p.grad/norm
                        p.data.add_(r_at)
        
    
    def _restore(self):
        """
        更新的时候在原始值基础上更新
        """
        for n, p in self.model.named_parameters():
            if p.requires_grad and n == self.emb_name:
                assert n in self.backup
                p.data = self.backup[n]
        self.backup = {}
        self.grad_backup = {}
      
    def _cal_grad(self, alpha=0.5):
        alpha = 0.7
        assert 0<=alpha<=1
        
        for n, p in self.model.named_parameters():
            if p.requires_grad and p.grad is not None:
                if n == self.emb_name:
                    p.grad = self.grad_backup[n]  # emb层的梯度不变
                else:
                    p.grad = alpha*self.grad_backup[n] + (1-alpha)*p.grad
#                     p.grad = self.grad_backup[n] + p.grad
        
    
    def _train_one_epoch(self):
        self.model.train()  # 开启训练模式
        assert self.model.training
        
        for batch in self.train:
            self.current_step += 1
            
            q1, q2, y = (list(i) for i in batch)
            y_hat = self.model(q1,q2)
            y = torch.tensor(y).to(device)
             
            self.optimizer.zero_grad()
            l = self.loss(y_hat, y)
            l.backward(retain_graph=True) # 首先正常 back 得到 grad
            
            self._fgm_attack() # 得到对抗embedding  
            
            self.optimizer.zero_grad()
            l_adv = self.loss(y_hat, y)  # 得到攻击后的误差
            l_adv.backward() # 求攻击样本的 grad
            
            self._cal_grad() # 计算 grad 的值
            self._restore()  # 恢复embedding 的值, 顺便清空 backup，方便下一轮
            self.optimizer.step() # 梯度下降，更新参数
            
            self._post_processing_per_step()
            
            
    def _set_loss_optimizer(self):
        self.loss = nn.CrossEntropyLoss()
        print('loss initialled: cross entropy loss')
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=2e-5)
        print('optimizer initialled: Adam')
        
            
        def cosine_lr_v1(step):
            begin = 1000
            total_steps=31250
            if step <= begin:
                return 1
            else:
                # angle 从0到pi
                angle = np.pi*(step-begin)/(total_steps-begin)
                return 0.5*(1 + np.cos(angle))
                   
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, cosine_lr_v1)
        
        print('scheduler initialled: cosine lr')           
    
    def _post_processing_per_step(self):
        self.scheduler.step() # 更新学习率 
        if self.current_step % 1000 == 0:
            lr = self.scheduler.get_last_lr()
            print(f'steps:{self.current_step} lr = {lr}')

## pgd

In [None]:
class TrainProcessorPGD(TrainProcessor):
    def __init__(self,*args):
        super().__init__(*args)
        self.data_backup = {} # param backup
        self.grad_backup={} # 
        self.emb_name ='bert.embeddings.word_embeddings.weight' 
        
    def attack(self, 
               epsilon=1., 
               alpha=0.3,  
               t=0):

        for n, p in self.model.named_parameters():
            # 第一次的时候 backup
            if (t==0) and n in self.emb_name: 
                    self.data_backup[n]=p.data.clone()
            if (t==0) and p.requires_grad and p.grad is not None:
                self.grad_backup[n]=p.grad.clone()
            
            # 攻击
            if p.requires_grad and self.emb_name in n:
                norm=torch.norm(p.grad)
                if norm !=0 and not torch.isnan(norm):
                    r_at = alpha * p.grad/norm
                    p.data.add_(r_at)
                    p.data=self.project(n,p.data,epsilon) 
                
    def project(self,param_name, param_data, epsilon):
        r = param_data - self.data_backup[param_name]
        if torch.norm(r)>epsilon:
            r = epsilon*r/torch.norm(r)
        return self.data_backup[param_name] + r            
    
    
    def restore(self):
        for n, p in self.model.named_parameters():
            if p.requires_grad and self.emb_name in n: 
                assert n in self.data_backup
                p.data = self.data_backup[n]  # 恢复emb.data
                p.grad = self.grad_backup[n]  # 恢复emb.grad 
        self.data_backup = {}

                
    def restore_grad(self):
        for n, p in self.model.named_parameters():
            if p.requires_grad and p.grad is not None:
                p.grad=self.grad_backup[n]

    
    def _train_one_epoch(self):
        self.model.train()  # 开启训练模式
        assert self.model.training
        
        for batch in self.train:
            q1, q2, y = (list(i) for i in batch)
            y_hat = self.model(q1,q2)
            y = torch.tensor(y).to(device)
             
            self.optimizer.zero_grad()
            l = self.loss(y_hat, y)
            l.backward(retain_graph=True) # 首先正常back得到grad
           
            
            #PGD对抗训练
            K=3
            for t in range(K):
                self.attack(t=t) # 在embedding上添加对抗扰动, first attack时备份param.data
                if t != K-1:
                    self.optimizer.zero_grad()
                else:
                    self.restore_grad()
                    loss_adv = self.loss(y_hat, y)
                    loss_adv.backward() # 反向传播，并在正常的grad基础上，累加对抗训练的梯度
            self.restore()# 恢复embedding参数
            #梯度下降，更新参数
            self.optimizer.step()

# Analysis

In [147]:
df_test = pd.DataFrame(test, columns=['q1','q2','label'])
df_test.head()

Unnamed: 0,q1,q2,label
0,为什么我无法看到额度,为什么开通了却没有额度,0
1,为啥换不了,为两次还都提示失败呢,0
2,借了钱，但还没有通过，可以取消吗？,可否取消,1
3,为什么我申请额度输入密码就一直是那个页面,为什么要输入支付密码来验证,0
4,今天借 明天还款可以？,今天借明天还要手续费吗,0


In [148]:
for i in test_loader:
    print(i)
    break

[('为什么我无法看到额度', '为啥换不了', '借了钱，但还没有通过，可以取消吗？', '为什么我申请额度输入密码就一直是那个页面', '今天借 明天还款可以？', '你好！今下午咱没有扣我款？', '所借的钱是否可以提现？', '不是邀请的客人就不能借款吗', '人脸失别不了，开不了户', '一天利息好多钱', '为啥还没开放啊', '开通.微粒贷', '咋么才能收到邀请', '扣款时间是几点', '为什么借款总是不通过', '为什么我的无法查看额度', '请问月息多少', '借钱可好取现', '可以开 结清证明吗？', '你好，我银行卡被法院封了，能否换我儿子的卡还款', '一般是什么时候自动扣款？', '我想问什么时候会再打电话过来呢？', '请问有这个手机端的app吗', '开不了户', '不满足微众银行条件', '那我刚刚申请的贷款。。取消掉怎么操作', '什么时候发出邀请呢', '为什么提前还清所有借款不能再借呢？', '我换手机号了', '为什么刚刚借钱要输入很多次验证码和支付密码是不是我手机问题还款就可以', '不借了 不要打来了', '需要提供什么借款材料'), ('为什么开通了却没有额度', '为两次还都提示失败呢', '可否取消', '为什么要输入支付密码来验证', '今天借明天还要手续费吗', '你好  今天怎么没有扣款呢', '该笔借款可以提现吗！', '一般什么样得人会受邀请', '我输入的资料都是正确的，为什么总说不符开户失败？', '1万利息一天是5元是吗', '不是微粒贷客户，怎么可以受邀', '帮我开通', '为什么我6号扣还款的到现在还没', '无利息的还款时间是多久？', '为什么审请不通过', '为什么我点进去没有额度呢', '2万块月息是多少', '可以提现金？', '还清钱后能继续借吗？', '换卡什么时候能换好', '一般几点扣款', '那什么时候打来电话？', '手机信号不好', '人脸失别不了，开不了户', '“您未满足微众银行审批要求，无法查看额度”，这是为什么？什么原因呢', '刚刚申请了贷款，可以取消吗？', '借款没打电话', '提前还款利息怎么算', '如果我换手机怎么办？', '借款连续输验证码和密码', '不想借钱了，但是不小心按到借钱按钮了', '是不是苹果手机 都没开通啊'), tensor(

In [149]:
pre, pre_label = trainer.predict_dataloader(test_loader)

In [150]:
df_test['pre_label'] = pre_label
df_test['pre'] = pre

In [151]:
df_test.head()

Unnamed: 0,q1,q2,label,pre_label,pre
0,为什么我无法看到额度,为什么开通了却没有额度,0,0,0.999987
1,为啥换不了,为两次还都提示失败呢,0,1,0.557989
2,借了钱，但还没有通过，可以取消吗？,可否取消,1,0,0.934388
3,为什么我申请额度输入密码就一直是那个页面,为什么要输入支付密码来验证,0,0,0.999917
4,今天借 明天还款可以？,今天借明天还要手续费吗,0,0,0.998913
