In [1]:
class Args:
    """Notebook stand-in for the command-line arguments of the training scripts.

    Fields read directly by this notebook: ``batch_size`` (DataLoader),
    ``lr`` / ``steps`` / ``clip`` (optimizer), ``dist_func`` (distance
    function selection) and ``gpu_ids`` (target device).
    The remaining fields are only handed to project helpers such as
    ``read_all_sequences`` / ``make_dataset`` -- presumably they are consumed
    there; verify against those modules.
    """

    def __init__(self):
        defaults = {
            "summary_method": "none",
            "toy": True,
            "toy_size": 80000,
            "batch_size": 16,
            "num_neg": 4,
            "max_len": 200,
            "lr": 1e-5,
            "steps": 50000,
            "clip": 1.0,
            "dist_func": "cosin",
            "local_rank": 0,
            "gpu_ids": 0,
        }
        # Attach every default as an instance attribute, in declaration order.
        for name, value in defaults.items():
            setattr(self, name, value)


args = Args()

In [2]:
from finetune_data import make_dataset, read_all_sequences
import torch

# Read the raw sequences, already split into train and test parts.
train_seqs, test_seqs = read_all_sequences(args)

# Turn each split into a dataset; the test split is built first, then the
# train split (the generator is consumed left to right during unpacking).
test_dataset, train_dataset = (
    make_dataset(args, split_seqs) for split_seqs in (test_seqs, train_seqs)
)

# Cache both splits in one file so later sessions can skip the preprocessing.
torch.save(
    {"train": train_dataset, "test": test_dataset},
    'finetune_dataset.pkl',
)

 29%|██▊       | 65/227 [00:00<00:00, 591.14it/s]Read all sequences begin=====
100%|██████████| 227/227 [00:00<00:00, 743.03it/s]


In [3]:
from torch.utils.data import TensorDataset, DataLoader

# Training batches: reshuffled every epoch; the trailing partial batch is
# dropped so every optimisation step sees exactly args.batch_size examples.
train_loader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)

# Evaluation batches: fixed order and *no* dropped samples, so the reported
# metrics cover the whole test set and are comparable between evaluations.
# (The previous shuffle=True / drop_last=True silently skipped up to
# batch_size - 1 test examples on every evaluation.)
test_loader = DataLoader(
    test_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [4]:
from transformers import RobertaConfig, RobertaForSequenceClassification
from copy import deepcopy

# Build a freshly initialised classifier with the roberta-base architecture
# and a two-class head.
config = RobertaConfig.from_pretrained("roberta-base")
config.num_labels = 2
model = RobertaForSequenceClassification(config)

## Load Pretrained Weight
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
pretained_weight = torch.load("checkpoint.pkl", map_location='cpu')
pretained_weight.update(
    {name: tensor.cpu() for name, tensor in pretained_weight.items()}
)

# Start from the classifier's own state and overwrite every entry the
# checkpoint provides. Pooler weights are skipped, and the checkpoint prefix
# "module.base_model" is renamed to "roberta" (presumably the checkpoint was
# saved from a DataParallel-wrapped embedding network -- confirm).
model_weight = model.state_dict()
model_weight.update(
    {
        name.replace("module.base_model", "roberta"): deepcopy(tensor)
        for name, tensor in pretained_weight.items()
        if "pooler" not in name
    }
)

model.load_state_dict(model_weight)

<All keys matched successfully>

In [10]:
from opt import OpenAIAdam

# Keyword arguments forwarded to OpenAIAdam. The schedule length (t_total)
# is tied to args.steps and gradient clipping to args.clip.
adam_options = dict(
    lr=args.lr,
    schedule='warmup_linear',
    warmup=0.002,
    t_total=args.steps,
    b1=0.9,
    b2=0.999,
    e=1e-08,
    l2=0.01,
    vector_l2=True,
    max_grad_norm=args.clip,
)
optimizer = OpenAIAdam(model.parameters(), **adam_options)

# Stand-alone loss object; the training loop currently uses the loss returned
# by the model itself, so this is only needed if that is switched back.
critirion = torch.nn.CrossEntropyLoss()

# Move the model to the configured device (args.gpu_ids is a device index).
device = torch.device(args.gpu_ids)
model = model.to(device)

In [33]:
import tqdm

# Fine-tuning loop for the sequence classifier.
#
# Requires `model`, `optimizer`, `device`, `train_loader`, `test_loader` and
# `evaluate_model` to exist in the kernel already. NOTE: `evaluate_model` is
# defined in a cell that appears *below* this one, so run that cell first
# (or move it above) when executing the notebook top to bottom.
step = 0
loss_list = []
best_acc = 0
logs = []

# Make sure we start in training mode even if a previous run of this cell was
# interrupted in the middle of an evaluation (which leaves the model in eval mode).
model.train()

while step < args.steps:
    for batch in tqdm.tqdm(train_loader):
        optimizer.zero_grad()
        seq_ids, labels = [item.to(device) for item in batch]
        # With labels supplied, the first element of the model output is the loss.
        loss = model(seq_ids, labels=labels)[0]

        loss.backward()
        loss_list.append(loss.item())

        optimizer.step()
        step += 1

        if step % 10 == 0:
            # Running mean of the loss over every step taken so far.
            running_loss = sum(loss_list) / step
            print("step: ", step)
            print("loss: ", running_loss)
            log = {"step": step, "loss": running_loss}
            log = evaluate_model(model, test_loader, log)
            logs.append(log)
            torch.save(logs, "finetune_log.pkl")

            # Keep the checkpoint with the highest test accuracy seen so far.
            # (The comparison used to be `<`, which can never be true when
            # best_acc starts at 0, so no checkpoint was ever written.)
            if log["acc"] > best_acc:
                best_acc = log["acc"]
                torch.save(model.state_dict(), "finetune_checkpoint.pkl")

            # evaluate_model switches to eval mode; switch back before training on.
            model.train()

        # Stop exactly at args.steps instead of finishing the epoch: the
        # optimizer's schedule was configured with t_total=args.steps.
        if step >= args.steps:
            break




2 [00:37<00:21,  9.78it/s][A[A[A


 63%|██████▎   | 363/572 [00:37<00:21,  9.78it/s][A[A[A


 64%|██████▎   | 364/572 [00:37<00:21,  9.78it/s][A[A[A


 64%|██████▍   | 365/572 [00:37<00:21,  9.78it/s][A[A[A


 64%|██████▍   | 366/572 [00:37<00:21,  9.78it/s][A[A[A


 64%|██████▍   | 367/572 [00:37<00:20,  9.79it/s][A[A[A


 64%|██████▍   | 368/572 [00:37<00:20,  9.79it/s][A[A[A


 65%|██████▍   | 369/572 [00:37<00:20,  9.81it/s][A[A[A


 65%|██████▍   | 370/572 [00:37<00:20,  9.81it/s][A[A[A


 65%|██████▍   | 371/572 [00:38<00:20,  9.81it/s][A[A[A


 65%|██████▌   | 372/572 [00:38<00:20,  9.81it/s][A[A[A


 65%|██████▌   | 373/572 [00:38<00:20,  9.81it/s][A[A[A


 65%|██████▌   | 374/572 [00:38<00:20,  9.81it/s][A[A[A


 66%|██████▌   | 375/572 [00:38<00:20,  9.80it/s][A[A[A


 66%|██████▌   | 376/572 [00:38<00:19,  9.81it/s][A[A[A


 66%|██████▌   | 377/572 [00:38<00:19,  9.83it/s][A[A[A


 66%|██████▌   | 378/572 [00:38<00:19,  9.84it/

KeyboardInterrupt: 

In [32]:
def evaluate_model(model, test_loader, log):
    """Evaluate a binary classifier on ``test_loader`` and record the metrics in ``log``.

    The model is switched to eval mode (the caller is responsible for calling
    ``model.train()`` afterwards) and run without gradient tracking. Label 1
    is treated as the positive class.

    Args:
        model: callable invoked as ``model(seq_ids, labels=labels)``; the
            second element of its output is used as the logits, with the
            class scores along dim 1.
        test_loader: iterable of ``(seq_ids, labels)`` batches.
        log: dict that receives the keys ``"recall"``, ``"precision"``,
            ``"F1"`` and ``"acc"`` (updated in place).

    Returns:
        The same ``log`` dict, updated.

    Notes:
        Relies on the notebook-level ``device`` and ``tqdm`` names.
        A metric whose denominator is zero (e.g. precision when nothing was
        predicted positive) is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    print("Evaluation Start======")
    model.eval()
    TP, TN, FN, FP = 0, 0, 0, 0

    with torch.no_grad():
        for batch in tqdm.tqdm(test_loader):
            seq_ids, labels = [item.to(device) for item in batch]
            logits = model(seq_ids, labels=labels)[1]

            prediction = torch.argmax(logits, dim=1)
            # TP: prediction and label are both 1
            TP += ((prediction == 1) & (labels == 1)).sum().item()
            # TN: prediction and label are both 0
            TN += ((prediction == 0) & (labels == 0)).sum().item()
            # FN: prediction 0, label 1
            FN += ((prediction == 0) & (labels == 1)).sum().item()
            # FP: prediction 1, label 0
            FP += ((prediction == 1) & (labels == 0)).sum().item()

    total = TP + TN + FP + FN
    # Guard every ratio: early in training the model often predicts a single
    # class, which makes TP + FP (or TP + FN) zero.
    p = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    r = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    F1 = 2 * r * p / (r + p) if (r + p) > 0 else 0.0
    acc = (TP + TN) / total if total > 0 else 0.0
    print("recall: ", r)
    print("precision: ", p)
    print("F1: ", F1)
    print("Acc: ", acc)

    log["recall"] = r
    log["precision"] = p
    log["F1"] = F1
    log["acc"] = acc

    return log

<All keys matched successfully>

In [None]:
!python main.py --toy --toy_size 40000 --batch_size 16 --num_neg 4 --lr 1e-5 --clip 1.0 --steps 50000

In [None]:
from data import read_all_sequences, create_pos_samples, create_negative_samples, create_neutual_samples
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaModel
from torch.utils.data import TensorDataset, DataLoader
import argparse
import random
import torch.backends.cudnn as cudnn
from models import EmbNetwork
from builder import MoCo
import data
import os
import tqdm
from opt import OpenAIAdam
from train import train

In [None]:
# Load a previously cached dataset from disk instead of rebuilding it
# (the rebuild call is kept, commented out, for reference).
# NOTE(review): torch.load unpickles the file, so only load caches you trust.
# dataset = data.make_dataset(args)
dataset = torch.load('processed_data.pkl')

In [None]:
# Restrict this process to GPUs 0 and 1.
# NOTE(review): this only takes effect if it runs before CUDA is first
# initialised in the kernel -- confirm nothing above has touched the GPU yet.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# Disabled single-node torch.distributed (NCCL) setup, kept for reference:
# os.environ['MASTER_ADDR'] = 'localhost'
# os.environ['MASTER_PORT'] = '29500'
# rank = 0
# torch.cuda.set_device(rank)
# torch.distributed.init_process_group(backend='nccl', init_method='env://',world_size = 2, rank = rank)

In [None]:
# Sanity check: show how many CUDA devices this process can see.
torch.cuda.device_count()

In [None]:
# Disabled alternative: rebuild the dataset and shard it with a DistributedSampler.
# train_dataset = data.make_dataset(args)
# train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
# Shuffled loader over the cached dataset; the trailing partial batch is dropped.
# NOTE: this rebinds `train_loader`, replacing the fine-tuning loader created earlier.
train_loader = DataLoader(dataset, num_workers=2,batch_size=args.batch_size, shuffle=True, drop_last=True)

In [None]:
# Wrap the pretrained roberta-base encoder in the project's EmbNetwork and move it to the GPU.
# NOTE: this rebinds `model`, replacing the classifier built earlier in the notebook.
base_model = RobertaModel.from_pretrained("roberta-base")
model = EmbNetwork(base_model, pooling_strategy='last').cuda()
# Disabled alternative: DistributedDataParallel across GPUs 0 and 1.
# model = torch.nn.parallel.DistributedDataParallel(model,device_ids=[0,1],output_device=args.local_rank)
# DataParallel wraps the network, so its state_dict keys gain a "module." prefix.
model= torch.nn.DataParallel(model)

In [None]:
# Keyword arguments forwarded to OpenAIAdam. The schedule length (t_total)
# is tied to args.steps and gradient clipping to args.clip.
adam_options = dict(
    lr=args.lr,
    schedule='warmup_linear',
    warmup=0.002,
    t_total=args.steps,
    b1=0.9,
    b2=0.999,
    e=1e-08,
    l2=0.01,
    vector_l2=True,
    max_grad_norm=args.clip,
)
# NOTE: rebinds `optimizer`; it now optimises the embedding model built above.
optimizer = OpenAIAdam(model.parameters(), **adam_options)

In [None]:
def batch_forward(model, batch):
    """Encode the four parts of a batch with ``model``.

    Args:
        model: callable mapping an id tensor of shape ``(N, max_len)`` to
            features of shape ``(N, hid_dim)``.
        batch: iterable of four tensors ``(anchor_ids, pos_ids, neg_ids,
            neural_ids)``; each is moved to the GPU with ``.cuda()``.
            ``anchor_ids`` fixes ``bsz`` and ``max_len``; the negative and
            neutral ids are flattened to ``(-1, max_len)`` before encoding.

    Returns:
        ``(anchor_feature, pos_feature, neg_feature, neural_feature)`` where
        the first two have shape ``(bsz, 1, hid_dim)`` and the last two
        ``(bsz, -1, hid_dim)``.
    """
    anchor_ids, pos_ids, neg_ids, neural_ids = (part.cuda() for part in batch)
    bsz, max_len = anchor_ids.shape[0], anchor_ids.shape[1]

    # Single-sequence inputs get an extra axis so they broadcast against the groups.
    anchor_feature = model(anchor_ids).unsqueeze(1)
    hid_dim = anchor_feature.shape[2]
    pos_feature = model(pos_ids).unsqueeze(1)

    def encode_group(group_ids):
        # Flatten the group axis, encode, then restore it.
        flat_ids = group_ids.view(-1, max_len)
        return model(flat_ids).view(bsz, -1, hid_dim)

    neg_feature = encode_group(neg_ids)
    neural_feature = encode_group(neural_ids)

    return anchor_feature, pos_feature, neg_feature, neural_feature


def train(args, model,train_loader,optimizer):
    """Train the embedding model with two triplet losses for ``args.steps`` steps.

    Each step encodes a batch with ``batch_forward``, scores the anchor against
    the positive, negative and neutral features with ``dist_function``, and
    minimises ``triplet_loss(pos, neg, margin=2.0) + triplet_loss(pos, neu,
    margin=1.0)``.

    Every 100 steps the running means (since step 1) of the total, negative
    and neutral losses are printed and written to ``log.pkl`` (overwriting the
    previous snapshot); ``checkpoint.pkl`` is overwritten whenever the running
    mean of the total loss reaches a new minimum.

    Args:
        args: object providing ``steps`` (number of optimisation steps) and
            ``dist_func`` (metric name passed to ``dist_function`` and
            ``triplet_loss``).
        model: network handed to ``batch_forward``.
        train_loader: iterable of batches; re-iterated until ``args.steps``
            steps have been taken.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.

    Raises:
        RuntimeError: if ``train_loader`` yields no batch at all (the loop
            could otherwise never terminate).
    """
    step = 0
    bar = tqdm.tqdm(total=args.steps)
    bar.update(0)
    # Running totals; dividing by `step` gives the same means as summing a
    # per-step list, without re-summing the whole history at every log point.
    loss_total = 0.0
    neg_loss_total = 0.0
    neu_loss_total = 0.0
    best_loss = float("inf")

    try:
        while step < args.steps:
            step_at_epoch_start = step
            for batch in train_loader:
                optimizer.zero_grad()
                anchor_feature, pos_feature, neg_feature, neural_feature = batch_forward(model, batch)

                d_pos = dist_function(anchor_feature, pos_feature, args.dist_func)
                d_neg = dist_function(anchor_feature, neg_feature, args.dist_func)
                d_neu = dist_function(anchor_feature, neural_feature, args.dist_func)

                # Tell the loss which metric produced the scores so that both
                # functions stay consistent with args.dist_func.
                loss_neg = triplet_loss(d_pos, d_neg, margin=2.0, method=args.dist_func)
                loss_neu = triplet_loss(d_pos, d_neu, margin=1.0, method=args.dist_func)

                loss = loss_neg + loss_neu

                loss.backward()
                loss_total += loss.item()
                neg_loss_total += loss_neg.item()
                neu_loss_total += loss_neu.item()

                optimizer.step()
                bar.update(1)
                step += 1

                if step % 100 == 0:
                    mean_loss = loss_total / step
                    mean_neg_loss = neg_loss_total / step
                    mean_neu_loss = neu_loss_total / step
                    print("step: ", step)
                    print("loss: ", mean_loss)
                    print("neg loss: ", mean_neg_loss)
                    print("neu loss: ", mean_neu_loss)
                    torch.save(
                        {"step": step, "loss": mean_loss, "neg loss": mean_neg_loss, "neu loss": mean_neu_loss},
                        "log.pkl",
                    )
                    if mean_loss < best_loss:
                        best_loss = mean_loss
                        torch.save(model.state_dict(), "checkpoint.pkl")

                # Stop exactly at args.steps instead of finishing the epoch:
                # the caller sizes the optimizer schedule with the same number.
                if step >= args.steps:
                    break

            if step == step_at_epoch_start:
                raise RuntimeError("train_loader produced no batches; cannot make progress")
    finally:
        bar.close()

def triplet_loss(d_pos,d_neg,margin=1.0,method='cosin',reduction='mean'):
    """Margin-based triplet loss on precomputed anchor/positive and anchor/negative scores.

    Args:
        d_pos: scores between anchors and positives (broadcastable against ``d_neg``).
        d_neg: scores between anchors and negatives.
        margin: required gap between the positive and negative scores.
        method: how to read the scores.
            ``'cosin'``    -- scores are similarities (higher = closer):
                              ``max(0, d_neg - d_pos + margin)``.
            ``'ecludien'`` -- scores are distances (lower = closer):
                              ``max(0, d_pos - d_neg + margin)``.
        reduction: ``'mean'`` or ``'sum'``; any other value returns the
            unreduced per-element loss.

    Returns:
        The reduced (scalar) or unreduced loss tensor.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    if method == 'cosin':
        loss = torch.clamp(d_neg - d_pos + margin, min=0.0)
    elif method == 'ecludien':
        # For distances the inequality flips: the positive must be closer.
        loss = torch.clamp(d_pos - d_neg + margin, min=0.0)
    else:
        raise ValueError("unknown method %r; expected 'cosin' or 'ecludien'" % (method,))

    if reduction == 'mean':
        return torch.mean(loss)
    if reduction == 'sum':
        return torch.sum(loss)
    return loss

def dist_function(x1,x2,method='cosin'):
    """Score two feature tensors along their last (third) axis.

    Args:
        x1, x2: 3-D feature tensors that broadcast against each other; the
            score is computed over dim 2.
        method: ``'cosin'`` returns the cosine *similarity* (higher = closer);
            ``'ecludien'`` returns the Euclidean *distance* (lower = closer).

    Returns:
        Tensor of scores with dim 2 reduced away.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    if method == 'cosin':
        return F.cosine_similarity(x1, x2, dim=2)
    if method == 'ecludien':
        # Explicit subtraction lets (bsz, 1, d) broadcast against (bsz, k, d).
        return torch.norm(x1 - x2, p=2, dim=2)
    raise ValueError("unknown method %r; expected 'cosin' or 'ecludien'" % (method,))

In [None]:
# Start training with the notebook-level args, model, loader and optimizer.
# NOTE: `train` here is the function defined in the cell above, which shadows
# the `train` imported from the project's train module.
train(args,model,train_loader,optimizer)

In [None]:
import torch
# Load the snapshot written by train(): a single dict with the keys
# "step", "loss", "neg loss" and "neu loss" (only the most recent log point,
# because the file is overwritten each time).
# NOTE: rebinds `log`, a name the fine-tuning cell also uses.
log = torch.load("log.pkl")

In [None]:
!pip install matplotlib -i https://pypi.tuna.tsinghua.edu.cn/simple

In [None]:
import matplotlib.pyplot as plt

# Plot the running negative / neutral losses against the training step.
#
# NOTE: `neg_loss` and `neu_loss` are produced by the log-parsing cells that
# appear further down in the notebook; run those first.
#
# The training loop logs once every LOG_EVERY steps, so the x axis is rebuilt
# from the number of parsed entries instead of a hard-coded final step
# (previously range(100, 39101, 100), which only fits one specific run).
LOG_EVERY = 100


def _log_steps(values):
    """Return the step number of each logged value: LOG_EVERY, 2*LOG_EVERY, ..."""
    return list(range(LOG_EVERY, LOG_EVERY * len(values) + 1, LOG_EVERY))


steps = _log_steps(neg_loss)
plt.xlabel("step")
plt.ylabel("loss")
plt.plot(steps, neg_loss, label = "neg loss")
plt.plot(_log_steps(neu_loss), neu_loss, label = "neu loss")
plt.legend()

In [None]:
# Read every line of the text log into `lines`.
# Presumably untitled.txt is the captured stdout of a training run (it is
# searched for "step: " / "neg loss: " / "neu loss: " below) -- confirm the source.
with open("untitled.txt","r") as fin:
    lines = fin.readlines()

In [None]:
neg_loss = []
neu_loss = []
steps = []

# One rule per logged quantity:
# (substring that selects a line, separator to split on, list that collects the value).
parse_rules = (
    ("step: ", "step: ", steps),
    ("neg loss", "neg loss: ", neg_loss),
    ("neu loss", "neu loss: ", neu_loss),
)

for line in lines:
    for marker, separator, collected in parse_rules:
        if marker not in line:
            continue
        # The number is whatever follows the separator; the line must
        # contain the separator exactly once.
        _, num = line.split(separator)
        num = num.strip()
        collected.append(float(num))

In [None]:
# Display the parsed negative-loss values (this prints the whole list).
neg_loss