In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from tqdm import tqdm_notebook as tqdm
from transformers.optimization import AdamW, get_linear_schedule_with_warmup
import os
from collections import defaultdict

from dataset import * 
from model import * 
import config as config

# Hyper Param (Train)

In [3]:
TEST = True
LOG = True

device = 'cuda' if torch.cuda.is_available() else 'cpu'
if TEST:
    print(device)

# (instance/batchsize)*epcho = # batch
BATCH_SIZE = 8
NUM_EPOCHS = 6
LR = 0.00001 # 1e-5

NUM_WARMUP = 100

cuda


# Load Data using dataset.py

In [4]:
train_set = FGC_Dataset(config.FGC_TRAIN, mode="train")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=109540.0, style=ProgressStyle(descripti…




In [5]:
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, collate_fn=create_mini_batch)

In [6]:
# calc pos weight for BCE
total = 0
true_cnt = 0
for instance in train_set:
    if(instance[-1] == True):
        true_cnt += 1
    total += 1
print(true_cnt)
print(total)
# to increase the value of recall in the model's criterion
pos_weight = print(torch.tensor([(total-true_cnt)/true_cnt, 1]))
print(pos_weight)
# no need to applied pos_weight = torch.tensor([total/true_cnt, total/(1-true_cnt)])?

1875
31422
tensor([15.7584,  1.0000])
None


# Build and Train Model

In [7]:
bert_encoder = BertModel.from_pretrained(config.BERT_EMBEDDING)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=624.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=411577189.0, style=ProgressStyle(descri…




In [8]:
def calc_f1(pref, target):
    TP += ((pred == 1) & (target == 1)).cpu().sum()
    TN += ((pred == 0) & (target == 0)).cpu().sum()
    FN += ((pred == 0) & (target == 1)).cpu().sum()
    FP += ((pred == 1) & (target == 0)).cpu().sum()

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1 = 2 * recall * precision / (recall + precision)
    acc = (TP + TN) / (TP + TN + FP + FN)
    return precision, recall, F1, acc

In [9]:
model = BertSERModel(bert_encoder=bert_encoder, pos_weight=pos_weight)
model.to(device) # means model = model.to(device)
if LOG:
    print("model in cuda?", next(model.parameters()).is_cuda)
#    from time import sleep
#    sleep(0.25)

model in cuda? True


In [10]:
dev_set = FGC_Dataset(config.FGC_DEV)
dev_loader = DataLoader(dev_set, batch_size=BATCH_SIZE, collate_fn=create_mini_batch)
id2qid = dev_set.get_id2qid()

In [11]:
def eval_model(model, dev_set, id2qid):
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SIZE, collate_fn=create_mini_batch)
    pred = []
    label = []
    for batch_i, batch in enumerate(dev_loader):
        #print(len(batch))
        label.extend(batch[3].cpu()) # batch[3] is label
        batch = [data.to(device) for data in batch]
        pred_batch = model._predict(batch)
        pred.extend(pred_batch)
    #print(label)
    #print(pred)
    stat = defaultdict(list)
    for idx in range(len(dev_set)):
        qid = id2qid[idx]
        state = label[idx].item()*2 + pred[idx]
        stat[qid].append(state)
        
    stat2 = list()
    sum_stat =  {
            "precision" : 0.0,
            "recall" : 0.0,
            "F1" : 0.0,
            "accuracy" : 0.0
        }
    
    for k, v in stat.items():
        TP = sum([1 if ins == 3 else 0 for ins in v])
        TN = sum([1 if ins == 0 else 0 for ins in v])
        FN = sum([1 if ins == 2 else 0 for ins in v])
        FP = sum([1 if ins == 1 else 0 for ins in v])
        
        # print(TP, TN, FN, FP)
        
        precision = TP / (TP + FP) if TP+FP > 0 else 0.0
        recall = TP / (TP + FN) if TP+FN > 0 else 0.0
        f1 = 2 * recall * precision / (recall + precision) if  (recall + precision) > 0 else 0.0
        acc = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) > 0 else 0.0

        cur = {
            "precision" : precision,
            "recall" : recall,
            "F1" : f1,
            "accuracy" : acc
        }
        stat2.append(cur)
        for k in cur:
            sum_stat[k] += cur[k]
    
    # each question weight the sum
    # remain to do is get max if SE is all negetive
    for k in sum_stat:
        sum_stat[k] /= len(stat2)
    
    return stat2, sum_stat

In [12]:
stat, avg_stat = eval_model(model, dev_set, id2qid)

In [13]:
# saving directory
model_file_path = "baseline"

save_model_path = config.PARAM_PATH / model_file_path

if not os.path.exists(save_model_path):
    if LOG:
        print("directory {} doesn't exist, creating...".format(save_model_path))
    os.mkdir(save_model_path)

In [14]:
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

In [15]:
if train_set == None:
    train_set = FGC_Dataset(config.FGC_TRAIN, mode="train")
# train_set[0][0]
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, collate_fn=create_mini_batch)

In [16]:
# optimizer
optimizer = AdamW(optimizer_grouped_parameters, lr=LR)
num_train_optimization_steps = len(train_loader) * NUM_EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=NUM_WARMUP,
                                            num_training_steps=num_train_optimization_steps)

In [17]:
# Check that input does not exist MAX_BERT_LEN
for data in train_set:
    # print(data[0])
    assert(data[1].shape[0] <= config.BERT_MAX_INPUT_LEN)

In [87]:
# model.__dict__

In [18]:
model = BertSERModel(bert_encoder=bert_encoder, pos_weight=pos_weight)
model.to(device) # means model = model.to(device)
if LOG:
    print("model in cuda?", next(model.parameters()).is_cuda)
#    from time import sleep
#    sleep(0.25)

print('start training ... ')

stat = {
    "precision" : [],
    "recall" : [],
    "F1" :[],
    "accuracy" : []
}

for epoch_i in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for batch_i, batch in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()

        batch = [data.to(device) for data in batch] # batch[0] = ids, batch[1] = ...
        loss = model(batch)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.MAX_GRAD_NORM) 
        optimizer.step()
        scheduler.step()
        running_loss += loss.item()
    learning_rate_scalar = scheduler.get_lr()[0]
    print('lr = %f' % learning_rate_scalar)
    print('epoch %d train_loss: %.3f' % (epoch_i, running_loss / len(train_loader)))
    
    eval_epoch_frequency = 1
    if epoch_i % eval_epoch_frequency == 0:
        ecopch_stat_by_questions, epoch_stat = eval_model(model, dev_set, id2qid)
        for k in stat:
            stat[k].append(epoch_stat[k])
        print('epoch %d eval_recall: %.3f eval_f1: %.3f' % 
              (epoch_i, epoch_stat['recall'], epoch_stat['F1']))
        model_to_save = model
        torch.save(model_to_save.state_dict(),
                    str(save_model_path / "model_epoch{0}_precision:{1:.3f}_recall:{2:.3f}_f1:{3:.3f}_acc:{4:.3f}.m".
                           format(epoch_i, epoch_stat['precision'], epoch_stat['recall'], epoch_stat['F1'],
                                  epoch_stat['accuracy'])))


model in cuda? True
start training ... 


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=3928.0), HTML(value='')))




RuntimeError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 10.76 GiB total capacity; 7.54 GiB already allocated; 25.69 MiB free; 7.94 GiB reserved in total by PyTorch) (malloc at /opt/conda/conda-bld/pytorch_1591914880026/work/c10/cuda/CUDACachingAllocator.cpp:289)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x4e (0x7f1e00291b5e in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1f39d (0x7f1e004dd39d in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x2058e (0x7f1e004de58e in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #3: THCStorage_resize + 0x96 (0x7f1db91a6046 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #4: at::native::(anonymous namespace)::resize_cuda_(at::Tensor&, c10::ArrayRef<long>, c10::optional<c10::MemoryFormat>) + 0x799 (0x7f1dbacd9379 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #5: <unknown function> + 0x2a02d53 (0x7f1dbacd9d53 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #6: <unknown function> + 0xd7bc52 (0x7f1db9052c52 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #7: <unknown function> + 0xead856 (0x7f1db9184856 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #8: <unknown function> + 0xdd0c88 (0x7f1db90a7c88 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cuda.so)
frame #9: <unknown function> + 0xe20e70 (0x7f1de02ade70 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0x29f9b6e (0x7f1de1e86b6e in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0xe20e70 (0x7f1de02ade70 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #12: at::Tensor c10::Dispatcher::callUnboxed<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::OperatorHandle const&, at::Tensor const&, at::Tensor const&) const + 0xb3 (0x7f1de4de85b3 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #13: <unknown function> + 0x260aa17 (0x7f1de1a97a17 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::generated::MmBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x1c4 (0x7f1de1ad30c4 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #15: <unknown function> + 0x2ae7df5 (0x7f1de1f74df5 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7f1de1f720f3 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7f1de1f72ed2 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::Engine::thread_init(int) + 0x39 (0x7f1de1f6b549 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7f1de50b1638 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #20: <unknown function> + 0xc819d (0x7f1e1d81019d in /opt/conda/bin/../lib/libstdc++.so.6)
frame #21: <unknown function> + 0x76db (0x7f1e20cf46db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #22: clone + 0x3f (0x7f1e20a1da3f in /lib/x86_64-linux-gnu/libc.so.6)


In [None]:
# save model here

In [None]:
print("hi")

In [None]:
eval_epoch_frequency = 1
if epoch_i % eval_epoch_frequency == 0:
    ecopch_stat_by_questions, epoch_stat = eval_model(model, dev_set, id2qid)
    for k in stat:
        stat[k].append(epoch_stat[k])
    print('epoch %d eval_recall: %.3f eval_f1: %.3f' % 
          (epoch_i, epoch_stat['recall'], epoch_stat['F1']))
    model_to_save = model
    torch.save(model_to_save.state_dict(),
                str(save_model_path / "model_epoch{0}_precision:{1:.3f}_recall:{2:.3f}_f1:{3:.3f}_acc:{4:.3f}.m".
                       format(epoch_i, epoch_stat['precision'], epoch_stat['recall'], epoch_stat['F1'],
                              epoch_stat['accuracy'])))