In [1]:
# Mount Google Drive at /content/drive (Colab-only API); a later cell
# changes the working directory to a folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

MessageError: ignored

In [None]:
# !cp "drive/MyDrive/Question Generation/vae/models.py" .
# Switch the working directory to the project folder on the mounted Drive.
# Relative paths used later in this notebook (e.g. '../data/...') are
# resolved from here.
%cd '/content/drive/MyDrive/Second/MCQ/vae/'
# Print the working directory to confirm the %cd took effect.
!pwd

In [None]:
!pip install transformers
!pip install json-lines
## scatter 1.12+cu113
# !pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.1+cu113.html
# scatter 1.13+cu116
!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.13.0+cu116.html
!pip install import-ipynb
import import_ipynb

In [None]:
import argparse
import os
import random

import numpy as np
import torch
from tqdm import tqdm, trange
from transformers import BertTokenizer

from eval import eval_vae
from trainer import VAETrainer
from utils import batch_to_device, get_harv_data_loader, get_squad_data_loader

In [None]:
class dotdict(dict):
    """A ``dict`` whose items can also be read, set and deleted as attributes.

    Reading an attribute whose key is absent yields ``None`` (the ``dict.get``
    default) instead of raising; deleting an absent key raises ``KeyError``.
    """

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails; falls back to the
        # mapping and returns None for unknown keys.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment stores an item in the mapping itself.
        self[name] = value

    def __delattr__(self, name):
        # Attribute deletion removes the matching item (KeyError if missing).
        del self[name]
def setArguments():
    """Build the hard-coded run configuration and return it as a ``dotdict``.

    The returned object exposes every key below both as an item and as an
    attribute (e.g. ``args.seed``).  No command-line parsing takes place; edit
    the literal values here to change a run.

    Returns:
        dotdict: the configuration mapping.
    """
    config = {
        # --- run mode / reproducibility ---
        "seed": 1004,
        "debug": True,
        # --- data files ---
        # NOTE(review): train_dir and dev_dir point at the same file
        # (test.json) -- confirm this is intended outside of debugging.
        "train_dir": '../data/sciq/squad_format/test.json',
        "dev_dir": '../data/sciq/squad_format/test.json',
        # --- length limits (consumed by code outside this cell) ---
        "max_c_len": 384,
        "max_q_len": 64,
        "max_d_len": 5,
        # --- checkpointing / optimisation ---
        "model_dir": "../save/vae-checkpoint",
        "epochs": 20,
        "lr": 1e-3,
        "batch_size": 1,
        "weight_decay": 0.0,
        "clip": 5.0,
        # --- model hyper-parameters ---
        "bert_model": 'bert-base-uncased',
        "enc_nhidden": 300,
        "enc_nlayers": 1,
        "enc_dropout": 0.2,
        "dec_a_nhidden": 300,
        "dec_a_nlayers": 1,
        "dec_a_dropout": 0.2,
        "dec_q_nhidden": 900,
        "dec_q_nlayers": 2,
        "dec_q_dropout": 0.3,
        "dec_d_nhidden": 900,
        "dec_d_nlayers": 2,
        "dec_d_dropout": 0.3,
        "nzqdim": 50,
        "nzddim": 50,
        "nza": 20,
        "nzadim": 10,
        # --- loss weights ---
        "lambda_kl": 0.1,
        "lambda_info": 1.0,
    }
    return dotdict(config)
# Build the global configuration object used by the rest of the notebook.
args = setArguments()

# Debug runs write their checkpoints to a throwaway local directory.
if args.debug:
    print("Debug Mode On.")
    args.model_dir = "./dummy"

# set model dir: create it if needed, then store its absolute path
model_dir = args.model_dir
os.makedirs(model_dir, exist_ok=True)
args.model_dir = os.path.abspath(model_dir)

# Seed every random number generator used here with the configured seed.
for _seed_fn in (random.seed, np.random.seed,
                 torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(args.seed)


In [None]:
def main(args):
    """Train the VAE for ``args.epochs`` epochs, evaluating after every epoch.

    Builds a BERT tokenizer and the train/dev inputs via
    ``get_squad_data_loader``, creates a ``VAETrainer`` and then alternates one
    pass over the training loader with one call to ``eval_vae``.  The best
    Q-BLEU / EM / F1 seen so far are tracked; the trainer is saved to
    ``best_f1_model.pt`` / ``best_q_bleu_model.pt`` inside ``args.model_dir``
    whenever F1 / Q-BLEU strictly improves.  EM is tracked for display only
    and no checkpoint is written for it.

    Side effects: sets ``args.device``, prints the model directory and draws
    tqdm progress/status lines.  ``torch.cuda.current_device()`` is called
    unconditionally, so a CUDA device is expected to be available.

    Args:
        args: configuration object accessed by attribute.  This function
            reads ``bert_model``, ``train_dir``, ``dev_dir``, ``epochs`` and
            ``model_dir`` and forwards ``args`` itself to the data loader,
            the trainer and the evaluation routine.

    Returns:
        None.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model)
    # Only the first element of the returned triple is used for training.
    train_loader, _, _ = get_squad_data_loader(tokenizer, args.train_dir,
                                               shuffle=True, args=args)
    # The dev-set result is handed to eval_vae exactly as returned.
    eval_data = get_squad_data_loader(tokenizer, args.dev_dir,
                                      shuffle=False, args=args)

    args.device = torch.cuda.current_device()

    trainer = VAETrainer(args)

    # Description-only bars used as in-place status lines.  They are created
    # manually, so they must also be closed manually (see ``finally`` below).
    status_bars = [tqdm(total=0, bar_format='{desc}') for _ in range(4)]
    loss_log1, loss_log2, eval_log, best_eval_log = status_bars

    try:
        print("MODEL DIR: " + args.model_dir)

        best_bleu, best_em, best_f1 = 0.0, 0.0, 0.0
        for epoch in trange(int(args.epochs), desc="Epoch", position=0):
            for batch in tqdm(train_loader, desc="Train iter", leave=False, position=1):
                c_ids, q_ids, a_ids, start_positions, end_positions \
                    = batch_to_device(batch, args.device)
                trainer.train(c_ids, q_ids, a_ids, start_positions, end_positions)

                # Show the losses the trainer recorded for this step.
                str1 = 'Q REC : {:06.4f} A REC : {:06.4f}'
                str2 = 'ZQ KL : {:06.4f} ZA KL : {:06.4f} INFO : {:06.4f}'
                str1 = str1.format(float(trainer.loss_q_rec), float(trainer.loss_a_rec))
                str2 = str2.format(float(trainer.loss_zq_kl), float(trainer.loss_za_kl),
                                   float(trainer.loss_info))
                loss_log1.set_description_str(str1)
                loss_log2.set_description_str(str2)

            # Evaluate after every epoch.
            metric_dict, bleu, _ = eval_vae(epoch, args, trainer, eval_data)
            f1 = metric_dict["f1"]
            em = metric_dict["exact_match"]
            bleu = bleu * 100  # scaled by 100 for display
            _str = '{}-th Epochs Q-BLEU : {:02.2f} EM : {:02.2f} F1 : {:02.2f}'
            _str = _str.format(epoch, bleu, em, f1)
            eval_log.set_description_str(_str)

            if em > best_em:
                best_em = em
            if f1 > best_f1:
                best_f1 = f1
                trainer.save(os.path.join(args.model_dir, "best_f1_model.pt"))
            if bleu > best_bleu:
                best_bleu = bleu
                trainer.save(os.path.join(args.model_dir, "best_q_bleu_model.pt"))

            _str = 'BEST Q-BLEU : {:02.2f} EM : {:02.2f} F1 : {:02.2f}'
            _str = _str.format(best_bleu, best_em, best_f1)
            best_eval_log.set_description_str(_str)
    finally:
        # Release the status bars even on error or KeyboardInterrupt so that
        # re-running this cell does not accumulate stale tqdm instances.
        for bar in status_bars:
            bar.close()

In [None]:
# Sanity check: show which training file is configured in `args`.
print(args.train_dir)

In [None]:

# Start training and per-epoch evaluation with the global configuration.
main(args)