In [1]:
import sys

# Extra directory to search when importing modules in the cells below.
THIRD_PARTY_DIR = "/pfs/data5/home/st/st_us-051200/st_st169719/third_party"

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if THIRD_PARTY_DIR not in sys.path:
    sys.path.append(THIRD_PARTY_DIR)
print(sys.path)

['', '/home/st/st_us-051200/st_st169719/.local/lib/python3.6/site-packages', '/opt/bwhpc/common/jupyter/base/lib/python3.6/site-packages', '/usr/lib64/python36.zip', '/usr/lib64/python3.6', '/usr/lib64/python3.6/lib-dynload', '/pfs/data5/software_uc2/bwhpc/common/jupyter/base/lib64/python3.6/site-packages', '/pfs/data5/software_uc2/bwhpc/common/jupyter/base/lib/python3.6/site-packages', '/usr/lib64/python3.6/site-packages', '/usr/lib/python3.6/site-packages', '/opt/bwhpc/common/jupyter/base/lib/python3.6/site-packages/IPython/extensions', '/pfs/data5/home/st/st_us-051200/st_st169719/.ipython', '/pfs/data5/home/st/st_us-051200/st_st169719/third_party']


In [2]:
from typing import Dict
from pathlib import Path
import json
from functools import partial
from collections import OrderedDict
from argparse import ArgumentParser

import lineflow as lf
from transformers import AlbertForMultipleChoice, AlbertTokenizer, AdamW
import pytorch_lightning as pl

import torch
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
import json_lines
from transformers import AlbertConfig
import pickle
from pytorch_lightning.metrics import functional as FM

In [3]:
# Reproducibility: seed the torch, `random` and NumPy global RNGs with 0.
import random

import numpy as np
import torch

torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

In [4]:
# fixed seed for generating dataset
def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the current torch seed.

    Passed as ``worker_init_fn`` to the ``DataLoader`` built in this notebook.

    Args:
        worker_id: Worker index supplied by the caller; not used here.
    """
    # Reduce the torch seed modulo 2**32 before handing it to the other RNGs.
    seed32 = torch.initial_seed() % (2 ** 32)
    for reseed in (np.random.seed, random.seed):
        reseed(seed32)

In [5]:
# Not referenced by any cell shown here; presumably the maximum sequence
# length used when the cached dataset was tokenized — TODO confirm.
MAX_LEN = 512
# Batch size handed to the test DataLoader in load_test_dataloader_from_cache.
BATCH_SIZE = 32

In [6]:
def load_test_dataloader_from_cache(cachedir: str, batch_size=None, num_workers: int = 80):
    """Build the test ``DataLoader`` from a pickled cache file.

    Looks for ``test_race.cache`` inside ``cachedir``, unpickles it, wraps the
    result in ``lf.core.CacheDataset`` and returns a ``DataLoader`` over it.

    Args:
        cachedir: Directory that contains ``test_race.cache``.
        batch_size: Batch size for the loader. ``None`` (the default) falls
            back to the notebook-level ``BATCH_SIZE``.
        num_workers: Number of DataLoader worker processes. The default of 80
            is the value the original author chose for a 4-GPU cluster node.

    Returns:
        A ``DataLoader`` over the cached test set, with ``seed_worker`` as its
        ``worker_init_fn``.

    Raises:
        FileNotFoundError: If ``test_race.cache`` does not exist in
            ``cachedir``.
    """
    test_file_name = "test_race.cache"
    test_path = Path(cachedir) / test_file_name

    # Fail early with a clear message instead of hitting an unbound
    # ``test_cache`` further down when the cache file is missing.
    if not test_path.exists():
        raise FileNotFoundError(f'Test cache file not found: {test_path}')

    print(f'Loading data from {test_file_name}...')
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at cache files you created yourself.
    with test_path.open('rb') as f:
        test_cache = pickle.load(f)

    return DataLoader(
        lf.core.CacheDataset(test_cache),
        batch_size=BATCH_SIZE if batch_size is None else batch_size,
        worker_init_fn=seed_worker,  # re-seeds NumPy/random inside each worker
        num_workers=num_workers,
    )

In [7]:
class TestModel(pl.LightningModule):
    """Lightning wrapper used only to run the test loop over a given model.

    Holds the model to evaluate together with the DataLoader that
    ``test_dataloader`` hands back to the trainer.
    """

    def __init__(self, model, test_dataloader):
        """Store the model under test and the loader to evaluate it on."""
        super().__init__()

        self.model = model
        self._test_dataloader = test_dataloader

    def test_dataloader(self):
        """Return the DataLoader supplied at construction time."""
        return self._test_dataloader

    def test_step(self, batch, batch_idx):
        """Run one test batch and log its accuracy as ``test_acc``.

        ``batch`` is indexed with the keys ``label``, ``input_ids``,
        ``attention_mask`` and ``token_type_ids``.
        """
        gold = batch["label"]

        result = self.model(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
            token_type_ids=batch["token_type_ids"],
            labels=gold,
        )

        # Highest-scoring index along dim 1 of the logits is the prediction
        # (presumably the answer-choice dimension — TODO confirm).
        predicted = result.logits.argmax(dim=1)

        self.log(
            'test_acc',
            FM.accuracy(predicted, gold),
            on_step=True,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )

In [9]:
# Build the test DataLoader from the pickled cache in this directory
# (the directory name suggests the cache was prepared for batch size 32 — TODO confirm).
test_dataloader = load_test_dataloader_from_cache('/pfs/data5/home/st/st_us-051200/st_st169719/RACE/dataset/CacheFiles/BatchSize32')

Loading data from test_race.cache...


  cpuset_checked))


In [10]:
# Checkpoint files to evaluate. Each one is later read with torch.load and its
# 'state_dict' entry is loaded into the model. The file names carry an
# experiment tag (e1/e2/e3), an epoch index and a val_loss_epoch value.
path_1 = '/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex01/e1-albert-race-epoch=00-val_loss_epoch=0.78.ckpt'
path_2 = '/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex02/e2-albert-race-epoch=00-val_loss_epoch=0.77.ckpt'
path_3 = '/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex02/e2-albert-race-epoch=01-val_loss_epoch=0.92.ckpt'
path_4 = '/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex03/e3-albert-race-epoch=00-val_loss_epoch=0.97.ckpt'
path_5 = '/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex03/e3-albert-race-epoch=01-val_loss_epoch=1.16.ckpt'

In [11]:
# Checkpoints to evaluate, in the order they will be tested.
path_list = [
    path_1,
    path_2,
    path_3,
    path_4,
    path_5,
]

In [None]:
# Evaluate every checkpoint in path_list on the test set.
# The architecture comes from the 'albert-base-v2' config; the weights come
# from each checkpoint's state_dict and are loaded into AlbertForMultipleChoice.
config = AlbertConfig.from_pretrained('albert-base-v2')

# Key prefix to strip before loading the weights (presumably added because the
# training LightningModule stored the network as `self.model` — TODO confirm).
_WRAPPER_PREFIX = 'model.'


def _strip_wrapper_prefix(state_dict, prefix=_WRAPPER_PREFIX):
    """Return a copy of ``state_dict`` with a leading ``prefix`` removed from its keys.

    Only keys that actually start with ``prefix`` are renamed; a key that merely
    contains the word somewhere else is kept unchanged.
    """
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


# load checkpoint file which you want to use
for path in path_list:
    print('Model in path:' + path)
    # Load onto the CPU so this does not depend on the device the checkpoint
    # was saved from; the trainer handles device placement for testing.
    checkpoint = torch.load(path, map_location='cpu')
    new_checkpoint = _strip_wrapper_prefix(checkpoint['state_dict'])

    m = AlbertForMultipleChoice.from_pretrained(pretrained_model_name_or_path=None, config=config, state_dict=new_checkpoint)
    trainer_for_test = pl.Trainer(gpus=4, accelerator='dp')
    model_for_test = TestModel(m, test_dataloader)
    trainer_for_test.test(model=model_for_test)

Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex01/e1-albert-race-epoch=00-val_loss_epoch=0.78.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 1.0, 'test_acc_epoch': 0.6707816123962402}
--------------------------------------------------------------------------------
Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex02/e2-albert-race-epoch=00-val_loss_epoch=0.77.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 1.0, 'test_acc_epoch': 0.6804590821266174}
--------------------------------------------------------------------------------
Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex02/e2-albert-race-epoch=01-val_loss_epoch=0.92.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 1.0, 'test_acc_epoch': 0.6356079578399658}
--------------------------------------------------------------------------------
Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex03/e3-albert-race-epoch=00-val_loss_epoch=0.97.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 1.0, 'test_acc_epoch': 0.7210918068885803}
--------------------------------------------------------------------------------
Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/RACE/Checkpoints/Ex03/e3-albert-race-epoch=01-val_loss_epoch=1.16.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]