In [1]:
import sys
sys.path.append("/pfs/data5/home/st/st_us-051200/st_st169719/third_party")
print(sys.path)

['', '/home/st/st_us-051200/st_st169719/.local/lib/python3.6/site-packages', '/opt/bwhpc/common/jupyter/base/lib/python3.6/site-packages', '/usr/lib64/python36.zip', '/usr/lib64/python3.6', '/usr/lib64/python3.6/lib-dynload', '/pfs/data5/software_uc2/bwhpc/common/jupyter/base/lib64/python3.6/site-packages', '/pfs/data5/software_uc2/bwhpc/common/jupyter/base/lib/python3.6/site-packages', '/usr/lib64/python3.6/site-packages', '/usr/lib/python3.6/site-packages', '/opt/bwhpc/common/jupyter/base/lib/python3.6/site-packages/IPython/extensions', '/pfs/data5/home/st/st_us-051200/st_st169719/.ipython', '/pfs/data5/home/st/st_us-051200/st_st169719/third_party']


In [2]:
from typing import Dict
from pathlib import Path
import json
from functools import partial
from collections import OrderedDict
from argparse import ArgumentParser

import lineflow as lf
from transformers import AlbertForMultipleChoice, AlbertTokenizer, AdamW
import pytorch_lightning as pl

import torch
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
import json_lines
from transformers import AlbertConfig
import pickle
from pytorch_lightning.metrics import functional as FM

In [3]:
# fixed seed
import torch
torch.manual_seed(0)
import random
random.seed(0)
import numpy as np
np.random.seed(0)

pl.utilities.seed.seed_everything(seed=0, workers=False)

Global seed set to 0


0

In [4]:
# fixed seed for generating dataset
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

In [5]:
MAX_LEN = 512
BATCH_SIZE = 32

In [6]:
def load_dataloader_from_cache(cachedir :str):
    cachedir = Path(cachedir)
    
    test_file_name = "cache4_test_ob.cache"
    test_path = Path(cachedir / test_file_name)
    if test_path.exists():
        print(f'Loading data from {test_file_name}...')
        with test_path.open('rb') as f:
            test_cache = pickle.load(f)

    test_dataloader = DataLoader(
            lf.core.CacheDataset(test_cache),
            batch_size=BATCH_SIZE,
            worker_init_fn=seed_worker,
            num_workers=40
            )
    return test_dataloader

In [7]:
class TestModel(pl.LightningModule):

    def __init__(self, model, test_dataloader):
        super(TestModel, self).__init__()

        self.model = model
        self._test_dataloader = test_dataloader

    def test_step(self, batch, batch_idx):
        labels = batch["label"]
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        token_type_ids = batch["token_type_ids"]

        outputs = self.model(
                input_ids,
                token_type_ids=token_type_ids,
                attention_mask=attention_mask,
                labels=labels
                )
        
        labels_hat = torch.argmax(outputs.logits, dim=1)

        print("labels:", labels)
        print("labels_hat", labels_hat)
        
        acc = FM.accuracy(labels_hat, labels)
        self.log('test_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)

    def test_dataloader(self):
        return self._test_dataloader

In [8]:
test_dataloader = load_dataloader_from_cache('/pfs/data5/home/st/st_us-051200/st_st169719/Philippe/Caches_New/')

Loading data from cache4_test_ob.cache...


In [9]:
import glob
path_list = glob.glob("/pfs/data5/home/st/st_us-051200/st_st169719/OpenBook/Checkpoints/DR2e-5/*/*.ckpt")

In [10]:
# use AlbertModel, donot use AlbertforMultiChoice
config = AlbertConfig.from_pretrained('albert-base-v2')

# # load checkpoint file which you want to use
# for path in path_list:
#     print('Model in path:' + path)
#     checkpoint = torch.load(path)
#     new_checkpoint = {}

#     for key in checkpoint['state_dict'].keys():
#       if 'model' in key:
#         new_key = key[6:]
#         new_checkpoint[new_key] = checkpoint['state_dict'][key]
#       else:
#         new_checkpoint[key] = checkpoint['state_dict'][key]
        
#     m = AlbertForMultipleChoice.from_pretrained(pretrained_model_name_or_path= None, config=config, state_dict=new_checkpoint)
#     trainer_for_test = pl.Trainer(gpus=4, accelerator='dp')
#     model_for_test = TestModel(m, test_dataloader)
#     trainer_for_test.test(model=model_for_test)

path = path_list[0]
print('Model in path:' + path)
checkpoint = torch.load(path)
new_checkpoint = {}

for key in checkpoint['state_dict'].keys():
  if 'model' in key:
    new_key = key[6:]
    new_checkpoint[new_key] = checkpoint['state_dict'][key]
  else:
    new_checkpoint[key] = checkpoint['state_dict'][key]

m = AlbertForMultipleChoice.from_pretrained(pretrained_model_name_or_path= None, config=config, state_dict=new_checkpoint)
trainer_for_test = pl.Trainer(gpus=4, accelerator='dp')
model_for_test = TestModel(m, test_dataloader)
trainer_for_test.test(model=model_for_test)

Model in path:/pfs/data5/home/st/st_us-051200/st_st169719/OpenBook/Checkpoints/DR2e-5/Ex03/dr_ex03-albert-openbook-epoch=02-val_acc_epoch=0.596.ckpt


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

labels: labels: labels: labels: tensor([1, 0, 2, 2, 2, 2, 2, 1], device='cuda:0')
labels_hat tensor([0, 0, 2, 2, 2, 2, 1, 1], device='cuda:0')
tensor([3, 1, 2, 1, 2, 0, 2, 3], device='cuda:1')
labels_hat tensor([2, 2, 0, 1, 0, 0, 0, 1], device='cuda:2')tensor([1, 0, 0, 1, 2, 0, 3, 3], device='cuda:1')
labels_hat 
tensor([2, 0, 0, 3, 2, 0, 0, 3], device='cuda:2')
tensor([1, 1, 3, 0, 3, 1, 2, 3], device='cuda:3')
labels_hat tensor([1, 1, 1, 1, 3, 1, 2, 0], device='cuda:3')
labels: labels: labels: labels: tensor([1, 2, 0, 3, 0, 3, 2, 0], device='cuda:2')
labels_hat tensor([1, 2, 2, 3, 1, 0, 3, 1], device='cuda:2')tensor([3, 3, 0, 3, 0, 0, 0, 0], device='cuda:1')

labels_hat tensor([2, 3, 1, 1, 2, 1, 3, 0], device='cuda:1')
tensor([2, 3, 1, 0, 2, 2, 0, 1], device='cuda:3')
labels_hat tensor([1, 1, 2, 0, 2, 2, 2, 0], device='cuda:3')
tensor([1, 0, 1, 0, 0, 3, 3, 0], device='cuda:0')
labels_hat tensor([3, 3, 3, 2, 2, 2, 2, 0], device='cuda:0')
labels: labels: labels: labels: tensor([2, 0, 0,

[{'test_acc_epoch': 0.4000000059604645, 'test_acc': 0.20000000298023224}]

In [22]:
!cd /pfs/work7/workspace/scratch/st_st169719-TQA-0/Chen/ && ls && mkdir OpenBook && ls

Cosmos	RACE
Cosmos	OpenBook  RACE


In [23]:
!cp /pfs/data5/home/st/st_us-051200/st_st169719/OpenBook/Checkpoints/DR/Ex01/dr_ex01-albert-openbook-epoch=02-val_acc_epoch=0.557.ckpt /pfs/work7/workspace/scratch/st_st169719-TQA-0/Chen/OpenBook

In [24]:
!cd /pfs/work7/workspace/scratch/st_st169719-TQA-0/Chen/OpenBook/ && ls

'dr_ex01-albert-openbook-epoch=02-val_acc_epoch=0.557.ckpt'
