In [1]:
import datetime
import json
import os
import pickle
import sys
import warnings
from shutil import ignore_patterns

warnings.filterwarnings("ignore")

import torch
import pytorch_lightning as pl
from pytorch_lightning.trainer import Trainer
import pytorch_lightning.callbacks as plc
import pytorch_lightning.loggers as plog

from model_interface import MInterface
from data_interface import DInterface
from src.tools.logger import SetupCallback, BestCheckpointCallback, BackupCodeCallback, TempFileCleanupCallback


In [2]:
# Single configuration mapping for the run. Downstream cells unpack it into
# DInterface(**args), pass it to the W&B logger as the run config, and read
# individual keys when building the callbacks and the Trainer.
# Key order is kept stable because the mapping is forwarded as a whole.
args = dict(
    # Output location: res_dir/ex_name is the run's log directory.
    res_dir='./results',
    ex_name='debug',
    check_val_every_n_epoch=1,
    dataset='PTM',
    model_name='MeToken',  # model name here
    lr=1e-4,
    lr_scheduler='onecycle',
    offline=1,
    seed=114514,
    batch_size=16,
    num_workers=16,
    pad=1024,
    min_length=40,
    path='./data_test/generalization/qPTM_dataset/',  # data path here
    with_null_ptm=0,
    epoch=20,
    augment_eps=0.0,
    module_type=94,
    weight_type=0,
    gamma=2.0,
    final_tau=1e-4,
    pretrain=0,
    test_only=1,
    inference_pos=None,
    ckpt_from_deepspeed=0,
    # Checkpoint that the trainer cell loads the model from.
    ckpt_path="pretrained_model/checkpoint.ckpt",
    gpus=[0],
    strategy='auto',
    wandb_offline=1,
)

In [3]:
def load_callbacks(args):
    """Assemble the Lightning callbacks for a run and locate its checkpoint directory.

    Args:
        args: Run configuration mapping. Reads ``res_dir``, ``ex_name``,
            ``check_val_every_n_epoch`` and ``pretrain``; the whole mapping is
            also handed to ``SetupCallback`` as ``config``.

    Returns:
        A ``(callbacks, ckptdir)`` tuple. ``callbacks`` is ordered as: code
        backup, best-checkpoint, setup, early stopping, temp-file cleanup.
        ``ckptdir`` is ``<res_dir>/<ex_name>/checkpoints``.
    """
    run_dir = str(os.path.join(args['res_dir'], args['ex_name']))
    ckptdir = os.path.join(run_dir, "checkpoints")

    # Checkpoint selection and early stopping follow the same metric and direction.
    monitored_metric = "val_f1"
    metric_mode = "max"

    # Copies from the parent of res_dir into the run directory, skipping the listed patterns.
    code_backup = BackupCodeCallback(
        os.path.dirname(args['res_dir']),
        run_dir,
        ignore_patterns=ignore_patterns(
            'results*', 'pdb*', 'metadata*', 'vq_dataset*', 'bin*', 'data*',
            '__pycache__', 'info', 'lib', 'requirements', 'debug', 'wandb',
        ),
    )

    best_checkpoint = BestCheckpointCallback(
        monitor=monitored_metric,
        filename='best-{epoch:02d}-{val_f1:.3f}',
        save_top_k=15,
        mode=metric_mode,
        save_last=True,
        dirpath=ckptdir,
        verbose=True,
        every_n_epochs=args['check_val_every_n_epoch'],
    )

    # Timestamp is taken right before the setup callback is created.
    timestamp = datetime.datetime.now().strftime("%m-%dT%H-%M-%S")
    setup = SetupCallback(
        now=timestamp,
        logdir=run_dir,
        ckptdir=ckptdir,
        cfgdir=os.path.join(run_dir, "configs"),
        config=args,
        argv_content=sys.argv + ["gpus: {}".format(torch.cuda.device_count())],
    )

    # Pretraining runs get a longer patience before stopping.
    if args['pretrain']:
        patience = 20
    else:
        patience = 5
    early_stopping = plc.EarlyStopping(monitor=monitored_metric, mode=metric_mode, patience=patience)

    temp_cleanup = TempFileCleanupCallback()

    return [code_backup, best_checkpoint, setup, early_stopping, temp_cleanup], ckptdir

# Set the global seed before anything else is constructed
# (the recorded output shows "Global seed set to 114514").
pl.seed_everything(args['seed'])

# Every entry of `args` is forwarded as a keyword argument to the data module;
# only the "test" stage is prepared here.
data_module = DInterface(**args)
data_module.setup(stage="test")
gpu_count = torch.cuda.device_count()  # not referenced again in the visible cells

# W&B logger: offline mode follows args['wandb_offline'], and `args` is recorded as the run config.
logger = plog.WandbLogger(project='PTM-MeToken', dir='./wandb/', name=args['ex_name'], offline=args['wandb_offline'], config=args)

# Callback list for the Trainer plus the checkpoint directory path for this run.
callbacks, ckptdir = load_callbacks(args)


Global seed set to 114514
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


In [4]:
# Evaluate the checkpointed model on the test split and persist its metrics.
# `json` is imported in the notebook's top import cell.

# Keyword arguments for the Lightning Trainer.
# NOTE(review): the recorded run emitted two deprecation warnings; `gpus` and
# `resume_from_checkpoint` are the likely sources -- confirm against the
# installed pytorch_lightning version before renaming them.
trainer_config = {
    # Runs named 'debug' are pinned to GPU 0 regardless of args['gpus'].
    'gpus': args['gpus'] if args['ex_name'] != 'debug' else [0],
    'max_epochs': args['epoch'],
    'strategy': args['strategy'],
    'accelerator': 'gpu',
    'callbacks': callbacks,
    'logger': logger,
    'gradient_clip_val': 1.0,
    'resume_from_checkpoint': args['ckpt_path']
}

trainer = Trainer(**trainer_config)

# The model is restored explicitly from the same checkpoint path given to the Trainer.
# strict=False is passed through to load_from_checkpoint -- presumably to tolerate
# state-dict key mismatches; TODO confirm.
model = MInterface.load_from_checkpoint(
    trainer_config["resume_from_checkpoint"],
    strict=False,
    model_name=args["model_name"],
)
trainer.test(model, data_module)

# Only global rank 0 computes and writes the metrics file.
if trainer.global_rank == 0:
    metrics = model.cal_metric(path=args["path"])
    metrics_dir = os.path.join(args["res_dir"], args["ex_name"])
    # Ensure the run directory exists; open() would otherwise raise
    # FileNotFoundError when nothing has created it yet.
    os.makedirs(metrics_dir, exist_ok=True)
    with open(os.path.join(metrics_dir, 'metrics.json'), 'w') as file_obj:
        json.dump(metrics, file_obj)

  rank_zero_deprecation(
  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 27/27 [00:28<00:00,  1.05s/it]
accuracy: 0.8979, precision: 0.8588, recall: 0.6875, f1 score: 0.7241, mcc score: 0.7998, auroc: 0.9605, auprc: 0.6836
