In [1]:
%load_ext autoreload
%autoreload 2

import sys, glob, os, gzip
sys.path.insert(1, os.path.join(sys.path[0], '..'))

import numpy as np
import pandas as pd

import torch
import pytorch_lightning as pl
from pytorch_lightning.loggers.neptune import NeptuneLogger

from ablang_train import ABtokenizer, AbLang, trainingframe
from ablang_train.train_utils import callback_handler, datamodule, arghandler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_neptune_logger(args):
    """
    Initialize the Neptune logger for a run.

    The API token is read from the ``NEPTUNE_API_TOKEN`` environment variable
    rather than being embedded in the notebook, so the credential never ends
    up in version control or in a shared copy of this file.

    Args:
        args: Object exposing a ``name`` attribute; it is used as the run name.

    Returns:
        A ``NeptuneLogger`` for the "tobiasheol/AbLangTraining" project with
        model-checkpoint logging disabled.
    """

    neptune_args = {
        # SECURITY: a literal token used to be hardcoded here. Treat that token
        # as compromised and revoke it in the Neptune UI.
        # If the variable is unset this is None; NeptuneLogger is documented to
        # fall back to its own token lookup in that case.
        'api_key': os.environ.get('NEPTUNE_API_TOKEN'),
        'project': "tobiasheol/AbLangTraining",
        'name': args.name,
        'log_model_checkpoints': False,
    }

    return NeptuneLogger(**neptune_args)

In [3]:
# PARSE THE DEFAULT (TEST-MODE) ARGUMENTS, THEN OVERRIDE HPARAMS FOR THIS RUN
arguments = arghandler.parse_args(args="", is_test=True)
hparams = arguments.model_specific_args
logger = set_neptune_logger(hparams)

# Values applied on top of the parsed defaults, in the order listed
hparam_overrides = {
    'num_encoder_blocks': 1,
    'over_sample_data': 0,
    'data_path': '../data/single_data/',
    'max_fit_batch_size': 10,
    'effective_batch_size': 1,
    'eval_batch_size': 1,
    'val_check_interval': 1,
    'use_tkn_dropout': False,
}
for hparam_name, hparam_value in hparam_overrides.items():
    setattr(hparams, hparam_name, hparam_value)

# Trainer settings: attach the Neptune logger and log on every step
trainer_args = arguments.trainer_args
trainer_args['logger'] = logger
trainer_args['log_every_n_steps'] = 1

callbacks = callback_handler.CallbackHandler(
    save_step_frequency=1,
    progress_refresh_rate=0,
    outpath=hparams.out_path,
)

In [4]:
def enforce_reproducibility(seed=42):
    """
    Seed torch, numpy and pytorch_lightning, and put cuDNN in deterministic mode.

    Args:
        seed: Seed value passed unchanged to every seeding call below.

    Note:
        Atomic operations currently have no simple way to be made
        deterministic, because the order of parallel operations is not known.
    """
    # cuDNN flags: deterministic on, benchmark off
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # Manual seed for both CPU and CUDA
    torch.manual_seed(seed)

    # numpy's global generator
    np.random.seed(seed)

    # Lightning's global seeding
    pl.seed_everything(seed)
    
# APPLY THE CONFIGURED SEED - IMPORTANT WHEN USING MULTIPLE GPUS, AND GOOD FOR REPRODUCIBILITY OTHERWISE
enforce_reproducibility(seed=arguments.model_specific_args.seed)

Global seed set to 42


In [5]:
# BUILD THE DATA MODULE AND EXTRACT ITS DATALOADERS
hparams = arguments.model_specific_args
hparams.mask_technique = 'random'

abrep_dm = datamodule.MyDataModule(hparams, ABtokenizer)
# NOTE: handing the data module straight to the fit function is supposed to work,
# but it does not when using multiple GPUs, so it is set up by hand here.
abrep_dm.setup('fit')

train, val = abrep_dm.train_dataloader(), abrep_dm.val_dataloader()

In [6]:
# Sanity check: print the first input of the first training batch and stop.
for batch in train:
    first_input = batch['input'][0]

    print(first_input)
    # Labels are sliced to the length of that first input.
    # NOTE(review): presumably labels are flattened across the batch - confirm.
    print(batch['labels'][:len(first_input)])
    break

tensor([ 0, 10,  7, 23, 23,  8, 10, 13, 13,  7, 20, 23,  6, 14, 13,  2, 10,  2,
        15, 23, 16,  7, 23,  7, 12, 23, 23,  7,  9, 16, 23,  9,  9, 23, 23,  9,
        23, 18, 23, 10, 20, 13, 12, 23,  8, 23,  4, 23, 23, 16, 18, 18,  5,  6,
        20, 20, 13, 23, 23, 23,  7,  5,  2, 17,  7, 12,  7,  4,  7, 12,  8, 23,
        14, 23, 20, 14, 16, 23, 12, 20, 10,  7, 23,  5,  6, 14,  5, 18, 18,  2,
        23,  8, 19,  5,  5,  7, 20,  9, 12, 19, 15, 23, 23, 12, 12,  8, 23, 20,
         8, 15, 20, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
        21, 21, 21, 21, 21, 21, 21])
ten

In [7]:
# BUILD THE TRAINING FRAME AROUND THE AbLang MODEL AND TOKENIZER
model = trainingframe.TrainingFrame(
    arguments.model_specific_args,
    AbLang,
    ABtokenizer,
)

# CREATE THE TRAINER FROM THE PARSED TRAINER ARGUMENTS PLUS THE CALLBACKS
trainer = pl.Trainer(
    **arguments.trainer_args,
    callbacks=callbacks(),
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.


In [None]:
# Start training with the explicit train and validation dataloaders created
# earlier in this notebook (rather than passing the data module itself).
trainer.fit(model, train, val)


  | Name    | Type             | Params
---------------------------------------------
0 | loss_fn | CrossEntropyLoss | 0     
1 | ablang  | AbLang           | 7.7 M 
---------------------------------------------
7.7 M     Trainable params
0         Non-trainable params
7.7 M     Total params
30.800    Total estimated model params size (MB)


https://app.neptune.ai/tobiasheol/AbLangTraining/e/ABLANG-329


Info (NVML): NVML Shared Library Not Found. GPU usage metrics may not be reported. For more information, see https://docs.neptune.ai/help/nvml_error/


Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


  self._container.define(self._path, value, wait)
  rank_zero_warn(
  rank_zero_warn(
