In [None]:
!pip install --user --no-cache-dir -r requirements.txt

In [None]:
import argparse
import torch
import numpy as np
import math
import logging
import pytorch_lightning as pl

from dataset import PairSequenceData
from pytorch_lightning.callbacks import ModelCheckpoint, TQDMProgressBar, EarlyStopping
import os
import psutil
from model import AttentionModel
# Make INFO-level records visible so the progress/memory messages below are shown.
logging.basicConfig(level=logging.INFO)


# Handle on the current process, used to sample its resident set size (RSS)
# before and after the datasets are loaded.
process = psutil.Process(os.getpid())
memory_before = process.memory_info().rss

# Optional debugging switches, left disabled:
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Previously used fixed sequence length, left disabled:
# max_len = 802
# logging.info('Loading data with max_len + 2 tokens = {}'.format(max_len))

# RSS is reported by psutil in bytes; two divisions by 1024 convert it to MiB.
logging.info(
    "Memory before loading data: {} Mb".format(memory_before / 1024 / 1024)
)

# Training and test splits are read from separate "actions" TSV files but share
# the same FASTA sequences file and the same chunk size.
dataset = PairSequenceData(
    actions_file="string12.0_experimental_score_500_train.tsv",
    sequences_file="string12.0_experimental_score_500.fasta",
    chunk_size=1000,
)

dataset_test = PairSequenceData(
    actions_file="string12.0_experimental_score_500_test.tsv",
    sequences_file="string12.0_experimental_score_500.fasta",
    chunk_size=1000,
)

# Sample RSS again (bytes -> MiB) so the cost of loading both datasets can be
# compared with the "before" figure logged earlier.
logging.info(
    "Memory after loading data: {} Mb".format(
        process.memory_info().rss / 1024 / 1024
    )
)

# Build the CLI parser and let the model class register its own hyperparameters.
parser = AttentionModel.add_model_specific_args(argparse.ArgumentParser())
# parser = pl.Trainer.add_argparse_args(parser)

# NOTE(review): presumably this swallows the "-f <connection file>" argument a
# Jupyter kernel is started with, so parse_args() does not reject sys.argv -
# confirm before running this outside a notebook.
parser.add_argument("-f")
params = parser.parse_args()

# Override parsed values for this run.
# NOTE(review): the "+ 2" presumably reserves room for two extra special tokens
# per sequence - verify against the tokenizer / collate_fn.
params.max_len = dataset.max_len + 2
params.batch_size = 2
# params.accelerator = "gpu"

model = AttentionModel(
    params,
    ntoken=len(dataset.tokenizer),
    embed_dim=32,
)

# To resume from a saved checkpoint, restore its weights here:
# ckpt = torch.load("logs/AttentionModelBase/version_0/checkpoints/chkpt_loss_based_epoch=13-val_loss=0.085-val_BinaryF1Score=0.851.ckpt")
# model.load_state_dict(ckpt['state_dict'])

# model.load_data(dataset=dataset, valid_size=0.01)

# Both loaders are built with the training dataset's collate function and
# without shuffling.
# NOTE(review): the validation loader uses dataset.collate_fn rather than
# dataset_test.collate_fn, and the training loader is not shuffled - confirm
# both are intentional.
loader_kwargs = {"collate_fn": dataset.collate_fn, "shuffle": False}
train_set = model.train_dataloader(dataset, **loader_kwargs)
val_set = model.val_dataloader(dataset_test, **loader_kwargs)

# TensorBoard event files are written under logs/AttentionModelBase/.
logger = pl.loggers.TensorBoardLogger("logs", name="AttentionModelBase")

# Keep only the single checkpoint with the lowest monitored "val_loss".
# The filename template also references "val_BinaryF1Score"; this assumes the
# model logs a metric under that name.
checkpoint_callback = ModelCheckpoint(
    filename="chkpt_loss_based_{epoch}-{val_loss:.3f}-{val_BinaryF1Score:.3f}",
    verbose=True,
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

# Stop training once "val_loss" has failed to improve for 10 validation checks.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=False,
    mode="min",
)

callbacks = [
    # TQDMProgressBar(refresh_rate=500),
    checkpoint_callback,
    early_stopping_callback,
]

# Allow float32 matrix multiplications to use lower-precision internal
# arithmetic (see the PyTorch documentation for the exact trade-off).
torch.set_float32_matmul_precision("medium")

# Single-node GPU training, capped at 100 epochs (early stopping may end sooner).
trainer = pl.Trainer(
    accelerator="gpu",
    num_nodes=1,
    max_epochs=100,
    logger=logger,
    callbacks=callbacks,
)

trainer.fit(model, train_set, val_set)

INFO:root:Memory before loading data: 452.34375 Mb
INFO:root:Reading sequences from string12.0_experimental_score_500.fasta
INFO:root:Max sequence length set to the length of the largest sequence: 1000
INFO:root:Reading sequences from string12.0_experimental_score_500.fasta
INFO:root:Max sequence length set to the length of the largest sequence: 1000
INFO:root:Memory after loading data: 523.57421875 Mb
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                    | Type                  | Params
------------------------------------------------------------------
0 | valid_metrics           | MetricCollection      | 0     
1 | train_metrics           | MetricCollection      | 0     
2 | test_metrics            | MetricCollection      | 0     
3 | embedding               | Embedding             | 1.0 K 
4 | positional_encoding    

Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]