In [1]:
!nvidia-smi

Mon Aug 15 02:56:11 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.48.07    Driver Version: 515.48.07    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A40          On   | 00000000:A3:00.0 Off |                    0 |
|  0%   47C    P0    61W / 300W |      0MiB / 46068MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
### Parameters
import os

# If True, the fine-tuning cell rebuilds the model from its config instead of
# keeping the pretrained weights, and the warm-up ratio is set to 0.
RANDOMIZE_WEIGHTS = False
# Only relevant when the tokenizer's vocab size differs from the original model's.
RESIZE_EMBEDDINGS = True

# CSV file the final metrics table is written to.
OUTPUT_PATH = './DEBERTA_DNABERTtokenizer_metrics.csv'

# K-mer configuration:
#   K      - k-mer length used to split a sequence before tokenization
#            (None: the raw sequence is passed to the tokenizer).
#   STRIDE - 1 selects overlapping k-mers; any other value selects
#            non-overlapping k-mers (stride K).
MODEL_NAME = "simecek/DNADebertaK6b"
TOKENIZER_NAME = "armheb/DNA_bert_6"
K = 6
STRIDE = 1


# Alternative configuration (sentencepiece tokenizer, no k-mer splitting):
# MODEL_NAME = "Vlasta/DNADebertaSentencepiece30k"
# TOKENIZER_NAME = "Vlasta/DNADebertaSentencepiece30k"
# K = None
# STRIDE = None



# Each entry of DATASETS is a (dataset name, dataset version) pair.

# All datasets
# DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
#  ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
#  ('human_ensembl_regulatory', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0), ('drosophila_enhancers_stark', 0)]

# Quick check dataset
# DATASETS = [('demo_human_or_worm', 0)]


# Binary classification datasets (without human_ensembl_regulatory)
DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
 ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
  ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0), ('drosophila_enhancers_stark', 0)]

# Folder the benchmark datasets are read from. It is derived from the current
# user's home directory rather than a hard-coded '/home/jovyan/...' path so the
# notebook resolves the folder for any user. The download cell does not pass a
# destination, so the data presumably lands in '~/.genomic_benchmarks' -
# confirm against the genomic_benchmarks version in use.
BENCHMARKS_FOLDER = os.path.join(os.path.expanduser('~'), '.genomic_benchmarks')
# If the Ensembl server refuses the connection
# ("[Errno 104] Connection reset by peer"), download with use_cloud_cache=True.
USE_CLOUD_CACHE = True
# If less than 1, each sample is kept with this probability, i.e. roughly this
# fraction of each dataset is used.
DATASET_THINING = 1

BATCH_SIZE = 32       # per-device batch size for training and evaluation
ACCUMULATION = 2      # gradient accumulation steps
LEARNING_RATE = 1e-5
EPOCHS = 100          # upper bound on training epochs per run
RUNS = 1              # fine-tuning runs per dataset

print(DATASETS)

[('demo_coding_vs_intergenomic_seqs', 0), ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0), ('drosophila_enhancers_stark', 0)]


In [3]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
# Warm up over the first 5% of training (5 epochs when all 100 epochs run);
# no warm-up at all when the model starts from randomized weights.
warmup_ratio = 0 if RANDOMIZE_WEIGHTS else 0.05
def get_trainargs():
    """Build a fresh ``TrainingArguments`` object for one fine-tuning run.

    The settings come from the notebook-level parameters (``LEARNING_RATE``,
    ``BATCH_SIZE``, ``ACCUMULATION``, ``EPOCHS``, ``warmup_ratio``). The model
    is evaluated and checkpointed once per epoch under ``'outputs'``, and the
    best checkpoint is reloaded when training ends.

    Returns
    -------
    TrainingArguments
        A new instance on every call; each call draws a new random seed from
        1..10000, so repeated runs are not identical.
    """
    # Imported here so the function does not depend on an import that only
    # happens in a later cell.
    from random import randrange

    return TrainingArguments(
        'outputs',
        learning_rate=LEARNING_RATE,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type='linear',
        fp16=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=ACCUMULATION,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        save_strategy='epoch',
        seed=randrange(1, 10001),
        report_to='none',
        load_best_model_at_end=True,
    )
# Early stopping with a patience of 5 evaluations and no minimum improvement
# threshold; callbacks handed to every Trainer created below.
callbacks = [EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.0)]

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from itertools import product
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

# For K > 6, add every possible K-mer over the four-letter DNA alphabet to the
# tokenizer vocabulary.
if K is not None and K > 6:
    alphabet = ('A', 'C', 'T', 'G')
    vocab = [''.join(letters) for letters in product(alphabet, repeat=K)]
    tokenizer.add_tokens(vocab)

In [5]:
def kmers_strideK(s, k=K):
    """Split ``s`` into consecutive, non-overlapping k-mers (stride ``k``).

    A trailing fragment shorter than ``k`` is dropped.
    """
    last_start = len(s) - k
    return [s[start:start + k] for start in range(0, last_start + 1, k)]

def kmers_stride1(s, k=K):
    """Return every overlapping k-mer of ``s`` (sliding window, stride 1)."""
    windows = []
    for start in range(len(s) - k + 1):
        windows.append(s[start:start + k])
    return windows

# STRIDE == 1 selects overlapping k-mers; every other value selects stride K.
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# Function used for the actual tokenization; the variant is chosen once,
# depending on whether k-mer splitting is configured.
if K is None:
    def tok_func(x):
        """Tokenize the raw sequence stored under ``x["seq"]`` (with truncation)."""
        return tokenizer(x["seq"], truncation=True)
else:
    def tok_func(x):
        """Split ``x["seq"]`` into k-mers, join them with spaces and tokenize (with truncation)."""
        kmer_sentence = " ".join(kmers(x["seq"]))
        return tokenizer(kmer_sentence, truncation=True)

# Example: tokenize a short sequence, show the encoding, then decode the ids
# back to text to check the k-mer splitting.
example = tok_func({"seq": "ATGGAAAGAGGCACCATTCT"})
print(example)
tokenizer.decode(example["input_ids"])

{'input_ids': [2, 501, 1989, 3848, 3089, 56, 212, 835, 3325, 999, 3983, 3629, 2214, 650, 2587, 2142, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


'[CLS] ATGGAA TGGAAA GGAAAG GAAAGA AAAGAG AAGAGG AGAGGC GAGGCA AGGCAC GGCACC GCACCA CACCAT ACCATT CCATTC CATTCT [SEP]'

## Download benchmark datasets

In [6]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Root folder the datasets are read from in the fine-tuning cell.
benchmark_root = Path(BENCHMARKS_FOLDER)

# Fetch every configured dataset that is not available locally yet.
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(dataset_name, version=dataset_version, use_cloud_cache=USE_CLOUD_CACHE)

100%|██████████| 7/7 [00:00<00:00, 2906.37it/s]


## Function to extract dataframe metrics row from training logs

In [7]:
def get_log_from_history(history, dataset_name):
    """Condense a training log history into one metrics row for ``dataset_name``.

    Parameters
    ----------
    history : list of dict
        Log entries, e.g. ``trainer.state.log_history``. Entries containing an
        ``'eval_loss'`` key are treated as validation logs and entries
        containing a ``'test_loss'`` key as test logs.
    dataset_name : str
        Value stored under the ``'dataset'`` key of the returned row.

    Returns
    -------
    dict
        Test metrics taken from the first test log, plus the epoch and the full
        log entry of the validation evaluation with the lowest ``'eval_loss'``.
        A test metric that is absent from the test log (for example when only
        accuracy was computed) is reported as ``None`` instead of raising
        ``KeyError``.

    Raises
    ------
    IndexError
        If ``history`` contains no test log.
    ValueError
        If ``history`` contains no validation log.
    """
    eval_dicts = [entry for entry in history if 'eval_loss' in entry]
    test_dicts = [entry for entry in history if 'test_loss' in entry]
    test_log = test_dicts[0]
    min_loss_dict = min(eval_dicts, key=lambda entry: entry['eval_loss'])

    # (row column, key in the test log). The order defines the column order of
    # the resulting table. 'rocauc_0' / 'rocauc_1' are the macro / weighted
    # roc_auc entries, following the order in which the metrics are combined.
    test_columns = (
        ('test_acc', 'test_accuracy'),
        ('test_f1', 'test_f1'),
        ('test_loss', 'test_loss'),
        ('test_precision', 'test_precision'),
        ('test_recall', 'test_recall'),
        ('test_auroc_macro', 'test_rocauc_0_roc_auc'),
        ('test_auroc_weighted', 'test_rocauc_1_roc_auc'),
        ('test_pr_auc', 'test_pr_auc'),
    )

    row = {'dataset': dataset_name}
    for column, log_key in test_columns:
        # .get(): metrics that were not computed show up as None.
        row[column] = test_log.get(log_key)
    row['min_valid_loss_epoch'] = min_loss_dict['epoch']
    row['min_valid_loss_log'] = min_loss_dict
    return row

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [8]:
import evaluate
# All metrics reported for binary classification, bundled so that a single
# compute() call evaluates them together.
binary_metrics = evaluate.combine(
    [
        "accuracy",
        "f1",
        "recall",
        "precision",
        # The order of the two roc_auc entries matters for logging: macro
        # first, then weighted (they are read back as rocauc_0 / rocauc_1).
        evaluate.load("roc_auc", average="macro"),
        evaluate.load("roc_auc", average="weighted"),
        evaluate.load("Vlasta/pr_auc"),
    ]
)
# binary_metrics.compute(references=[0,1,1,1], predictions=[0,0,1,1], prediction_scores=[0.4,0.3,0.6,0.9])


In [9]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
import torch

def compute_metrics_binary(eval_preds):
    """Compute the combined binary metrics for a ``(logits, labels)`` pair.

    The logits are turned into class probabilities with a softmax over the
    last axis; the predicted class is the most probable one, and the
    probability of class 1 is passed on as the prediction score.
    """
    logits, labels = eval_preds
    logits_tensor = torch.from_numpy(logits).double()
    prediction_scores = torch.nn.functional.softmax(logits_tensor, dim=-1).numpy()
    # Equivalent to taking the argmax of the raw logits.
    predictions = prediction_scores.argmax(axis=-1)
    # Only the probability assigned to label 1 is used as the score.
    positive_scores = prediction_scores[:, 1]
    return binary_metrics.compute(
        predictions=predictions,
        references=labels,
        prediction_scores=positive_scores,
    )
    
#TODO human_ensembl_regulatory dataset multilabel metrics
def compute_metrics_multi(eval_preds):
    """Compute accuracy for a multi-class ``(logits, labels)`` pair.

    The predicted class is the argmax of the logits over the last axis. Only
    accuracy is reported here (see the TODO about multi-label metrics).
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    # Loaded through `evaluate` (as the binary metrics are) rather than the
    # deprecated `datasets.load_metric`.
    metric = evaluate.load("accuracy")
    return metric.compute(predictions=predictions, references=labels)

# One metrics row (dict) per fine-tuning run, collected over all datasets.
outputs = []

for dataset_name, dataset_version in tqdm(DATASETS):
    # Class labels are the entries of the train folder; sorting fixes the
    # label -> integer id mapping.
    labels = sorted([x.stem for x in (benchmark_root / dataset_name / 'train').iterdir()])

    # key -> (split, label id, sequence text) for every kept sample
    tmp_dict = {}

    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (benchmark_root / dataset_name / split / label).glob('*.txt'):
                txt = f.read_text()
                # Keep everything when thinning is off (falsy or exactly 1);
                # otherwise keep each sample with probability DATASET_THINING.
                if not DATASET_THINING or DATASET_THINING == 1 or random() < DATASET_THINING:
                    # The split is part of the key so that a train file and a
                    # test file with the same label and file name cannot
                    # overwrite each other. The key only becomes the frame
                    # index, which is dropped after tokenization.
                    tmp_dict[f"{split} {label} {f.stem}"] = (split, nlabel, txt)

    df = pd.DataFrame.from_dict(tmp_dict).T.rename(columns = {0: "dset", 1: "cat", 2: "seq"})

    ds = Dataset.from_pandas(df)

    tok_ds = ds.map(tok_func, batched=False, remove_columns=['__index_level_0__', 'seq'])
    tok_ds = tok_ds.rename_columns({'cat':'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })
    # Hold out 20% of the training split for validation (fixed seed).
    train_valid_split = dds['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
    dds['train']=train_valid_split['train']
    dds['valid']=train_valid_split['test']

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for _ in range(RUNS):
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        if RANDOMIZE_WEIGHTS:
            # Rebuild the model from its config only, i.e. without the
            # pretrained weights.
            # model_cls.init_weights() #Alternative
            model_cls = AutoModelForSequenceClassification.from_config(model_cls.config)
        if RESIZE_EMBEDDINGS:
            # Applied independently of RANDOMIZE_WEIGHTS: a tokenizer whose
            # vocabulary differs from the model's (different tokenizer, or
            # k-mers added for K > 6) needs a matching embedding matrix for
            # pretrained weights as well.
            model_cls.resize_token_embeddings(len(tokenizer))

        args = get_trainargs()

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['valid'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics,
                          callbacks=callbacks)
        trainer.train()
        # NOTE(review): the "early stopping required metric_for_best_model, but
        # did not find eval_loss" warning printed after each dataset presumably
        # comes from this call, whose metrics are prefixed 'test_' - confirm.
        trainer.evaluate(dds['test'], metric_key_prefix='test')
        training_log = get_log_from_history(trainer.state.log_history, dataset_name=dataset_name)
        outputs.append(training_log)
  

  0%|          | 0/7 [00:00<?, ?it/s]
  0%|          | 0/100000 [00:00<?, ?ex/s][A
  0%|          | 218/100000 [00:00<00:45, 2170.43ex/s][A
  0%|          | 436/100000 [00:00<00:47, 2084.56ex/s][A
  1%|          | 645/100000 [00:00<00:47, 2084.77ex/s][A
  1%|          | 861/100000 [00:00<00:46, 2111.64ex/s][A
  1%|          | 1073/100000 [00:00<00:51, 1911.41ex/s][A
  1%|▏         | 1291/100000 [00:00<00:49, 1995.92ex/s][A
  2%|▏         | 1516/100000 [00:00<00:47, 2073.10ex/s][A
  2%|▏         | 1742/100000 [00:00<00:46, 2128.65ex/s][A
  2%|▏         | 1965/100000 [00:00<00:45, 2157.72ex/s][A
  2%|▏         | 2182/100000 [00:01<00:49, 1978.84ex/s][A
  2%|▏         | 2408/100000 [00:01<00:47, 2057.40ex/s][A
  3%|▎         | 2634/100000 [00:01<00:46, 2114.94ex/s][A
  3%|▎         | 2860/100000 [00:01<00:45, 2155.80ex/s][A
  3%|▎         | 3078/100000 [00:01<00:48, 1991.61ex/s][A
  3%|▎         | 3303/100000 [00:01<00:46, 2063.41ex/s][A
  4%|▎         | 3529/100000 [00:01

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6412,0.368905,0.8952,0.89417,0.885585,0.902923,0.956435,0.956435,0.949817
1,0.33,0.266831,0.9016,0.904836,0.935725,0.875921,0.968255,0.968255,0.966807
2,0.2384,0.21632,0.915133,0.914558,0.908521,0.920676,0.972861,0.972861,0.972642
3,0.207,0.213958,0.912867,0.915498,0.944126,0.888554,0.974282,0.974282,0.974916
4,0.1883,0.206109,0.9194,0.919673,0.922923,0.916446,0.975693,0.975693,0.975209
5,0.1716,0.233084,0.907733,0.912238,0.959195,0.869665,0.974304,0.974304,0.974311
6,0.1502,0.23909,0.910133,0.913167,0.945193,0.88324,0.971582,0.971582,0.970829
7,0.1317,0.24818,0.909,0.911884,0.941859,0.883759,0.96965,0.96965,0.968027
8,0.1146,0.334852,0.8946,0.901082,0.960261,0.848774,0.962012,0.962012,0.957507
9,0.1011,0.313764,0.9046,0.90766,0.937858,0.879345,0.959446,0.959446,0.95337


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 14%|█▍        | 1/7 [37:25<3:44:33, 2245.65s/it]
  0%|          | 0/100000 [00:00<?, ?ex/s][A
  0%|          | 216/100000 [00:00<00:46, 2155.53ex/s][A
  0%|          | 432/100000 [00:00<00:47, 2115.15ex/s][A
  1%|          | 651/100000 [00:00<00:46, 2145.45ex/s][A
  1%|          | 874/100000 [00:00<00:45, 2175.87ex/s][A
  1%|          | 1092/100000 [00:00<01:02, 1588.23ex/s][A
  1%|▏         | 1317/100000 [00:00<00:55, 1767.43ex/s][A
  2%|▏         | 1540/100000 [00:00<00:51, 1895.28ex/s][A
  2%|▏         | 1763/100000 [00:00<00:49, 1988.91ex/s][A
  2%|▏         | 1980/100000 [00:01<00:48, 2040.43ex/s][A
  2%|▏         | 2192/100000 [00:01<00:52, 1875.27ex/s][A
  2%|▏         | 2406/100000 [00:01<00:50, 1947.86ex/s][A
  3%|▎         | 2621/100000 [00:01<00:48, 2004.26ex/s][A
  3%|▎         | 2839/100000 [00:01<00:47, 2053.48ex/s][A
  3%|▎         | 3048/100000 [00:01<0

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6368,0.31975,0.929867,0.926934,0.889852,0.967242,0.975717,0.975717,0.981015
1,0.2509,0.188137,0.943133,0.940752,0.903054,0.981734,0.989996,0.989996,0.991313
2,0.1325,0.161968,0.9478,0.945599,0.907454,0.98709,0.989636,0.989636,0.992094
3,0.0986,0.172073,0.945533,0.942863,0.89892,0.991324,0.993653,0.993653,0.994503
4,0.0843,0.125058,0.958467,0.957361,0.932658,0.983408,0.994763,0.994763,0.994999
5,0.0689,0.160728,0.9536,0.951954,0.919456,0.986833,0.994515,0.994515,0.994907
6,0.056,0.174832,0.9488,0.9466,0.907721,0.988958,0.994489,0.994489,0.99478
7,0.0447,0.178211,0.9474,0.94509,0.905454,0.988355,0.994037,0.994037,0.994362
8,0.0336,0.236136,0.945667,0.943122,0.901053,0.989312,0.993771,0.993771,0.994063
9,0.029,0.188079,0.957333,0.956391,0.935858,0.977846,0.993521,0.993521,0.993909


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 29%|██▊       | 2/7 [1:14:47<3:06:56, 2243.30s/it]
  0%|          | 0/27791 [00:00<?, ?ex/s][A
  0%|          | 97/27791 [00:00<00:28, 964.65ex/s][A
  1%|          | 195/27791 [00:00<00:28, 969.23ex/s][A
  1%|          | 296/27791 [00:00<00:27, 985.00ex/s][A
  1%|▏         | 396/27791 [00:00<00:27, 989.82ex/s][A
  2%|▏         | 495/27791 [00:00<00:27, 979.36ex/s][A
  2%|▏         | 593/27791 [00:00<00:27, 977.10ex/s][A
  2%|▏         | 692/27791 [00:00<00:27, 979.98ex/s][A
  3%|▎         | 791/27791 [00:00<00:27, 972.92ex/s][A
  3%|▎         | 889/27791 [00:00<00:28, 942.39ex/s][A
  4%|▎         | 984/27791 [00:01<00:28, 930.53ex/s][A
  4%|▍         | 1078/27791 [00:01<00:35, 747.97ex/s][A
  4%|▍         | 1174/27791 [00:01<00:33, 800.48ex/s][A
  5%|▍         | 1266/27791 [00:01<00:31, 831.20ex/s][A
  5%|▍         | 1362/27791 [00:01<00:30, 866.11ex/s][A
  5%|▌      

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,No log,0.635266,0.674982,0.628665,0.54619,0.740478,0.74317,0.74317,0.749411
2,0.637100,0.593101,0.689854,0.630043,0.524286,0.789247,0.789942,0.789942,0.799443
3,0.637100,0.537945,0.739986,0.735222,0.716667,0.754764,0.81685,0.81685,0.819436
4,0.535300,0.541145,0.736627,0.730882,0.71,0.75303,0.81424,0.81424,0.811899
5,0.535300,0.607826,0.697529,0.628352,0.507619,0.824439,0.806773,0.806773,0.816725
6,0.482400,0.565433,0.726313,0.699183,0.631429,0.783225,0.8142,0.8142,0.82224
7,0.482400,0.588366,0.733509,0.731058,0.719048,0.743476,0.808626,0.808626,0.813446
8,0.415800,0.649532,0.716479,0.680713,0.6,0.786517,0.796634,0.796634,0.808208


***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-261
Configuration saved in outputs/checkpoint-261/config.json
Model weights saved in outputs/checkpoint-261/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-261/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-261/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-522
Configuration saved in outputs/checkpoint-522/config.json
Model weights saved in outputs/checkpoint-522/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-522/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-522/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-783
Configuration saved in outputs/checkpoint-783/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 43%|████▎     | 3/7 [1:40:45<2:08:41, 1930.43s/it]
  0%|          | 0/123872 [00:00<?, ?ex/s][A
  0%|          | 163/123872 [00:00<01:16, 1620.04ex/s][A
  0%|          | 328/123872 [00:00<01:15, 1634.08ex/s][A
  0%|          | 496/123872 [00:00<01:14, 1654.82ex/s][A
  1%|          | 670/123872 [00:00<01:13, 1686.21ex/s][A
  1%|          | 840/123872 [00:00<01:12, 1690.32ex/s][A
  1%|          | 1010/123872 [00:00<01:24, 1447.32ex/s][A
  1%|          | 1167/123872 [00:00<01:22, 1482.54ex/s][A
  1%|          | 1324/123872 [00:00<01:21, 1508.15ex/s][A
  1%|          | 1485/123872 [00:00<01:19, 1536.72ex/s][A
  1%|▏         | 1644/123872 [00:01<01:18, 1551.77ex/s][A
  1%|▏         | 1801/123872 [00:01<01:18, 1548.18ex/s][A
  2%|▏         | 1963/123872 [00:01<01:17, 1569.10ex/s][A
  2%|▏         | 2121/123872 [00:01<01:28, 1381.52ex/s][A
  2%|▏         | 2281/123872 [00:01<

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5945,0.559488,0.733222,0.722778,0.691419,0.757118,0.804933,0.804933,0.778305
1,0.5208,0.453783,0.795974,0.79224,0.773379,0.812044,0.87877,0.87877,0.875067
2,0.4266,0.397716,0.822453,0.821782,0.813824,0.829896,0.904238,0.904238,0.90384
3,0.373,0.383882,0.825144,0.812077,0.751123,0.883797,0.917029,0.917029,0.9185
4,0.3297,0.323497,0.859964,0.863627,0.881554,0.846415,0.938662,0.938662,0.939979
5,0.2725,0.31557,0.863839,0.857913,0.817248,0.902837,0.943097,0.943097,0.944536
6,0.236,0.339237,0.870782,0.872525,0.8792,0.86595,0.943591,0.943591,0.94467
7,0.199,0.34356,0.865831,0.858312,0.807939,0.915384,0.944311,0.944311,0.945609
8,0.1683,0.359963,0.87245,0.869017,0.841215,0.89872,0.943588,0.943588,0.945366
9,0.1445,0.384169,0.871643,0.869107,0.847207,0.892169,0.941564,0.941564,0.943164


***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1161
Configuration saved in outputs/checkpoint-1161/config.json
Model weights saved in outputs/checkpoint-1161/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1161/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1161/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2322
Configuration saved in outputs/checkpoint-2322/config.json
Model weights saved in outputs/checkpoint-2322/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2322/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2322/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3483
Configuration saved in outputs/checkpoint-3483/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 57%|█████▋    | 4/7 [4:10:32<3:55:48, 4716.24s/it]
  0%|          | 0/36131 [00:00<?, ?ex/s][A
  0%|          | 179/36131 [00:00<00:20, 1783.81ex/s][A
  1%|          | 358/36131 [00:00<00:20, 1741.62ex/s][A
  1%|▏         | 534/36131 [00:00<00:20, 1749.36ex/s][A
  2%|▏         | 711/36131 [00:00<00:20, 1756.34ex/s][A
  2%|▏         | 890/36131 [00:00<00:19, 1767.49ex/s][A
  3%|▎         | 1067/36131 [00:00<00:22, 1543.96ex/s][A
  3%|▎         | 1248/36131 [00:00<00:21, 1622.72ex/s][A
  4%|▍         | 1430/36131 [00:00<00:20, 1679.79ex/s][A
  4%|▍         | 1614/36131 [00:00<00:19, 1727.05ex/s][A
  5%|▍         | 1799/36131 [00:01<00:19, 1761.62ex/s][A
  5%|▌         | 1977/36131 [00:01<00:19, 1761.06ex/s][A
  6%|▌         | 2155/36131 [00:01<00:21, 1569.75ex/s][A
  6%|▋         | 2337/36131 [00:01<00:20, 1638.27ex/s][A
  7%|▋         | 2520/36131 [00:01<00:19, 1690.11e

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,No log,0.513324,0.803506,0.802449,0.74715,0.866587,0.85647,0.85647,0.891063
2,0.589200,0.426128,0.833395,0.837033,0.801036,0.876417,0.892787,0.892787,0.914248
3,0.409800,0.417616,0.819926,0.838089,0.872539,0.806256,0.913656,0.913656,0.931861
4,0.409800,0.331475,0.870111,0.871486,0.824525,0.924119,0.926529,0.926529,0.947953
5,0.321400,0.314267,0.87583,0.881992,0.868739,0.895655,0.936618,0.936618,0.954493
6,0.257900,0.310433,0.880258,0.888851,0.896373,0.881454,0.946431,0.946431,0.961916
7,0.257900,0.293784,0.898339,0.901272,0.868739,0.936337,0.947111,0.947111,0.964017
8,0.195400,0.278234,0.904428,0.908156,0.884629,0.932969,0.954333,0.954333,0.968326
9,0.152200,0.316067,0.899631,0.899705,0.842832,0.964808,0.938621,0.938621,0.961546
10,0.152200,0.353684,0.896863,0.905013,0.919862,0.890635,0.957148,0.957148,0.970425


***** Running Evaluation *****
  Num examples = 5420
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-339
Configuration saved in outputs/checkpoint-339/config.json
Model weights saved in outputs/checkpoint-339/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-339/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-339/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5420
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-678
Configuration saved in outputs/checkpoint-678/config.json
Model weights saved in outputs/checkpoint-678/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-678/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-678/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5420
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1017
Configuration saved in outputs/checkpoint-1017/config.json
Model weights saved in output

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 71%|███████▏  | 5/7 [4:33:13<1:56:52, 3506.29s/it]
  0%|          | 0/139804 [00:00<?, ?ex/s][A
  0%|          | 136/139804 [00:00<01:42, 1357.20ex/s][A
  0%|          | 282/139804 [00:00<01:38, 1411.86ex/s][A
  0%|          | 424/139804 [00:00<01:40, 1393.48ex/s][A
  0%|          | 564/139804 [00:00<01:40, 1390.81ex/s][A
  1%|          | 704/139804 [00:00<01:40, 1390.36ex/s][A
  1%|          | 845/139804 [00:00<01:39, 1395.97ex/s][A
  1%|          | 985/139804 [00:00<01:40, 1376.95ex/s][A
  1%|          | 1123/139804 [00:00<01:59, 1158.09ex/s][A
  1%|          | 1257/139804 [00:00<01:54, 1206.94ex/s][A
  1%|          | 1392/139804 [00:01<01:51, 1246.44ex/s][A
  1%|          | 1521/139804 [00:01<02:06, 1091.86ex/s][A
  1%|          | 1659/139804 [00:01<01:58, 1166.68ex/s][A
  1%|▏         | 1802/139804 [00:01<01:51, 1237.23ex/s][A
  1%|▏         | 1945/139804 [00:01<01

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,0.6688,0.621078,0.663535,0.687843,0.732705,0.648157,0.719414,0.719414,0.69413
2,0.5762,0.563477,0.713795,0.713672,0.704995,0.722566,0.787283,0.787283,0.775383
3,0.5383,0.521797,0.745077,0.755309,0.777663,0.734205,0.823009,0.823009,0.814908
4,0.4887,0.499115,0.754184,0.75023,0.729689,0.771961,0.839612,0.839612,0.832782
5,0.4589,0.495567,0.762195,0.771228,0.792271,0.751274,0.845212,0.845212,0.83867
6,0.4361,0.489684,0.766106,0.76202,0.740151,0.785221,0.850654,0.850654,0.844834
7,0.4038,0.507843,0.765581,0.783512,0.838454,0.735328,0.853032,0.853032,0.846916
8,0.3721,0.50737,0.766106,0.783873,0.83836,0.736036,0.855173,0.855173,0.849391
9,0.3433,0.538405,0.765867,0.782493,0.832422,0.738215,0.851194,0.851194,0.843885
10,0.3124,0.585577,0.755424,0.738169,0.681433,0.805212,0.847572,0.847572,0.843634


***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1311
Configuration saved in outputs/checkpoint-1311/config.json
Model weights saved in outputs/checkpoint-1311/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1311/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1311/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2622
Configuration saved in outputs/checkpoint-2622/config.json
Model weights saved in outputs/checkpoint-2622/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2622/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2622/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3933
Configuration saved in outputs/checkpoint-3933/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
 86%|████████▌ | 6/7 [7:29:00<1:38:20, 5900.08s/it]
  0%|          | 0/6914 [00:00<?, ?ex/s][A
  0%|          | 23/6914 [00:00<00:31, 221.50ex/s][A
  1%|          | 46/6914 [00:00<00:30, 224.78ex/s][A
  1%|          | 70/6914 [00:00<00:30, 227.49ex/s][A
  1%|▏         | 94/6914 [00:00<00:29, 229.55ex/s][A
  2%|▏         | 118/6914 [00:00<00:29, 230.43ex/s][A
  2%|▏         | 142/6914 [00:00<00:29, 227.51ex/s][A
  2%|▏         | 167/6914 [00:00<00:29, 232.10ex/s][A
  3%|▎         | 191/6914 [00:00<00:28, 232.54ex/s][A
  3%|▎         | 215/6914 [00:00<00:28, 231.58ex/s][A
  3%|▎         | 239/6914 [00:01<00:28, 232.80ex/s][A
  4%|▍         | 263/6914 [00:01<00:28, 231.26ex/s][A
  4%|▍         | 287/6914 [00:01<00:28, 231.97ex/s][A
  4%|▍         | 311/6914 [00:01<00:28, 229.86ex/s][A
  5%|▍         | 334/6914 [00:01<00:28, 229.28ex/s][A
  5%|▌         | 357/6914 [00:01<0

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,No log,0.691875,0.556413,0.685363,0.965318,0.531283,0.612343,0.612343,0.579239
2,No log,0.680089,0.648023,0.654683,0.666667,0.643123,0.6916,0.6916,0.645314
3,No log,0.631086,0.677917,0.719328,0.824663,0.637854,0.718281,0.718281,0.670381
4,No log,0.634671,0.655738,0.727689,0.919075,0.602273,0.733991,0.733991,0.687659
5,No log,0.594768,0.699132,0.747981,0.8921,0.64395,0.762771,0.762771,0.722154
6,No log,0.631077,0.664417,0.734756,0.928709,0.607818,0.760754,0.760754,0.714472
7,No log,0.610391,0.688525,0.740145,0.88632,0.635359,0.766218,0.766218,0.722481
8,0.585900,0.636658,0.68756,0.73955,0.88632,0.634483,0.753695,0.753695,0.707275
9,0.585900,0.647187,0.689489,0.743631,0.899807,0.63365,0.757382,0.757382,0.703718
10,0.585900,0.64057,0.699132,0.745098,0.878613,0.646809,0.76088,0.76088,0.716289


***** Running Evaluation *****
  Num examples = 1037
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-65
Configuration saved in outputs/checkpoint-65/config.json
Model weights saved in outputs/checkpoint-65/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-65/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-65/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 1037
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-130
Configuration saved in outputs/checkpoint-130/config.json
Model weights saved in outputs/checkpoint-130/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-130/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-130/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 1037
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-195
Configuration saved in outputs/checkpoint-195/config.json
Model weights saved in outputs/check

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
100%|██████████| 7/7 [7:37:42<00:00, 3923.27s/it]  


## Outputs

In [10]:
# One row per fine-tuning run; the bare name on the last line displays the table.
outputs_df = pd.DataFrame(data=outputs)
outputs_df

Unnamed: 0,dataset,test_acc,test_f1,test_loss,test_precision,test_recall,test_auroc_macro,test_auroc_weighted,test_pr_auc,min_valid_loss_epoch,min_valid_loss_log
0,demo_coding_vs_intergenomic_seqs,0.91832,0.918587,0.216274,0.915594,0.9216,0.973802,0.973802,0.973179,5.0,"{'eval_loss': 0.2061094343662262, 'eval_accura..."
1,demo_human_or_worm,0.95932,0.95825,0.120152,0.984147,0.93368,0.995005,0.995005,0.995096,5.0,"{'eval_loss': 0.12505795061588287, 'eval_accur..."
2,human_enhancers_cohn,0.731434,0.723801,0.541107,0.744973,0.7038,0.81343,0.81343,0.80394,3.0,"{'eval_loss': 0.5379452109336853, 'eval_accura..."
3,human_enhancers_ensembl,0.862577,0.855856,0.321195,0.899865,0.815951,0.940856,0.940856,0.94238,6.0,"{'eval_loss': 0.31556957960128784, 'eval_accur..."
4,human_nontata_promoters,0.901041,0.907587,0.282666,0.922463,0.893184,0.955788,0.955788,0.969006,8.0,"{'eval_loss': 0.27823394536972046, 'eval_accur..."
5,human_ocr_ensembl,0.7706,0.763829,0.488325,0.787058,0.741932,0.85147,0.85147,0.841236,6.0,"{'eval_loss': 0.4896840751171112, 'eval_accura..."
6,drosophila_enhancers_stark,0.686705,0.738921,0.601318,0.633361,0.886705,0.767109,0.767109,0.734075,5.0,"{'eval_loss': 0.5947676301002502, 'eval_accura..."


In [None]:
# outputs_df.groupby('dataset').agg({'accuracy' : ['mean', 'sem'], 'f1' : ['mean','sem'], 'train_runtime': ['mean', 'sem']})

In [None]:
# Write the metrics table to OUTPUT_PATH as CSV, without the index column.
outputs_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)