In [12]:
### Parameters
from pathlib import Path  # only used to resolve the home directory for BENCHMARKS_FOLDER

# True  -> the classifier is re-created from the checkpoint's config only
#          (see the training loop), i.e. it does not start from the loaded weights.
# False -> the pretrained checkpoint is fine-tuned.
RANDOMIZE_WEIGHTS = True
# Resize the token embeddings to len(tokenizer); only needed when the tokenizer
# has a different vocab_size than the original model. The training loop only
# consults this flag when RANDOMIZE_WEIGHTS is True.
RESIZE_EMBEDDINGS = False

# CSV file the per-run metrics table is written to at the end of the notebook.
OUTPUT_PATH = './DEBERTA_RANDOM_Sentencepiece30tokenizer_metrics.csv'

MODEL_NAME = "Vlasta/DNADebertaSentencepiece30k_continuation"
TOKENIZER_NAME = "Vlasta/DNA_Sentencepiece_vocab_30000_max_tokenlen_100"
# K: k-mer length used to pre-split sequences before tokenization;
#    None passes the raw sequence to the tokenizer.
# STRIDE: 1 selects overlapping k-mers, any other value non-overlapping ones.
K = None
STRIDE = None

# Every entry of DATASETS is a (dataset_name, dataset_version) tuple.

# All datasets
# DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
#  ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
#  ('human_ensembl_regulatory', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]

# Quick check dataset
# DATASETS = [('demo_human_or_worm', 0)]


# Binary classification datasets (without human_ensembl_regulatory)
DATASETS = [
    ('demo_coding_vs_intergenomic_seqs', 0),
    ('demo_human_or_worm', 0),
    ('human_enhancers_cohn', 0),
    ('human_enhancers_ensembl', 0),
    ('human_nontata_promoters', 0),
    ('human_ocr_ensembl', 0),
]


# Folder the benchmark files are read from. Resolved from the current user's
# home directory instead of a hard-coded '/home/jovyan/...' so the notebook also
# runs under other accounts. Kept as a str, as before.
# NOTE(review): download_dataset is called without an explicit destination, so
# this must match the library's default cache folder - confirm.
BENCHMARKS_FOLDER = str(Path.home() / '.genomic_benchmarks')
# if Ensembl refuses the connection - "[Errno 104] Connection reset by peer" -
# download with use_cloud_cache=True
USE_CLOUD_CACHE = True
# if less than 1, only this fraction of each dataset is used
# (a falsy value or exactly 1 keeps every sample)
DATASET_THINING = 1

BATCH_SIZE = 32        # per-device batch size for both training and evaluation
ACCUMULATION = 2       # gradient accumulation steps
LEARNING_RATE = 1e-5
EPOCHS = 100           # upper bound; early stopping can end a run sooner
RUNS = 1               # number of fine-tuning runs per dataset

print(DATASETS)

[('demo_coding_vs_intergenomic_seqs', 0), ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]


In [13]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
# Warm-up covers 5% of training (5 epochs when the full 100 epochs are run);
# runs that start from randomized weights use no warm-up at all.
warmup_ratio = 0 if RANDOMIZE_WEIGHTS else 0.05
def get_trainargs():
    """Build a fresh ``TrainingArguments`` object for one fine-tuning run.

    Hyper-parameters come from the notebook-level constants (LEARNING_RATE,
    BATCH_SIZE, ACCUMULATION, EPOCHS) and from ``warmup_ratio``. Every call
    draws a new random seed in [1, 10000], so repeated runs differ.

    Returns:
        TrainingArguments writing checkpoints to the 'outputs' directory,
        evaluating and saving once per epoch, and reloading the best
        checkpoint when training ends.
    """
    # Local import: the notebook only imports ``randrange`` in a later cell,
    # so without this the function fails if called before that cell has run.
    from random import randrange

    return TrainingArguments(
        'outputs',
        learning_rate=LEARNING_RATE,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type='linear',
        fp16=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=ACCUMULATION,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        save_strategy='epoch',
        # NOTE(review): the drawn seed is not recorded anywhere, so a run
        # cannot be reproduced exactly afterwards.
        seed=randrange(1,10001),
        report_to='none',
        load_best_model_at_end=True,
    )
# Early stopping with a patience of 5 evaluations; evaluation runs once per
# epoch (see get_trainargs), so this amounts to 5 epochs without improvement.
early_stopping = EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.0)
callbacks = [early_stopping]

In [14]:
from itertools import product
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

# For k-mer lengths above 6, register every possible k-mer over the DNA alphabet
# as an additional token.
# NOTE(review): presumably shorter k-mers are already part of the vocabulary - confirm.
if K is not None and K > 6:
    alphabet = ('A', 'C', 'T', 'G')
    vocab = [''.join(letters) for letters in product(alphabet, repeat=K)]
    tokenizer.add_tokens(vocab)

loading file https://huggingface.co/Vlasta/DNA_Sentencepiece_vocab_30000_max_tokenlen_100/resolve/main/tokenizer.json from cache at /home/jovyan/.cache/huggingface/transformers/244fb7ffba3334ecc4652bf01ae9958d2ed3a29db31e0378d390d9f529417bc9.2f9fcb8f2e1c5b0787f04a27a2eb0a4d5a97b7e29392cef09b8d71802a58bf95
loading file https://huggingface.co/Vlasta/DNA_Sentencepiece_vocab_30000_max_tokenlen_100/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/Vlasta/DNA_Sentencepiece_vocab_30000_max_tokenlen_100/resolve/main/special_tokens_map.json from cache at /home/jovyan/.cache/huggingface/transformers/f5065a1d9f7a806c40bcfa8d6e1ea96262cbed85223c77308a3c2d1bf5dccbc8.7da70648c6cb9951e284c9685f9ba7ae083dd59ed1d6d84bdfc0584a4ea94b6d
loading file https://huggingface.co/Vlasta/DNA_Sentencepiece_vocab_30000_max_tokenlen_100/resolve/main/tokenizer_config.json from cache at /home/jovyan/.cache/huggingface/transformers/54a4666f0d1ade048816e4ffeb5c88623316a461187112ef29cf4

In [15]:
def kmers_strideK(s, k=K):
    """Cut ``s`` into consecutive, non-overlapping pieces of length ``k``.

    A trailing remainder shorter than ``k`` is dropped.
    """
    last_start = len(s) - k
    return [s[start:start + k] for start in range(0, last_start + 1, k)]

def kmers_stride1(s, k=K):
    """Return every length-``k`` window of ``s``, sliding one character at a time."""
    window_count = len(s) - k + 1
    return [s[start:start + k] for start in range(window_count)]

# Overlapping k-mers only when STRIDE is exactly 1; every other value
# (including None) selects the non-overlapping variant.
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# Function used for the actual tokenization. Which variant gets defined is
# decided once, when this cell runs, from the value of K.
if K is not None:
    def tok_func(x):
        """Tokenize ``x["seq"]`` after splitting it into space-separated k-mers."""
        spaced_kmers = " ".join(kmers(x["seq"]))
        return tokenizer(spaced_kmers, truncation=True)
else:
    def tok_func(x):
        """Tokenize the raw sequence stored in ``x["seq"]``."""
        return tokenizer(x["seq"], truncation=True)

# example: tokenize one short sequence, print the encoding, and display the
# token ids decoded back to text as the cell's output
example = tok_func({'seq': 'ATGGAAAGAGGCACCATTCT'})    
print(example)
tokenizer.decode(example['input_ids'])

{'input_ids': [2, 33, 1246, 2031, 6, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1]}


'[CLS]ATGGAAAGAGGCACCATTCT[SEP]'

## Download benchmark datasets

In [16]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Download every configured benchmark that is not available locally yet.
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(dataset_name, version=dataset_version, use_cloud_cache=USE_CLOUD_CACHE)

# Root folder the training loop reads the downloaded files from.
benchmark_root = Path(BENCHMARKS_FOLDER)

  0%|          | 0/6 [00:00<?, ?it/s]

## Function to extract dataframe metrics row from training logs

In [17]:
def get_log_from_history(history, dataset_name):
    """Condense a training log history into one metrics row.

    Args:
        history: list of log dicts. Entries containing 'eval_loss' are treated
            as validation logs, entries containing 'test_loss' as test logs.
        dataset_name: value stored in the row's 'dataset' field.

    Returns:
        dict holding the metrics of the first test log, plus the epoch and the
        complete log entry of the validation evaluation with the lowest loss.

    Raises:
        IndexError: no entry contains 'test_loss'.
        KeyError: the test log lacks one of the expected metrics.
        ValueError: no entry contains 'eval_loss'.
    """
    # (row column, key in the test log); the order defines the column order.
    # rocauc_0 / rocauc_1 are the macro / weighted ROC AUC respectively.
    test_columns = (
        ('test_acc', 'test_accuracy'),
        ('test_f1', 'test_f1'),
        ('test_loss', 'test_loss'),
        ('test_precision', 'test_precision'),
        ('test_recall', 'test_recall'),
        ('test_auroc_macro', 'test_rocauc_0_roc_auc'),
        ('test_auroc_weighted', 'test_rocauc_1_roc_auc'),
        ('test_pr_auc', 'test_pr_auc'),
    )

    validation_entries = [entry for entry in history if 'eval_loss' in entry]
    first_test_entry = [entry for entry in history if 'test_loss' in entry][0]

    row = {'dataset': dataset_name}
    for column, log_key in test_columns:
        row[column] = first_test_entry[log_key]

    # On ties the earliest evaluation with the minimal loss wins.
    best_validation_entry = min(validation_entries, key=lambda entry: entry['eval_loss'])
    row['min_valid_loss_epoch'] = best_validation_entry['epoch']
    row['min_valid_loss_log'] = best_validation_entry
    return row

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [18]:
import evaluate
# Metrics evaluated together for the binary classification datasets.
# The order of the two roc_auc modules matters for logging: macro first, then
# weighted - get_log_from_history reads them back as 'rocauc_0' and 'rocauc_1'.
# NOTE(review): the two ROC AUC columns are identical in every logged epoch of
# this notebook - confirm that `average` passed to evaluate.load has any effect.
metric_modules = [
    'accuracy',
    'f1',
    'recall',
    'precision',
    evaluate.load('roc_auc', average='macro'),
    evaluate.load('roc_auc', average='weighted'),
    evaluate.load("Vlasta/pr_auc"),
]
binary_metrics = evaluate.combine(metric_modules)
# binary_metrics.compute(references=[0,1,1,1], predictions=[0,0,1,1], prediction_scores=[0.4,0.3,0.6,0.9])


In [19]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
import torch

def compute_metrics_binary(eval_preds):
    """Compute the combined binary metrics for one evaluation pass.

    Args:
        eval_preds: pair ``(logits, labels)``; ``logits`` is a numpy array whose
            last axis holds the per-class scores.

    Returns:
        Whatever ``binary_metrics.compute`` returns for the hard predictions,
        the reference labels and the probability assigned to label 1.
    """
    raw_logits, references = eval_preds
    # Softmax in double precision turns the logits into class probabilities.
    class_probabilities = torch.softmax(torch.from_numpy(raw_logits).double(), dim=-1).numpy()
    # Argmax over the probabilities selects the same class as argmax over the logits.
    predicted_classes = class_probabilities.argmax(axis=-1)
    positive_scores = class_probabilities[:, 1]  # probability of label 1 only
    return binary_metrics.compute(
        predictions=predicted_classes,
        references=references,
        prediction_scores=positive_scores,
    )
    
#TODO human_ensembl_regulatory dataset multilabel metrics
def compute_metrics_multi(eval_preds):
    """Compute plain accuracy for datasets with more than two classes.

    Args:
        eval_preds: pair ``(logits, labels)``.

    Returns:
        Result of the "accuracy" metric for the argmax predictions.
    """
    accuracy_metric = load_metric("accuracy")
    class_scores, references = eval_preds
    predicted_classes = np.argmax(class_scores, axis=-1)
    return accuracy_metric.compute(predictions=predicted_classes, references=references)

# One metrics row (see get_log_from_history) is appended per fine-tuning run.
outputs = []

# A falsy DATASET_THINING (0/None) or exactly 1 means "keep every sample".
use_all_samples = not DATASET_THINING or DATASET_THINING == 1

for dataset_name, dataset_version in tqdm(DATASETS):
    dataset_dir = benchmark_root / dataset_name
    # Class names are the entries of the train split; the position in the
    # sorted list is used as the integer label.
    labels = sorted([x.stem for x in (dataset_dir / 'train').iterdir()])

    # One (split, label id, sequence) record per file. The records are kept in
    # a list rather than a dict keyed by "<label> <file stem>": such a key does
    # not contain the split, so a test file named like a train file of the same
    # class would silently overwrite the train sample.
    records = []
    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (dataset_dir / split / label).glob('*.txt'):
                # random() is only drawn when thinning is actually enabled
                if use_all_samples or random() < DATASET_THINING:
                    records.append((split, nlabel, f.read_text()))

    df = pd.DataFrame(records, columns=['dset', 'cat', 'seq'])

    # The row index carries no information, so it is not turned into a column.
    ds = Dataset.from_pandas(df, preserve_index=False)

    # Tokenize sample by sample; the raw sequence is dropped afterwards.
    tok_ds = ds.map(tok_func, batched=False, remove_columns=['seq'])
    tok_ds = tok_ds.rename_columns({'cat':'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })
    # Hold out 20% of the training data for validation (fixed seed, so the
    # split is the same on every execution).
    train_valid_split = dds['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
    dds['train']=train_valid_split['train']
    dds['valid']=train_valid_split['test']

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for _ in range(RUNS):
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        if(RANDOMIZE_WEIGHTS):
            # Re-create the model from the loaded config only, so it does not
            # start from the checkpoint weights.
            # model_cls.init_weights() #Alternative
            model_cls = AutoModelForSequenceClassification.from_config(model_cls.config)
            if(RESIZE_EMBEDDINGS):
                model_cls.resize_token_embeddings(len(tokenizer))

        args = get_trainargs()

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['valid'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics,
                          callbacks=callbacks)
        trainer.train()
        # The 'test' prefix makes the test metrics show up in the log history
        # as test_* entries, which get_log_from_history looks for.
        trainer.evaluate(dds['test'], metric_key_prefix='test')
        training_log = get_log_from_history(trainer.state.log_history, dataset_name=dataset_name)
        outputs.append(training_log)
  

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.4395,0.335681,0.852533,0.86254,0.925457,0.807634,0.9409,0.9409,0.941421
1,0.2967,0.29634,0.872867,0.867634,0.833444,0.904748,0.949429,0.949429,0.949469
2,0.2448,0.313552,0.8716,0.878776,0.930924,0.832161,0.951121,0.951121,0.950919
3,0.2168,0.313519,0.878667,0.882307,0.909721,0.856497,0.951418,0.951418,0.950992
4,0.1913,0.324323,0.879467,0.880707,0.889985,0.871621,0.950603,0.950603,0.950234
5,0.1729,0.342937,0.877933,0.879373,0.889985,0.86901,0.949319,0.949319,0.949067
6,0.1567,0.351185,0.875067,0.876581,0.887452,0.865973,0.947942,0.947942,0.947483


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.3973,0.223991,0.9088,0.910165,0.924123,0.896623,0.970749,0.970749,0.971061
1,0.1959,0.221894,0.913733,0.909244,0.864382,0.959018,0.97724,0.97724,0.977242
2,0.1478,0.202793,0.923867,0.921598,0.895053,0.949767,0.978828,0.978828,0.97918
3,0.1202,0.223993,0.923,0.920907,0.896653,0.946509,0.979466,0.979466,0.980118
4,0.1022,0.20564,0.9288,0.928657,0.926924,0.930398,0.97961,0.97961,0.980331
5,0.0853,0.213778,0.9264,0.925375,0.912788,0.938314,0.979485,0.979485,0.979884
6,0.0767,0.244095,0.924333,0.92291,0.905987,0.940476,0.979252,0.979252,0.97991
7,0.0645,0.251929,0.9272,0.926484,0.917589,0.935554,0.978669,0.978669,0.979652


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/27791 [00:00<?, ?ex/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,No log,0.586488,0.685536,0.691892,0.700952,0.683063,0.75232,0.75232,0.762666
2,0.614700,0.575137,0.691773,0.677217,0.641905,0.71664,0.767795,0.767795,0.774365
3,0.614700,0.601509,0.686016,0.721667,0.808095,0.65194,0.771331,0.771331,0.775791
4,0.490700,0.62377,0.693452,0.673146,0.626667,0.727072,0.769859,0.769859,0.774205
5,0.490700,0.666814,0.684097,0.71074,0.770476,0.6596,0.762105,0.762105,0.764818
6,0.373500,0.734182,0.683617,0.678528,0.662857,0.694958,0.754879,0.754879,0.758361
7,0.373500,0.816842,0.669465,0.709773,0.802381,0.636329,0.752219,0.752219,0.755126


***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-261
Configuration saved in outputs/checkpoint-261/config.json
Model weights saved in outputs/checkpoint-261/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-261/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-261/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-522
Configuration saved in outputs/checkpoint-522/config.json
Model weights saved in outputs/checkpoint-522/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-522/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-522/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-783
Configuration saved in outputs/checkpoint-783/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/123872 [00:00<?, ?ex/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5646,0.520691,0.743609,0.73239,0.697518,0.770932,0.822467,0.822467,0.821427
1,0.4751,0.486039,0.766213,0.768172,0.770062,0.76629,0.848381,0.848381,0.846257
2,0.4211,0.488169,0.775577,0.792702,0.853092,0.740297,0.862373,0.862373,0.860519
3,0.374,0.476148,0.787525,0.78953,0.792318,0.786762,0.868026,0.868026,0.865804
4,0.3375,0.493299,0.788278,0.798896,0.83608,0.764879,0.869819,0.869819,0.866908
5,0.3006,0.47985,0.795436,0.798921,0.807939,0.790101,0.874014,0.874014,0.869551
6,0.2712,0.525057,0.796943,0.79803,0.79756,0.7985,0.873443,0.873443,0.86714
7,0.2372,0.526513,0.799257,0.798596,0.791248,0.806082,0.872475,0.872475,0.866229
8,0.1994,0.623857,0.78844,0.806669,0.877488,0.746428,0.870499,0.870499,0.859948


***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1161
Configuration saved in outputs/checkpoint-1161/config.json
Model weights saved in outputs/checkpoint-1161/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1161/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1161/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2322
Configuration saved in outputs/checkpoint-2322/config.json
Model weights saved in outputs/checkpoint-2322/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2322/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2322/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3483
Configuration saved in outputs/checkpoint-3483/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/25284 [00:00<?, ?ex/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.344163,0.876615,0.678428,0.568548,0.840954,0.868187,0.868187,0.75888
1,No log,0.285389,0.898154,0.754996,0.685484,0.840198,0.903558,0.903558,0.827833
2,0.353900,0.251447,0.915692,0.791159,0.697581,0.913732,0.925161,0.925161,0.873455
3,0.353900,0.235438,0.926769,0.826783,0.763441,0.901587,0.936163,0.936163,0.896224
4,0.158400,0.261078,0.930462,0.828528,0.733871,0.95122,0.942441,0.942441,0.907562
5,0.158400,0.338033,0.881846,0.773318,0.880376,0.689474,0.948509,0.948509,0.916378
6,0.158400,0.416001,0.921846,0.7968,0.669355,0.98419,0.940124,0.940124,0.913384
7,0.075200,0.322623,0.937538,0.849071,0.767473,0.950083,0.948468,0.948468,0.919343
8,0.075200,0.309875,0.938154,0.857346,0.811828,0.908271,0.949079,0.949079,0.919357


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-203
Configuration saved in outputs/checkpoint-203/config.json
Model weights saved in outputs/checkpoint-203/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-203/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-203/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-406
Configuration saved in outputs/checkpoint-406/config.json
Model weights saved in outputs/checkpoint-406/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-406/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-406/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-609
Configuration saved in outputs/checkpoint-609/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/139804 [00:00<?, ?ex/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece30k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/ed63502d3bd126c04d68de5b513e238d890d803f275c3974150696225df86729.7607ffdb351882cfaefe9c35d56c9ae147178c0ea5705855ec06b5f18f11a116
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece30k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,0.6469,0.617521,0.659721,0.679339,0.712441,0.649176,0.716139,0.716139,0.70783
2,0.5864,0.619061,0.652377,0.604149,0.524317,0.712657,0.736602,0.736602,0.725238
3,0.5464,0.619036,0.668447,0.711386,0.807634,0.635635,0.74101,0.74101,0.73085
4,0.5162,0.624791,0.675314,0.675066,0.666635,0.683712,0.741678,0.741678,0.732066
5,0.4866,0.654466,0.663965,0.713408,0.826673,0.627441,0.739574,0.739574,0.73075
6,0.4523,0.693736,0.668828,0.694228,0.743073,0.651409,0.734377,0.734377,0.726006


***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1311
Configuration saved in outputs/checkpoint-1311/config.json
Model weights saved in outputs/checkpoint-1311/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1311/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1311/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2622
Configuration saved in outputs/checkpoint-2622/config.json
Model weights saved in outputs/checkpoint-2622/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2622/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2622/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3933
Configuration saved in outputs/checkpoint-3933/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


## Outputs

In [20]:
# One row per fine-tuning run; the columns are the keys of the row dicts
# collected in `outputs`. The bare expression displays the table as cell output.
outputs_df = pd.DataFrame(outputs)
outputs_df

Unnamed: 0,dataset,test_acc,test_f1,test_loss,test_precision,test_recall,test_auroc_macro,test_auroc_weighted,test_pr_auc,min_valid_loss_epoch,min_valid_loss_log
0,demo_coding_vs_intergenomic_seqs,0.86732,0.861544,0.306041,0.900759,0.8256,0.946358,0.946358,0.946889,2.0,"{'eval_loss': 0.29633966088294983, 'eval_accur..."
1,demo_human_or_worm,0.92516,0.923033,0.19672,0.950038,0.89752,0.979842,0.979842,0.979391,3.0,"{'eval_loss': 0.20279289782047272, 'eval_accur..."
2,human_enhancers_cohn,0.69574,0.680375,0.577102,0.716561,0.647668,0.768942,0.768942,0.765353,2.0,"{'eval_loss': 0.5751370787620544, 'eval_accura..."
3,human_enhancers_ensembl,0.788408,0.790726,0.477034,0.782158,0.799483,0.86769,0.86769,0.862644,4.0,"{'eval_loss': 0.4761483371257782, 'eval_accura..."
4,human_nontata_promoters,0.844255,0.837847,0.489875,0.966241,0.739573,0.932945,0.932945,0.95385,4.0,"{'eval_loss': 0.23543794453144073, 'eval_accur..."
5,human_ocr_ensembl,0.655299,0.675501,0.61926,0.638103,0.717556,0.712743,0.712743,0.695418,1.0,"{'eval_loss': 0.6175205111503601, 'eval_accura..."


In [21]:
# outputs_df.groupby('dataset').agg({'accuracy' : ['mean', 'sem'], 'f1' : ['mean','sem'], 'train_runtime': ['mean', 'sem']})

In [22]:
# saving outputs to csv file
# (OUTPUT_PATH comes from the parameters cell; index=False leaves the
# DataFrame's row index out of the file)
outputs_df.to_csv(OUTPUT_PATH, index=False)