In [1]:
### Parameters
from pathlib import Path

# True: throw away the pretrained weights and train the same architecture from random init
# (this also switches the learning-rate warmup off).
RANDOMIZE_WEIGHTS = False
# Only applied together with RANDOMIZE_WEIGHTS: resize the embedding matrix to the tokenizer's
# vocabulary, for tokenizers whose vocab size differs from the original model's.
RESIZE_EMBEDDINGS = False

# CSV file the final metrics table is written to.
OUTPUT_PATH = './DEBERTA_Sentencepiece10tokenizer_metrics.csv'

# Hugging Face Hub identifiers of the pretrained model and its tokenizer.
MODEL_NAME = "Vlasta/DNADebertaSentencepiece10k_continuation"
TOKENIZER_NAME = "Vlasta/DNA_Sentencepiece_vocab_10000_max_tokenlen_45"
# K-mer length for k-mer tokenizers; None feeds the raw sequence to the tokenizer.
K = None
# 1 -> overlapping k-mers (stride 1); anything else -> non-overlapping k-mers (stride K).
STRIDE = None

# All datasets
# DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
#  ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
#  ('human_ensembl_regulatory', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]

# Quick check dataset
# DATASETS = [('demo_human_or_worm', 0)]


# Binary classification datasets (without human_ensembl_regulatory)
# Each entry is (dataset name, dataset version).
DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
 ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
  ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]


# Folder the downloaded benchmarks are read from. It is derived from the current user's home
# instead of the previously hard-coded '/home/jovyan/.genomic_benchmarks', so the notebook is not
# tied to one machine. NOTE(review): the download cell does not pass a destination, so this is
# presumably the library's default cache location -- confirm against genomic_benchmarks.
BENCHMARKS_FOLDER = str(Path.home() / '.genomic_benchmarks')
# If Ensembl refuses the connection ("[Errno 104] Connection reset by peer"),
# download with use_cloud_cache=True.
USE_CLOUD_CACHE = True
# if less than 1, only this fraction of each dataset is used
DATASET_THINING = 1

BATCH_SIZE = 32    # per-device batch size for training and evaluation
ACCUMULATION = 2   # gradient accumulation steps
LEARNING_RATE = 1e-5
EPOCHS = 100       # upper bound; early stopping usually ends training sooner
RUNS = 1           # independent fine-tuning runs per dataset

print(DATASETS)

[('demo_coding_vs_intergenomic_seqs', 0), ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]


In [2]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
# Linear warmup over the first 5% of training (5 epochs of a 100-epoch budget);
# no warmup at all when the model is trained from randomly initialised weights.
warmup_ratio = 0 if RANDOMIZE_WEIGHTS else 0.05
def get_trainargs():
    """Return a new TrainingArguments object for one fine-tuning run.

    Every call draws a fresh random seed, so repeated runs differ from each other.
    Checkpoints go to the 'outputs' directory; evaluation and checkpointing both
    happen once per epoch and the best checkpoint is restored when training ends.
    """
    hyperparams = dict(
        learning_rate=LEARNING_RATE,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type='linear',
        fp16=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=ACCUMULATION,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        save_strategy='epoch',
        # randrange is imported in a later cell; it only has to exist when this function is called
        seed=randrange(1,10001),
        report_to='none',
        load_best_model_at_end=True,
    )
    return TrainingArguments('outputs', **hyperparams)
# Stop training once 5 consecutive evaluations (one per epoch) bring no improvement.
callbacks = [EarlyStoppingCallback(early_stopping_threshold=0.0, early_stopping_patience=5)]



In [3]:
from itertools import product
from transformers import AutoTokenizer

# Load the tokenizer named in the parameters cell.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

# For k-mer lengths above 6, add every possible k-mer over the DNA alphabet as a token.
if K is not None and K > 6:
    alphabet = ('A', 'C', 'T', 'G')
    vocab = [''.join(letters) for letters in product(alphabet, repeat=K)]
    tokenizer.add_tokens(vocab)

In [4]:
def kmers_strideK(s, k=K):
    """Cut `s` into consecutive, non-overlapping pieces of length `k`.

    A trailing remainder shorter than `k` is dropped.
    """
    last_start = len(s) - k
    return [s[start:start + k] for start in range(0, last_start + 1, k)]

def kmers_stride1(s, k=K):
    """Return every length-`k` window of `s`, moving one character at a time."""
    windows = []
    for start in range(len(s) - k + 1):
        windows.append(s[start:start + k])
    return windows

# Overlapping k-mers only for STRIDE == 1; every other setting uses non-overlapping k-mers.
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# Tokenization function applied to every dataset row (a mapping with a "seq" entry).
if K is None:
    def tok_func(x):
        """Tokenize the raw sequence with the loaded tokenizer, truncating over-long inputs."""
        return tokenizer(x["seq"], truncation=True)
else:
    def tok_func(x):
        """Split the sequence into k-mers, join them with spaces, then tokenize with truncation."""
        return tokenizer(" ".join(kmers(x["seq"])), truncation=True)

# example: tokenize a short sequence and decode the ids back to text
example = tok_func({'seq': 'ATGGAAAGAGGCACCATTCT'})
print(example)
tokenizer.decode(example['input_ids'])

{'input_ids': [2, 33, 1246, 2031, 6, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1]}


'[CLS]ATGGAAAGAGGCACCATTCT[SEP]'

## Download benchmark datasets

In [5]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Root folder the fine-tuning loop reads the sequence files from.
benchmark_root = Path(BENCHMARKS_FOLDER)

# Fetch each configured dataset unless it is already available locally.
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(dataset_name, version=dataset_version, use_cloud_cache=USE_CLOUD_CACHE)

  0%|          | 0/6 [00:00<?, ?it/s]

## Function to extract dataframe metrics row from training logs

In [6]:
def get_log_from_history(history, dataset_name):
    """Condense a training log history into one flat metrics row.

    Parameters
    ----------
    history : list of dict
        Log entries. Entries containing 'eval_loss' are treated as validation
        logs, entries containing 'test_loss' as test-set logs.
    dataset_name : str
        Stored in the 'dataset' column of the row.

    Returns
    -------
    dict
        The metrics of the first test log, plus the epoch and the full log entry
        of the validation evaluation with the lowest 'eval_loss'. Metrics missing
        from the test log (e.g. when only accuracy was computed for a multi-class
        dataset) are reported as None instead of raising KeyError.

    Raises
    ------
    IndexError
        If `history` has no test-set entry.
    ValueError
        If `history` has no validation entry.
    """
    eval_dicts = [x for x in history if 'eval_loss' in x]
    test_dicts = [x for x in history if 'test_loss' in x]
    if not test_dicts:
        raise IndexError(f"no log entry with 'test_loss' found for dataset {dataset_name!r}")
    if not eval_dicts:
        raise ValueError(f"no log entry with 'eval_loss' found for dataset {dataset_name!r}")

    test_log = test_dicts[0]
    # On ties min() keeps the earliest entry.
    min_loss_dict = min(eval_dicts, key=lambda x: x['eval_loss'])

    row = {
        'dataset': dataset_name,
        'test_acc': test_log.get('test_accuracy'),
        'test_f1': test_log.get('test_f1'),
        'test_loss': test_log['test_loss'],
        'test_precision': test_log.get('test_precision'),
        'test_recall': test_log.get('test_recall'),
        # The two ROC AUC metrics share one name in the combined metric and are told apart by
        # their position there: index 0 is the macro average, index 1 the weighted average.
        'test_auroc_macro': test_log.get('test_rocauc_0_roc_auc'),
        'test_auroc_weighted': test_log.get('test_rocauc_1_roc_auc'),
        'test_pr_auc': test_log.get('test_pr_auc'),

        'min_valid_loss_epoch': min_loss_dict['epoch'],
        'min_valid_loss_log': min_loss_dict,
    }
    return row

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [7]:
import evaluate
# Order of roc_auc matters for logging -> macro first, then weighted:
# downstream code tells the two apart by their position in the combined metric.
roc_auc_macro = evaluate.load('roc_auc', average='macro')
roc_auc_weighted = evaluate.load('roc_auc', average='weighted')
pr_auc = evaluate.load("Vlasta/pr_auc")

# Single object that computes every binary-classification metric in one call.
binary_metrics = evaluate.combine(
    ['accuracy', 'f1', 'recall', 'precision', roc_auc_macro, roc_auc_weighted, pr_auc]
)
# binary_metrics.compute(references=[0,1,1,1], predictions=[0,0,1,1], prediction_scores=[0.4,0.3,0.6,0.9])


In [8]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
import torch

def compute_metrics_binary(eval_preds):
    """Compute the combined binary metrics from a (logits, labels) pair.

    The logits are turned into class probabilities with a softmax over the last
    axis (in double precision); the predicted class is the most probable one and
    the probability of class 1 is passed on as the prediction score.
    Assumes `logits` is a 2-D numpy array with one column per class -- TODO confirm.
    """
    logits, labels = eval_preds
    logits_f64 = torch.from_numpy(logits).double()
    class_probs = torch.softmax(logits_f64, dim=-1).numpy()
    # arg-max of the probabilities equals arg-max of the raw logits
    predicted_class = class_probs.argmax(axis=-1)
    positive_scores = class_probs[:, 1]  # probability assigned to label 1
    return binary_metrics.compute(
        references=labels,
        predictions=predicted_class,
        prediction_scores=positive_scores,
    )
    
#TODO human_ensembl_regulatory dataset multilabel metrics
def compute_metrics_multi(eval_preds):
    """Compute accuracy from a (logits, labels) pair for datasets with more than two classes.

    The predicted class is the arg-max of the logits over the last axis. The metric
    is loaded through `evaluate` (already used for the binary metrics) rather than the
    deprecated `datasets.load_metric`.
    """
    metric = evaluate.load("accuracy")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# One metrics row per (dataset, run); turned into a DataFrame in the Outputs section.
outputs = []

for dataset_name, dataset_version in tqdm(DATASETS):
    # Class names are the entries of the train folder; sorting fixes the label ids.
    labels = sorted([x.stem for x in (benchmark_root / dataset_name / 'train').iterdir()])

    # One (split, label id, sequence) record per file.
    # A list is used on purpose: the earlier dict keyed by "<label> <file stem>" did not include
    # the split, so a test file with the same stem as a train file silently replaced the train
    # sample and shrank the training set.
    records = []
    keep_all = not DATASET_THINING or DATASET_THINING == 1

    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (benchmark_root / dataset_name / split / label).glob('*.txt'):
                # With thinning enabled each file is kept with probability DATASET_THINING.
                if keep_all or random() < DATASET_THINING:
                    records.append((split, nlabel, f.read_text()))

    df = pd.DataFrame(records, columns=["dset", "cat", "seq"])

    # No index column is carried over, so only the raw sequence has to be dropped after tokenizing.
    ds = Dataset.from_pandas(df, preserve_index=False)

    tok_ds = ds.map(tok_func, batched=False, remove_columns=['seq'])
    tok_ds = tok_ds.rename_columns({'cat':'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })
    # Hold out 20% of the training data for validation (fixed seed, so the split is repeatable).
    train_valid_split = dds['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
    dds['train']=train_valid_split['train']
    dds['valid']=train_valid_split['test']

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for _ in range(RUNS):
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        if(RANDOMIZE_WEIGHTS):
            # Same architecture, freshly initialised weights.
            # model_cls.init_weights() #Alternative
            model_cls = AutoModelForSequenceClassification.from_config(model_cls.config)
            if(RESIZE_EMBEDDINGS):
                model_cls.resize_token_embeddings(len(tokenizer))

        args = get_trainargs()

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['valid'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics,
                          callbacks=callbacks)
        trainer.train()
        # Evaluating with the 'test' prefix puts the test metrics into the trainer's log history.
        # NOTE(review): this call is presumably what makes the early-stopping callback print its
        # "did not find eval_loss" warning, since the metrics are then named test_* -- confirm.
        trainer.evaluate(dds['test'], metric_key_prefix='test')
        training_log = get_log_from_history(trainer.state.log_history, dataset_name=dataset_name)
        outputs.append(training_log)
  

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

Some weights of the model checkpoint at Vlasta/DNADebertaSentencepiece10k_continuation were not used when initializing DebertaForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5887,0.304565,0.879533,0.880117,0.884518,0.875759,0.951101,0.951101,0.95084
1,0.292,0.260519,0.893667,0.890776,0.867316,0.915541,0.960903,0.960903,0.962131
2,0.2472,0.240098,0.902733,0.903217,0.907854,0.898627,0.965518,0.965518,0.966484
3,0.2225,0.235391,0.904133,0.906319,0.92759,0.886002,0.967931,0.967931,0.968384
4,0.1954,0.224067,0.9096,0.908687,0.89972,0.917834,0.969615,0.969615,0.97066
5,0.1703,0.250963,0.9102,0.909603,0.90372,0.915563,0.968715,0.968715,0.969329
6,0.1391,0.263706,0.908067,0.907094,0.89772,0.916667,0.967209,0.967209,0.967958
7,0.1138,0.306181,0.9006,0.903564,0.931458,0.877292,0.964624,0.964624,0.964315
8,0.0888,0.341496,0.901733,0.902565,0.910388,0.894875,0.961528,0.961528,0.96144
9,0.0743,0.36987,0.901667,0.902143,0.906654,0.897676,0.956513,0.956513,0.952181


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece10k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/baf8e4e3448eedf0439db2bda9d04b223c693bf70da2782a5349f4bc6bee0dee.032849a3607451735219b1e4c96ccc148fa1dee321ac1b245a3b741e6545c6ef
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece10k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5745,0.187613,0.9384,0.93802,0.932391,0.943717,0.983712,0.983712,0.985388
1,0.1782,0.139777,0.948933,0.947919,0.929591,0.966986,0.990009,0.990009,0.9906
2,0.1297,0.120682,0.955333,0.955547,0.960261,0.950878,0.991718,0.991718,0.99203
3,0.1105,0.12199,0.956733,0.956931,0.961462,0.952444,0.992206,0.992206,0.992484
4,0.0914,0.158686,0.948333,0.946482,0.913855,0.981524,0.992789,0.992789,0.993086
5,0.0724,0.13563,0.956133,0.955839,0.949593,0.962167,0.991612,0.991612,0.99194
6,0.0587,0.151392,0.954067,0.953291,0.937592,0.969526,0.991821,0.991821,0.992148
7,0.0435,0.181287,0.9558,0.956067,0.961995,0.950211,0.990974,0.990974,0.991525


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/27791 [00:00<?, ?ex/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece10k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/baf8e4e3448eedf0439db2bda9d04b223c693bf70da2782a5349f4bc6bee0dee.032849a3607451735219b1e4c96ccc148fa1dee321ac1b245a3b741e6545c6ef
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece10k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,No log,0.590846,0.69633,0.692121,0.677619,0.707256,0.767592,0.767592,0.768614
2,0.613200,0.532681,0.730391,0.735902,0.745714,0.726345,0.811402,0.811402,0.813213
3,0.613200,0.522636,0.73255,0.72243,0.690952,0.756912,0.820151,0.820151,0.82099
4,0.507400,0.536138,0.729192,0.714683,0.673333,0.761443,0.818283,0.818283,0.821312
5,0.507400,0.53824,0.722715,0.744474,0.801905,0.694719,0.818157,0.818157,0.821367
6,0.454000,0.594672,0.719357,0.703497,0.660952,0.751896,0.808846,0.808846,0.81468
7,0.454000,0.599543,0.722236,0.720695,0.711429,0.730205,0.803984,0.803984,0.810169
8,0.368800,0.643448,0.706884,0.706673,0.700952,0.712488,0.783065,0.783065,0.791304


***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-261
Configuration saved in outputs/checkpoint-261/config.json
Model weights saved in outputs/checkpoint-261/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-261/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-261/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-522
Configuration saved in outputs/checkpoint-522/config.json
Model weights saved in outputs/checkpoint-522/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-522/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-522/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-783
Configuration saved in outputs/checkpoint-783/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/123872 [00:00<?, ?ex/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece10k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/baf8e4e3448eedf0439db2bda9d04b223c693bf70da2782a5349f4bc6bee0dee.032849a3607451735219b1e4c96ccc148fa1dee321ac1b245a3b741e6545c6ef
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece10k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5564,0.472286,0.781497,0.790094,0.817569,0.764406,0.860494,0.860494,0.848655
1,0.4419,0.404293,0.815241,0.81404,0.80398,0.824355,0.901399,0.901399,0.897195
2,0.4,0.382126,0.826866,0.834949,0.87064,0.80207,0.911859,0.911859,0.90827
3,0.3615,0.366842,0.837953,0.843884,0.870747,0.81863,0.919647,0.919647,0.917124
4,0.3216,0.376104,0.839513,0.851153,0.912262,0.797717,0.925175,0.925175,0.922126
5,0.2768,0.351127,0.8513,0.851778,0.849454,0.854115,0.928896,0.928896,0.926238
6,0.2241,0.371838,0.85426,0.855342,0.856623,0.854064,0.929066,0.929066,0.925133
7,0.1831,0.413466,0.856359,0.858716,0.867858,0.849764,0.929771,0.929771,0.92691
8,0.1409,0.44977,0.859588,0.858414,0.846244,0.870939,0.931195,0.931195,0.927893
9,0.1177,0.475988,0.859265,0.863653,0.886155,0.842266,0.931317,0.931317,0.927784


***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1161
Configuration saved in outputs/checkpoint-1161/config.json
Model weights saved in outputs/checkpoint-1161/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1161/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1161/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2322
Configuration saved in outputs/checkpoint-2322/config.json
Model weights saved in outputs/checkpoint-2322/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2322/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2322/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3483
Configuration saved in outputs/checkpoint-3483/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/25284 [00:00<?, ?ex/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece10k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/baf8e4e3448eedf0439db2bda9d04b223c693bf70da2782a5349f4bc6bee0dee.032849a3607451735219b1e4c96ccc148fa1dee321ac1b245a3b741e6545c6ef
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece10k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.371415,0.881538,0.698984,0.600806,0.835514,0.854038,0.854038,0.753073
1,No log,0.297169,0.899077,0.74375,0.639785,0.88806,0.895579,0.895579,0.823122
2,0.433800,0.262147,0.912615,0.788376,0.711022,0.884615,0.911777,0.911777,0.854113
3,0.433800,0.247859,0.918154,0.809456,0.759409,0.866564,0.922823,0.922823,0.875674
4,0.260400,0.234099,0.928,0.821918,0.725806,0.947368,0.932673,0.932673,0.893344
5,0.260400,0.214541,0.935692,0.849964,0.795699,0.912173,0.93684,0.93684,0.90477
6,0.260400,0.226142,0.933538,0.834609,0.732527,0.969751,0.941896,0.941896,0.910436
7,0.166900,0.20275,0.942154,0.862974,0.795699,0.942675,0.948459,0.948459,0.920721
8,0.166900,0.212568,0.943385,0.863905,0.784946,0.960526,0.945527,0.945527,0.917539
9,0.099700,0.234764,0.944308,0.867593,0.797043,0.951846,0.94859,0.94859,0.921488


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-203
Configuration saved in outputs/checkpoint-203/config.json
Model weights saved in outputs/checkpoint-203/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-203/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-203/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-406
Configuration saved in outputs/checkpoint-406/config.json
Model weights saved in outputs/checkpoint-406/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-406/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-406/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-609
Configuration saved in outputs/checkpoint-609/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/139804 [00:00<?, ?ex/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaSentencepiece10k_continuation/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/baf8e4e3448eedf0439db2bda9d04b223c693bf70da2782a5349f4bc6bee0dee.032849a3607451735219b1e4c96ccc148fa1dee321ac1b245a3b741e6545c6ef
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaSentencepiece10k_continuation",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,


Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
1,0.6534,0.610195,0.669496,0.656728,0.624882,0.691995,0.738911,0.738911,0.717689
2,0.5726,0.563682,0.710219,0.702938,0.677663,0.730172,0.787306,0.787306,0.772575
3,0.5421,0.54003,0.72605,0.714449,0.67738,0.75581,0.809596,0.809596,0.799731
4,0.5066,0.53446,0.736779,0.747484,0.770028,0.726222,0.816334,0.816334,0.805671
5,0.4791,0.520775,0.739354,0.754778,0.792837,0.720205,0.821782,0.821782,0.813743
6,0.4379,0.537755,0.745315,0.740716,0.719039,0.76374,0.826466,0.826466,0.81879
7,0.3905,0.593559,0.729007,0.703408,0.635156,0.788095,0.825092,0.825092,0.81732
8,0.3375,0.624163,0.730485,0.71942,0.682941,0.760017,0.818394,0.818394,0.809452
9,0.2949,0.668886,0.733346,0.740559,0.752215,0.729258,0.81501,0.81501,0.804347
10,0.2517,0.736018,0.726336,0.724126,0.709896,0.738938,0.811078,0.811078,0.80198


***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-1311
Configuration saved in outputs/checkpoint-1311/config.json
Model weights saved in outputs/checkpoint-1311/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1311/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1311/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-2622
Configuration saved in outputs/checkpoint-2622/config.json
Model weights saved in outputs/checkpoint-2622/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2622/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2622/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 32
Saving model checkpoint to outputs/checkpoint-3933
Configuration saved in outputs/checkpoint-3933/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


## Outputs

In [9]:
# Collect the per-run metric rows into one table; the bare expression below renders it as the cell output.
outputs_df = pd.DataFrame(outputs)
outputs_df

Unnamed: 0,dataset,test_acc,test_f1,test_loss,test_precision,test_recall,test_auroc_macro,test_auroc_weighted,test_pr_auc,min_valid_loss_epoch,min_valid_loss_log
0,demo_coding_vs_intergenomic_seqs,0.91012,0.909076,0.224621,0.919758,0.89864,0.969634,0.969634,0.970662,5.0,"{'eval_loss': 0.22406677901744843, 'eval_accur..."
1,demo_human_or_worm,0.95152,0.951782,0.124606,0.94666,0.95696,0.991599,0.991599,0.991899,3.0,"{'eval_loss': 0.12068185955286026, 'eval_accur..."
2,human_enhancers_cohn,0.732585,0.721773,0.52629,0.752185,0.693725,0.816845,0.816845,0.805225,3.0,"{'eval_loss': 0.5226361155509949, 'eval_accura..."
3,human_enhancers_ensembl,0.85205,0.852165,0.353624,0.851506,0.852825,0.927938,0.927938,0.925206,6.0,"{'eval_loss': 0.35112687945365906, 'eval_accur..."
4,human_nontata_promoters,0.866726,0.862431,0.461015,0.983581,0.767854,0.93484,0.93484,0.95862,8.0,"{'eval_loss': 0.20274955034255981, 'eval_accur..."
5,human_ocr_ensembl,0.738756,0.751395,0.528071,0.716719,0.789597,0.8174,0.8174,0.801343,5.0,"{'eval_loss': 0.5207751393318176, 'eval_accura..."


In [10]:
# outputs_df.groupby('dataset').agg({'test_acc': ['mean', 'sem'], 'test_f1': ['mean', 'sem']})

In [11]:
# Save the metrics table to OUTPUT_PATH as CSV; index=False leaves out the row-index column.
outputs_df.to_csv(OUTPUT_PATH, index=False)