In [1]:
### Parameters
import os

# True: throw away the pretrained weights and rebuild the model from its config
# (random initialisation); this also disables learning-rate warm-up.
RANDOMIZE_WEIGHTS = True
RESIZE_EMBEDDINGS = False #only used for using tokenizers with different vocab_size than orig. model

# CSV file the final metrics table is written to
OUTPUT_PATH = './DEBERTA_RANDOM_Kmer7tokenizer_metrics.csv'

MODEL_NAME = "Vlasta/DNADebertaK7"
TOKENIZER_NAME = "armheb/DNA_bert_6"
# k-mer length used to split sequences before tokenization (None = tokenize the raw sequence)
K = 7
# 1 = overlapping k-mers (sliding window), anything else = non-overlapping k-mers
STRIDE = 1

# All datasets
# DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
#  ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
#  ('human_ensembl_regulatory', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]

# Quick check dataset
# DATASETS = [('demo_human_or_worm', 0)]


# Binary classification datasets (without human_ensembl_regulatory)
# Each entry is (dataset name, dataset version).
DATASETS = [
    ('demo_coding_vs_intergenomic_seqs', 0),
    ('demo_human_or_worm', 0),
    ('human_enhancers_cohn', 0),
    ('human_enhancers_ensembl', 0),
    ('human_nontata_promoters', 0),
    ('human_ocr_ensembl', 0),
]


# Folder the downloaded datasets are read from. Resolved from the current user's
# home directory instead of a hard-coded '/home/jovyan/...' path so the notebook
# also runs under other accounts.
# NOTE(review): the download cell does not pass this path to the library, so it
# presumably has to equal the library's default download folder - confirm.
BENCHMARKS_FOLDER = os.path.expanduser('~/.genomic_benchmarks')
# if the remote server refuses the connection - "[Errno 104] Connection reset by peer" -
# download with use_cloud_cache=True
USE_CLOUD_CACHE = True
# if less than 1, only this fraction of each dataset is used
DATASET_THINING = 1

BATCH_SIZE = 16       # per-device batch size for training and evaluation
ACCUMULATION = 4      # gradient accumulation steps
LEARNING_RATE = 1e-5
EPOCHS = 100          # upper bound; early stopping usually ends training sooner
RUNS = 1              # number of fine-tuning runs per dataset

print(DATASETS)

[('demo_coding_vs_intergenomic_seqs', 0), ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]


In [2]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
# Pretrained weights get a linear warm-up over 5% of training (5 epochs when
# training for 100 epochs); a randomly initialised model trains without warm-up.
warmup_ratio = 0 if RANDOMIZE_WEIGHTS else 0.05
def get_trainargs():
    """Build a fresh ``TrainingArguments`` object for one fine-tuning run.

    Hyper-parameters come from the notebook-level constants (LEARNING_RATE,
    BATCH_SIZE, ACCUMULATION, EPOCHS) and from ``warmup_ratio``. Evaluation and
    checkpointing both happen once per epoch, and the best checkpoint is
    reloaded when training ends. Checkpoints are written to the 'outputs' folder.

    Every call draws a new random seed in the range 1..10000, so repeated runs
    are not seeded identically.

    Returns:
        TrainingArguments: the configuration to pass to ``Trainer``.
    """
    # Imported locally so this function does not rely on a later cell having
    # imported `randrange` before the first call.
    from random import randrange

    return TrainingArguments(
        'outputs',
        learning_rate=LEARNING_RATE,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type='linear',
        fp16=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=ACCUMULATION,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        save_strategy='epoch',
        seed=randrange(1, 10001),
        report_to='none',
        load_best_model_at_end=True,
    )
# Early stopping with a patience of 5 evaluations; evaluation runs once per
# epoch (see get_trainargs), so this means 5 epochs.
callbacks = [EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.0)]



In [3]:
from itertools import product
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

# For K > 6 the tokenizer is extended with every possible K-mer over A/C/T/G
# (4**K tokens). NOTE(review): presumably because the base tokenizer only knows
# k-mers up to length 6 - confirm.
if K is not None and K > 6:
    alphabet = ('A', 'C', 'T', 'G')
    vocab = [''.join(kmer) for kmer in product(alphabet, repeat=K)]
    tokenizer.add_tokens(vocab)

In [4]:
def kmers_strideK(s, k=K):
    """Split ``s`` into consecutive, non-overlapping chunks of length ``k``.

    A trailing remainder shorter than ``k`` is dropped; a sequence shorter
    than ``k`` yields an empty list.
    """
    last_start = len(s) - k  # last position where a full k-mer still fits
    return [s[start:start + k] for start in range(0, last_start + 1, k)]

def kmers_stride1(s, k=K):
    """Return every overlapping length-``k`` window of ``s`` (sliding by one).

    Produces ``len(s) - k + 1`` k-mers, or an empty list when ``s`` is
    shorter than ``k``.
    """
    n_windows = len(s) - k + 1
    return [s[start:start + k] for start in range(n_windows)]

# pick the k-mer splitter that matches the configured stride
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# function used for the actual tokenization
if K is None:
    def tok_func(x):
        """Tokenize the raw sequence stored under ``x["seq"]``."""
        return tokenizer(x["seq"], truncation=True)
else:
    def tok_func(x):
        """Split ``x["seq"]`` into k-mers, join them with spaces and tokenize the result."""
        return tokenizer(" ".join(kmers(x["seq"])), truncation=True)

# example
example = tok_func({'seq': 'ATGGAAAGAGGCACCATTCT'})
print(example)
tokenizer.decode(example['input_ids'])

{'input_ids': [2, 7109, 16136, 19473, 16440, 4308, 4930, 7417, 17366, 8010, 19737, 17495, 8527, 5422, 9387, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


'[CLS] ATGGAAA TGGAAAG GGAAAGA GAAAGAG AAAGAGG AAGAGGC AGAGGCA GAGGCAC AGGCACC GGCACCA GCACCAT CACCATT ACCATTC CCATTCT [SEP]'

## Download benchmark datasets

In [5]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Root folder the training cell reads the datasets from.
# NOTE(review): neither is_downloaded() nor download_dataset() below is given
# this path, so the library's own default location is used for the download -
# confirm that it is the same folder as BENCHMARKS_FOLDER.
benchmark_root = Path(BENCHMARKS_FOLDER)

# Fetch every configured dataset that is not available locally yet
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(dataset_name, version=dataset_version, use_cloud_cache=USE_CLOUD_CACHE)

  0%|          | 0/6 [00:00<?, ?it/s]

## Function to extract dataframe metrics row from training logs

In [6]:
def get_log_from_history(history, dataset_name):
    """Condense a trainer log history into one metrics row for the results table.

    Args:
        history: list of log dicts (``trainer.state.log_history``). Entries that
            contain 'eval_loss' are validation logs, entries that contain
            'test_loss' are test logs.
        dataset_name: name stored in the 'dataset' column of the row.

    Returns:
        dict with the dataset name, the metrics of the first test log, and the
        epoch and full log entry of the validation evaluation with the lowest loss.

    Raises:
        IndexError: if ``history`` holds no test log.
        KeyError: if the test log lacks one of the expected metrics.
        ValueError: if ``history`` holds no validation log.
    """
    # (output column, key in the test log), listed in output column order
    test_columns = (
        ('test_acc', 'test_accuracy'),
        ('test_f1', 'test_f1'),
        ('test_loss', 'test_loss'),
        ('test_precision', 'test_precision'),
        ('test_recall', 'test_recall'),
        ('test_auroc_macro', 'test_rocauc_0_roc_auc'),
        ('test_auroc_weighted', 'test_rocauc_1_roc_auc'),
        ('test_pr_auc', 'test_pr_auc'),
    )

    valid_entries = [entry for entry in history if 'eval_loss' in entry]
    test_entries = [entry for entry in history if 'test_loss' in entry]

    first_test = test_entries[0]
    row = {'dataset': dataset_name}
    for column, log_key in test_columns:
        row[column] = first_test[log_key]

    # validation evaluation with the lowest loss (the first one wins on ties)
    best_valid = min(valid_entries, key=lambda entry: entry['eval_loss'])
    row['min_valid_loss_epoch'] = best_valid['epoch']
    row['min_valid_loss_log'] = best_valid
    return row

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [7]:
import evaluate
# Order of the two roc_auc modules matters for logging -> macro first, then weighted:
# their results are read back by position as 'rocauc_0_roc_auc' and 'rocauc_1_roc_auc'.
# NOTE(review): both values are identical in every logged run - confirm that the
# `average` argument given to evaluate.load() is really applied when computing.
roc_auc_macro = evaluate.load('roc_auc', average='macro')
roc_auc_weighted = evaluate.load('roc_auc', average='weighted')
pr_auc_metric = evaluate.load("Vlasta/pr_auc")

# Combined metric used for all binary classification datasets
binary_metrics = evaluate.combine([
    'accuracy',
    'f1',
    'recall',
    'precision',
    roc_auc_macro,
    roc_auc_weighted,
    pr_auc_metric,
])
# binary_metrics.compute(references=[0,1,1,1], predictions=[0,0,1,1], prediction_scores=[0.4,0.3,0.6,0.9])


In [8]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
import torch

def compute_metrics_binary(eval_preds):
    """Compute the combined binary metrics for one evaluation pass.

    Args:
        eval_preds: pair ``(logits, labels)``; ``logits`` is a numpy array
            that is soft-maxed over its last axis.

    Returns:
        The result of ``binary_metrics.compute`` for the predicted classes,
        the reference labels and the score of class 1.
    """
    logits, labels = eval_preds
    logits_f64 = torch.from_numpy(logits).double()
    class_probs = torch.nn.functional.softmax(logits_f64, dim=-1).numpy()
    # arg-max over the probabilities is equivalent to arg-max over the raw logits
    predicted_classes = class_probs.argmax(axis=-1)
    # the score-based metrics only need the probability of label 1
    positive_scores = class_probs[:, 1]
    return binary_metrics.compute(
        predictions=predicted_classes,
        references=labels,
        prediction_scores=positive_scores,
    )
    
#TODO human_ensembl_regulatory dataset multilabel metrics
def compute_metrics_multi(eval_preds):
    """Compute plain accuracy for datasets with more than two classes.

    Uses ``evaluate.load`` (as the binary metrics do) instead of the deprecated
    ``datasets.load_metric``.

    Args:
        eval_preds: pair ``(logits, labels)``.

    Returns:
        The result of the accuracy metric for the arg-max class predictions.
    """
    metric = evaluate.load("accuracy")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# One metrics row (see get_log_from_history) per dataset and run
outputs = []

for dataset_name, dataset_version in tqdm(DATASETS):
    # class names are the sub-folder names of the train split; their sorted
    # position becomes the numeric label
    labels = sorted([x.stem for x in (benchmark_root / dataset_name / 'train').iterdir()])

    tmp_dict = {}

    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (benchmark_root / dataset_name / split / label).glob('*.txt'):
                # Keep every sample unless thinning is active; the short-circuit
                # means random numbers are only drawn when they are needed.
                if not DATASET_THINING or DATASET_THINING == 1 or random() < DATASET_THINING:
                    # The split is part of the key so that files with the same
                    # name in train and test cannot overwrite each other.
                    tmp_dict[f"{split} {label} {f.stem}"] = (split, nlabel, f.read_text())

    df = pd.DataFrame.from_dict(tmp_dict).T.rename(columns = {0: "dset", 1: "cat", 2: "seq"})

    ds = Dataset.from_pandas(df)

    tok_ds = ds.map(tok_func, batched=False, remove_columns=['__index_level_0__', 'seq'])
    tok_ds = tok_ds.rename_columns({'cat':'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })
    # hold out 20% of the training data as validation set (fixed seed -> same split every run)
    train_valid_split = dds['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
    dds['train']=train_valid_split['train']
    dds['valid']=train_valid_split['test']

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for _ in range(RUNS):
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        if RANDOMIZE_WEIGHTS:
            # model_cls.init_weights() #Alternative
            # rebuild the model from its config only, i.e. without the pretrained weights
            model_cls = AutoModelForSequenceClassification.from_config(model_cls.config)
        # Resizing is independent of the weight initialisation: a tokenizer with a
        # different vocabulary size needs it for pretrained weights as well.
        if RESIZE_EMBEDDINGS:
            model_cls.resize_token_embeddings(len(tokenizer))

        args = get_trainargs()

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['valid'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics,
                          callbacks=callbacks)
        trainer.train()
        # The 'test' prefix makes the test metrics appear as test_* entries in the
        # log history, which get_log_from_history relies on.
        # NOTE(review): the "did not find eval_loss" early-stopping warning shows up
        # after this call - presumably harmless, since training is already over; confirm.
        trainer.evaluate(dds['test'], metric_key_prefix='test')
        training_log = get_log_from_history(trainer.state.log_history, dataset_name=dataset_name)
        outputs.append(training_log)
  

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/705 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226M [00:00<?, ?B/s]

Some weights of the model checkpoint at Vlasta/DNADebertaK7 were not used when initializing DebertaForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at Vlasta/DNADeb

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.4209,0.254879,0.897467,0.893001,0.855847,0.933527,0.964487,0.964487,0.962525
1,0.2357,0.243312,0.906,0.907735,0.924923,0.891173,0.967246,0.967246,0.964978
2,0.2002,0.237611,0.906333,0.904958,0.891986,0.918314,0.967175,0.967175,0.964856
3,0.1887,0.250502,0.905,0.905698,0.912522,0.898975,0.966243,0.966243,0.963904
4,0.1746,0.259896,0.903,0.903776,0.911188,0.896484,0.965173,0.965173,0.96293
5,0.1643,0.272209,0.9016,0.900566,0.891319,0.910007,0.964319,0.964319,0.961993
6,0.156,0.271728,0.8996,0.899627,0.899987,0.899267,0.962919,0.962919,0.960723
7,0.1428,0.281992,0.898733,0.899357,0.905054,0.893732,0.962226,0.962226,0.960111


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.3339,0.140131,0.948267,0.94749,0.933591,0.961808,0.988553,0.988553,0.98913
1,0.1208,0.13667,0.949467,0.948217,0.925457,0.972125,0.990389,0.990389,0.990846
2,0.0961,0.131615,0.952067,0.952159,0.954127,0.950199,0.990318,0.990318,0.990573
3,0.0738,0.164159,0.950067,0.949003,0.929324,0.969533,0.990398,0.990398,0.99064
4,0.0646,0.160812,0.951933,0.951412,0.941326,0.961717,0.990048,0.990048,0.990306
5,0.0571,0.183811,0.9508,0.950536,0.945593,0.955532,0.989754,0.989754,0.98989
6,0.0492,0.205981,0.9494,0.949444,0.950393,0.948496,0.989458,0.989458,0.989683
7,0.0419,0.209591,0.946867,0.947243,0.954127,0.940457,0.989267,0.989267,0.989397


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/27791 [00:00<?, ?ex/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.62169,0.652675,0.547217,0.416667,0.796903,0.774754,0.774754,0.771425
1,0.608100,0.539292,0.722955,0.729445,0.741429,0.717842,0.801943,0.801943,0.79717
2,0.608100,0.553506,0.71504,0.688679,0.625714,0.765734,0.808594,0.808594,0.806689
3,0.470000,0.561246,0.721276,0.72161,0.717143,0.726133,0.806808,0.806808,0.804802
4,0.470000,0.588736,0.718398,0.734509,0.773333,0.699397,0.800664,0.800664,0.799394
5,0.394800,0.630441,0.709763,0.703722,0.684286,0.724294,0.793418,0.793418,0.79275
6,0.394800,0.677274,0.708323,0.727842,0.774286,0.686655,0.788443,0.788443,0.788711


***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-260
Configuration saved in outputs/checkpoint-260/config.json
Model weights saved in outputs/checkpoint-260/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-260/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-260/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-520
Configuration saved in outputs/checkpoint-520/config.json
Model weights saved in outputs/checkpoint-520/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-520/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-520/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-780
Configuration saved in outputs/checkpoint-780/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/123872 [00:00<?, ?ex/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.5269,0.521304,0.747699,0.710724,0.616199,0.839504,0.847624,0.847624,0.843902
1,0.4582,0.472567,0.781228,0.796312,0.850203,0.748846,0.86694,0.86694,0.870305
2,0.4196,0.446974,0.798073,0.793597,0.771774,0.816689,0.873685,0.873685,0.875875
3,0.3934,0.448329,0.799688,0.808795,0.842285,0.777866,0.879408,0.879408,0.881986
4,0.3592,0.447454,0.808245,0.813015,0.828804,0.797816,0.884081,0.884081,0.886381
5,0.3191,0.465973,0.803778,0.817572,0.874171,0.767857,0.887193,0.887193,0.887124
6,0.2977,0.488199,0.806846,0.81913,0.86957,0.774221,0.885744,0.885744,0.884834
7,0.2687,0.472556,0.818524,0.81966,0.819923,0.819397,0.886639,0.886639,0.885222


***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1161
Configuration saved in outputs/checkpoint-1161/config.json
Model weights saved in outputs/checkpoint-1161/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1161/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1161/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2322
Configuration saved in outputs/checkpoint-2322/config.json
Model weights saved in outputs/checkpoint-2322/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2322/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2322/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-3483
Configuration saved in outputs/checkpoint-3483/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/25284 [00:00<?, ?ex/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.298063,0.897538,0.759567,0.706989,0.820593,0.894445,0.894445,0.822216
1,No log,0.23734,0.916615,0.810886,0.780914,0.843251,0.930852,0.930852,0.891038
2,0.336200,0.221932,0.937846,0.844615,0.737903,0.98741,0.94312,0.94312,0.915139
3,0.336200,0.206618,0.945538,0.86918,0.790323,0.965517,0.948447,0.948447,0.924212
4,0.116500,0.207115,0.944,0.871469,0.829301,0.918155,0.949613,0.949613,0.926581
5,0.116500,0.248172,0.946462,0.871111,0.790323,0.970297,0.944809,0.944809,0.925195
6,0.116500,0.257516,0.948308,0.878261,0.814516,0.95283,0.941843,0.941843,0.923551
7,0.049800,0.262832,0.951385,0.885174,0.818548,0.963608,0.934543,0.934543,0.911863
8,0.049800,0.255867,0.951692,0.887455,0.831989,0.950845,0.932901,0.932901,0.912505


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-203
Configuration saved in outputs/checkpoint-203/config.json
Model weights saved in outputs/checkpoint-203/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-203/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-203/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-406
Configuration saved in outputs/checkpoint-406/config.json
Model weights saved in outputs/checkpoint-406/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-406/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-406/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-609
Configuration saved in outputs/checkpoint-609/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/139804 [00:00<?, ?ex/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6152,0.572747,0.702065,0.724976,0.776155,0.680129,0.772173,0.772173,0.758422
1,0.5498,0.552821,0.718754,0.732104,0.759566,0.706558,0.791967,0.791967,0.780183
2,0.5174,0.562704,0.713748,0.747784,0.838737,0.674627,0.797817,0.797817,0.786437
3,0.4992,0.54472,0.726718,0.731783,0.736852,0.726783,0.801629,0.801629,0.791854
4,0.4899,0.588116,0.704783,0.748854,0.869934,0.657361,0.801767,0.801767,0.792238
5,0.4715,0.556899,0.721091,0.707828,0.667766,0.753002,0.803087,0.803087,0.793391
6,0.4525,0.577096,0.707453,0.747541,0.856079,0.663429,0.799831,0.799831,0.792095
7,0.4317,0.603253,0.720185,0.749166,0.825919,0.685466,0.801338,0.801338,0.793351
8,0.4146,0.600806,0.713271,0.744443,0.825448,0.677916,0.794773,0.794773,0.784562


***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1310
Configuration saved in outputs/checkpoint-1310/config.json
Model weights saved in outputs/checkpoint-1310/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1310/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1310/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2620
Configuration saved in outputs/checkpoint-2620/config.json
Model weights saved in outputs/checkpoint-2620/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2620/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2620/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-3930
Configuration saved in outputs/checkpoint-3930/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


## Outputs

In [9]:
# Turn the collected per-run metric rows into one table and display it
outputs_df = pd.DataFrame(outputs)
outputs_df

Unnamed: 0,dataset,test_acc,test_f1,test_loss,test_precision,test_recall,test_auroc_macro,test_auroc_weighted,test_pr_auc,min_valid_loss_epoch,min_valid_loss_log
0,demo_coding_vs_intergenomic_seqs,0.901,0.899452,0.251458,0.913743,0.8856,0.963838,0.963838,0.962015,3.0,"{'eval_loss': 0.23761099576950073, 'eval_accur..."
1,demo_human_or_worm,0.9554,0.955622,0.124257,0.950891,0.9604,0.991569,0.991569,0.991967,3.0,"{'eval_loss': 0.13161461055278778, 'eval_accur..."
2,human_enhancers_cohn,0.720927,0.724456,0.546071,0.715408,0.733736,0.797471,0.797471,0.788885,2.0,"{'eval_loss': 0.5392917990684509, 'eval_accura..."
3,human_enhancers_ensembl,0.795221,0.790319,0.452315,0.809701,0.771844,0.870675,0.870675,0.870248,3.0,"{'eval_loss': 0.4469737112522125, 'eval_accura..."
4,human_nontata_promoters,0.871043,0.867146,0.480199,0.986508,0.77355,0.946988,0.946988,0.965614,4.0,"{'eval_loss': 0.2066183090209961, 'eval_accura..."
5,human_ocr_ensembl,0.723764,0.725624,0.547127,0.72077,0.730545,0.79997,0.79997,0.786248,4.0,"{'eval_loss': 0.5447200536727905, 'eval_accura..."


In [10]:
# Aggregate over repeated runs (useful when RUNS > 1); column names as produced by get_log_from_history:
# outputs_df.groupby('dataset').agg({'test_acc' : ['mean', 'sem'], 'test_f1' : ['mean','sem']})

In [11]:
# Save the metrics table as CSV to OUTPUT_PATH; index=False leaves the
# DataFrame index out of the file
outputs_df.to_csv(OUTPUT_PATH, index=False)