In [1]:
### Parameters
import os

# True: fine-tune from randomly initialised weights (the model is rebuilt from
# its config and warm-up is disabled). False: start from the pretrained checkpoint.
RANDOMIZE_WEIGHTS = False
# Resize the embedding matrix to len(tokenizer); only needed when the tokenizer
# has a different vocab_size than the original model.
# NOTE: the training cell only honours this flag when RANDOMIZE_WEIGHTS is True.
RESIZE_EMBEDDINGS = False

# CSV file the per-dataset metrics table is written to at the end of the notebook.
OUTPUT_PATH = './DEBERTA_Kmer8tokenizer_metrics.csv'

MODEL_NAME = "davidcechak/DNADebertaK8b"
TOKENIZER_NAME = "armheb/DNA_bert_6"
# k-mer length used to split sequences before tokenization (None = no splitting).
K = 8
# 1 = overlapping k-mers (window shifted by one base); any other value = non-overlapping k-mers.
STRIDE = 1

# All datasets
# DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
#  ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0),
#  ('human_ensembl_regulatory', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]

# Quick check dataset
# DATASETS = [('demo_human_or_worm', 0)]


# Binary classification datasets (without human_ensembl_regulatory).
# Each entry is (dataset name, dataset version).
DATASETS = [
    ('demo_coding_vs_intergenomic_seqs', 0),
    ('demo_human_or_worm', 0),
    ('human_enhancers_cohn', 0),
    ('human_enhancers_ensembl', 0),
    ('human_nontata_promoters', 0),
    ('human_ocr_ensembl', 0),
]


# Folder the downloaded benchmark datasets are read from. Resolved against the
# current user's home directory instead of a hard-coded '/home/jovyan', because
# download_dataset is called without an explicit destination and therefore uses
# the library's default cache (presumably ~/.genomic_benchmarks - confirm).
BENCHMARKS_FOLDER = os.path.join(os.path.expanduser('~'), '.genomic_benchmarks')
# If Ensembl refuses the connection ("[Errno 104] Connection reset by peer"),
# download with use_cloud_cache=True.
USE_CLOUD_CACHE = True
# If less than 1, only this fraction of each dataset is used.
# (The name is a typo for "thinning"; kept because other cells reference it.)
DATASET_THINING = 1

BATCH_SIZE = 16      # per-device batch size for both training and evaluation
ACCUMULATION = 4     # gradient accumulation steps
LEARNING_RATE = 1e-5
EPOCHS = 100         # upper bound; an early-stopping callback is attached to the trainer
RUNS = 1             # number of fine-tuning runs per dataset

print(DATASETS)

[('demo_coding_vs_intergenomic_seqs', 0), ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_enhancers_ensembl', 0), ('human_nontata_promoters', 0), ('human_ocr_ensembl', 0)]


In [2]:
from transformers import TrainingArguments
from transformers import EarlyStoppingCallback
# Linear warm-up over the first 5% of training (5 epochs for a 100-epoch
# schedule); no warm-up at all when training from randomised weights.
warmup_ratio = 0 if RANDOMIZE_WEIGHTS else 0.05
def get_trainargs():
    """Build a fresh ``TrainingArguments`` object for one fine-tuning run.

    Hyper-parameters come from the notebook-level constants (LEARNING_RATE,
    BATCH_SIZE, ACCUMULATION, EPOCHS) and from ``warmup_ratio``. Every call
    draws a new random seed in [1, 10000], so repeated runs differ in their
    initialisation/shuffling. Evaluation and checkpointing both happen once
    per epoch, and the best checkpoint is reloaded when training ends.

    Returns:
        TrainingArguments: configuration writing checkpoints to ``'outputs'``.
    """
    # Imported locally so this function does not depend on a later cell having
    # already imported ``randrange`` into the notebook namespace.
    from random import randrange

    return TrainingArguments(
        'outputs',
        learning_rate=LEARNING_RATE,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type='linear',
        fp16=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=ACCUMULATION,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        # Must match the evaluation strategy for load_best_model_at_end to work.
        save_strategy='epoch',
        seed=randrange(1, 10001),
        report_to='none',
        load_best_model_at_end=True,
    )
# Early stopping with a patience of 5 (evaluation runs once per epoch) and no
# minimum improvement threshold.
_early_stopping = EarlyStoppingCallback(
    early_stopping_threshold=0.0,
    early_stopping_patience=5,
)
callbacks = [_early_stopping]



In [3]:
from itertools import product
from transformers import AutoTokenizer

# Load the base tokenizer named by TOKENIZER_NAME.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
# For K > 6 every possible K-mer over the DNA alphabet is registered with the
# tokenizer as an added token.
# NOTE(review): the threshold 6 presumably reflects the k-mer size of the base
# tokenizer ("DNA_bert_6") - confirm before using other tokenizers.
# NOTE(review): the ids of the added tokens presumably follow the enumeration
# order below (alphabet order A, C, T, G) and must match the vocabulary the
# model was pre-trained with - do not reorder without verifying.
if(K is not None and K>6):
    alphabet = ('A', 'C', 'T', 'G')
    # Cartesian product of the alphabet with itself K times -> 4**K strings.
    vocab = list(map(''.join, product(alphabet, repeat=K)))
    tokenizer.add_tokens(vocab)

In [4]:
def kmers_strideK(s, k=K):
    """Split ``s`` into consecutive, non-overlapping chunks of length ``k``.

    A trailing chunk shorter than ``k`` is dropped.
    """
    last_start = len(s) - k
    chunks = []
    for start in range(0, len(s), k):
        if start > last_start:
            # Not enough characters left for a full k-mer.
            continue
        chunks.append(s[start:start + k])
    return chunks

def kmers_stride1(s, k=K):
    """Return every overlapping substring of length ``k`` in ``s``.

    The window advances one character at a time; a string shorter than ``k``
    yields an empty list.
    """
    windows = []
    window_count = len(s) - k + 1
    for start in range(window_count):
        windows.append(s[start:start + k])
    return windows

# Select the k-mer splitter: overlapping windows for STRIDE == 1, otherwise
# non-overlapping chunks of length K.
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# Tokenization callback applied to each dataset record (a mapping with a "seq" entry).
if K is None:
    def tok_func(x):
        """Tokenize the raw sequence string without k-mer splitting."""
        return tokenizer(x["seq"], truncation=True)
else:
    def tok_func(x):
        """Split the sequence into k-mers, join them with spaces and tokenize."""
        kmer_sentence = " ".join(kmers(x["seq"]))
        return tokenizer(kmer_sentence, truncation=True)

# Sanity check: tokenize a short sequence, print the encoding and show the
# decoded tokens as the cell output.
demo_record = {'seq': 'ATGGAAAGAGGCACCATTCT'}
example = tok_func(demo_record)
print(example)
tokenizer.decode(example['input_ids'])

{'input_ids': [2, 16136, 52241, 65592, 53460, 4930, 7417, 17366, 57162, 19737, 66647, 57679, 21806, 9387, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


'[CLS] ATGGAAAG TGGAAAGA GGAAAGAG GAAAGAGG AAAGAGGC AAGAGGCA AGAGGCAC GAGGCACC AGGCACCA GGCACCAT GCACCATT CACCATTC ACCATTCT [SEP]'

## Download benchmark datasets

In [5]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Root folder the training cell reads the sequence files from.
benchmark_root = Path(BENCHMARKS_FOLDER)

# Fetch every configured dataset that is not available locally yet.
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(
        dataset_name,
        version=dataset_version,
        use_cloud_cache=USE_CLOUD_CACHE,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

## Function to extract dataframe metrics row from training logs

In [6]:
def get_log_from_history(history, dataset_name):
    """Condense a trainer log history into one metrics row for the results table.

    Args:
        history: list of log dicts. Entries containing ``'eval_loss'`` are
            treated as validation logs, entries containing ``'test_loss'`` as
            test logs.
        dataset_name: value stored in the ``'dataset'`` column of the row.

    Returns:
        dict with the test metrics of the first test log entry plus the epoch
        and the full log entry of the validation evaluation with the lowest
        loss. Test metrics that were not logged (e.g. when only accuracy is
        computed for a multi-class dataset) are reported as ``None`` instead
        of raising ``KeyError``, so a finished run still yields a row.

    Raises:
        IndexError: if ``history`` contains no test log entry.
        ValueError: if ``history`` contains no validation log entry.
    """
    eval_dicts = [entry for entry in history if 'eval_loss' in entry]
    test_dicts = [entry for entry in history if 'test_loss' in entry]
    if not test_dicts:
        raise IndexError(
            f"no log entry with 'test_loss' found for dataset {dataset_name!r}"
        )
    if not eval_dicts:
        raise ValueError(
            f"no log entry with 'eval_loss' found for dataset {dataset_name!r}"
        )

    test_log = test_dicts[0]
    min_loss_dict = min(eval_dicts, key=lambda entry: entry['eval_loss'])

    # (output column, key in the test log); the order defines the column order.
    # The two roc_auc keys follow the order in which the metrics were combined:
    # index 0 is the macro average, index 1 the weighted average.
    test_columns = (
        ('test_acc', 'test_accuracy'),
        ('test_f1', 'test_f1'),
        ('test_loss', 'test_loss'),
        ('test_precision', 'test_precision'),
        ('test_recall', 'test_recall'),
        ('test_auroc_macro', 'test_rocauc_0_roc_auc'),
        ('test_auroc_weighted', 'test_rocauc_1_roc_auc'),
        ('test_pr_auc', 'test_pr_auc'),
    )

    row = {'dataset': dataset_name}
    for column, log_key in test_columns:
        row[column] = test_log.get(log_key)
    row['min_valid_loss_epoch'] = min_loss_dict['epoch']
    row['min_valid_loss_log'] = min_loss_dict
    return row

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [7]:
import evaluate
# Bundle of metrics computed together for the binary classification datasets.
# The roc_auc metric appears twice; get_log_from_history reads the results as
# 'test_rocauc_0_roc_auc' (reported as macro) and 'test_rocauc_1_roc_auc'
# (reported as weighted), so the relative order of the two entries must not change.
# NOTE(review): in the logged training tables the two roc_auc columns are always
# identical - expected for binary labels, but confirm that `average` passed to
# evaluate.load is actually applied when the metric is computed.
binary_metrics = evaluate.combine([
    'accuracy',
    'f1',
    'recall',
    'precision',
    # Order of roc_auc matters for logging -> macro first, then weighted
    evaluate.load('roc_auc', average='macro'),
    evaluate.load('roc_auc', average='weighted'),
    evaluate.load("Vlasta/pr_auc"),
])
# Usage example:
# binary_metrics.compute(references=[0,1,1,1], predictions=[0,0,1,1], prediction_scores=[0.4,0.3,0.6,0.9])


In [8]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
import torch

def compute_metrics_binary(eval_preds):
    """Compute the combined binary metrics for one evaluation pass.

    ``eval_preds`` unpacks into ``(logits, labels)``. The logits are turned
    into class probabilities with a softmax over the last axis (in double
    precision); the arg-max class is the hard prediction and the probability
    of class 1 is passed on as the prediction score.
    """
    logits, labels = eval_preds
    logits_f64 = torch.from_numpy(logits).double()
    class_probs = torch.softmax(logits_f64, dim=-1).numpy()
    # Arg-max over probabilities equals arg-max over the raw logits.
    predicted_labels = class_probs.argmax(axis=-1)
    # Only the probability assigned to label 1 is used as the score.
    positive_scores = class_probs[:, 1]
    return binary_metrics.compute(
        predictions=predicted_labels,
        references=labels,
        prediction_scores=positive_scores,
    )
    
#TODO human_ensembl_regulatory dataset multilabel metrics
def compute_metrics_multi(eval_preds):
    """Compute accuracy for datasets with more than two classes.

    ``eval_preds`` unpacks into ``(logits, labels)``; the predicted class is
    the arg-max of the logits over the last axis.

    The metric comes from the ``evaluate`` library (already used for the
    binary metrics) rather than the deprecated ``datasets.load_metric``, and
    is loaded only once and reused on later calls instead of on every evaluation.
    """
    metric = getattr(compute_metrics_multi, "_accuracy_metric", None)
    if metric is None:
        metric = evaluate.load("accuracy")
        compute_metrics_multi._accuracy_metric = metric
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# One metrics row (dict) per dataset and run; turned into a DataFrame afterwards.
outputs = []

for dataset_name, dataset_version in tqdm(DATASETS):
    # Class names are the entries of the train folder; sorting fixes the
    # class-name -> integer label mapping.
    labels = sorted([x.stem for x in (benchmark_root / dataset_name / 'train').iterdir()])

    # One record per sequence file. A list is used instead of a dict keyed by
    # "<label> <file stem>": that key did not contain the split, so a test file
    # with the same label and name as a train file silently replaced it and the
    # training set shrank.
    records = []
    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (benchmark_root / dataset_name / split / label).glob('*.txt'):
                # Short-circuiting keeps random() from being called when no
                # thinning is requested (DATASET_THINING falsy or exactly 1).
                keep = (not DATASET_THINING) or DATASET_THINING == 1 or random() < DATASET_THINING
                if keep:
                    records.append({"dset": split, "cat": nlabel, "seq": f.read_text()})

    df = pd.DataFrame(records, columns=["dset", "cat", "seq"])

    # The index carries no information, so it is not exported as a column.
    ds = Dataset.from_pandas(df, preserve_index=False)

    tok_ds = ds.map(tok_func, batched=False, remove_columns=['seq'])
    tok_ds = tok_ds.rename_columns({'cat':'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })
    # Hold out 20% of the training split for validation (fixed seed, so the
    # split is the same across runs for a given file order).
    train_valid_split = dds['train'].train_test_split(test_size=0.2, shuffle=True, seed=42)
    dds['train']=train_valid_split['train']
    dds['valid']=train_valid_split['test']

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for _ in range(RUNS):
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        if(RANDOMIZE_WEIGHTS):
            # Rebuild the model from its config only, discarding the pretrained weights.
            # model_cls.init_weights() #Alternative
            model_cls = AutoModelForSequenceClassification.from_config(model_cls.config)
            # NOTE(review): embeddings are resized only in the randomised-weights
            # branch; confirm RESIZE_EMBEDDINGS is meant to be ignored otherwise.
            if(RESIZE_EMBEDDINGS):
                model_cls.resize_token_embeddings(len(tokenizer))

        args = get_trainargs()

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['valid'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics,
                          callbacks=callbacks)
        trainer.train()
        # The 'test' prefix makes the test metrics appear as 'test_*' keys, which
        # get_log_from_history looks up in the trainer's log history.
        trainer.evaluate(dds['test'], metric_key_prefix='test')
        training_log = get_log_from_history(trainer.state.log_history, dataset_name=dataset_name)
        outputs.append(training_log)
  

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/705 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/370M [00:00<?, ?B/s]

Some weights of the model checkpoint at davidcechak/DNADebertaK8b were not used when initializing DebertaForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at davidce

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6385,0.350201,0.882933,0.87928,0.85278,0.907478,0.954651,0.954651,0.953234
1,0.3154,0.258538,0.902667,0.89978,0.873983,0.927147,0.966284,0.966284,0.965724
2,0.2266,0.261869,0.904,0.898491,0.849847,0.953043,0.970619,0.970619,0.971208
3,0.1687,0.234953,0.913067,0.910575,0.885318,0.937315,0.972879,0.972879,0.973629
4,0.1242,0.322767,0.902333,0.896327,0.844513,0.954916,0.970983,0.970983,0.972554
5,0.0969,0.39114,0.896467,0.888937,0.828777,0.958513,0.963281,0.963281,0.968063
6,0.0747,0.428969,0.9006,0.894218,0.840379,0.955428,0.964512,0.964512,0.968263
7,0.0601,0.428155,0.9082,0.904182,0.866382,0.945431,0.959098,0.959098,0.964208
8,0.0459,0.430846,0.907,0.904668,0.882651,0.92781,0.953692,0.953692,0.953368


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/278836dd585e1bc9074b24351191c8c2f0e47de5d9fff14e48f9b11a5b92b512.b628e4d5e7f344695b21fd7ca542aa0f8188268bc6c6b3380dc1e19a16c2447e
Model config DebertaConfig {
  "_name_or_path": "davidcechak/DNADebertaK8b",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformer

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6553,0.311404,0.918133,0.921724,0.964129,0.882892,0.978231,0.978231,0.974758
1,0.2401,0.15716,0.9504,0.950826,0.959195,0.942603,0.99017,0.99017,0.99035
2,0.1257,0.122596,0.958533,0.95883,0.965862,0.951899,0.992958,0.992958,0.993274
3,0.0756,0.190998,0.944933,0.94716,0.987198,0.910242,0.993557,0.993557,0.993311
4,0.0454,0.15101,0.956733,0.957339,0.971063,0.943998,0.993788,0.993788,0.993902
5,0.0287,0.153284,0.9612,0.961334,0.964795,0.957898,0.994016,0.994016,0.994166
6,0.0168,0.265644,0.951667,0.95315,0.983464,0.924649,0.993985,0.993985,0.994144
7,0.0099,0.295142,0.952333,0.953755,0.983198,0.926024,0.99352,0.99352,0.99331


***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-937
Configuration saved in outputs/checkpoint-937/config.json
Model weights saved in outputs/checkpoint-937/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-937/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-937/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1874
Configuration saved in outputs/checkpoint-1874/config.json
Model weights saved in outputs/checkpoint-1874/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1874/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1874/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 15000
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2811
Configuration saved in outputs/checkpoint-2811/config.json
Model weights saved i

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/27791 [00:00<?, ?ex/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/278836dd585e1bc9074b24351191c8c2f0e47de5d9fff14e48f9b11a5b92b512.b628e4d5e7f344695b21fd7ca542aa0f8188268bc6c6b3380dc1e19a16c2447e
Model config DebertaConfig {
  "_name_or_path": "davidcechak/DNADebertaK8b",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformer

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.635572,0.685536,0.637945,0.55,0.759369,0.78811,0.78811,0.786583
1,0.633300,0.556901,0.728232,0.760516,0.856667,0.68377,0.812682,0.812682,0.807688
2,0.633300,0.54839,0.729192,0.758503,0.844286,0.688544,0.819227,0.819227,0.819919
3,0.495300,0.596893,0.712641,0.756306,0.885238,0.660156,0.816715,0.816715,0.81551
4,0.495300,0.61161,0.729911,0.74922,0.800952,0.703766,0.811309,0.811309,0.801708
5,0.362600,0.705708,0.727033,0.758694,0.851905,0.683869,0.798417,0.798417,0.753275
6,0.362600,0.755803,0.720556,0.743222,0.802857,0.691834,0.789598,0.789598,0.757273
7,0.256700,0.838616,0.704246,0.749848,0.88,0.653234,0.766329,0.766329,0.70032


***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-260
Configuration saved in outputs/checkpoint-260/config.json
Model weights saved in outputs/checkpoint-260/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-260/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-260/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-520
Configuration saved in outputs/checkpoint-520/config.json
Model weights saved in outputs/checkpoint-520/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-520/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-520/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 4169
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-780
Configuration saved in outputs/checkpoint-780/config.json
Model weights saved in outputs/

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/123872 [00:00<?, ?ex/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

  0%|          | 0/124 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/278836dd585e1bc9074b24351191c8c2f0e47de5d9fff14e48f9b11a5b92b512.b628e4d5e7f344695b21fd7ca542aa0f8188268bc6c6b3380dc1e19a16c2447e
Model config DebertaConfig {
  "_name_or_path": "davidcechak/DNADebertaK8b",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformer

Downloading:   0%|          | 0.00/370M [00:00<?, ?B/s]

storing https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/pytorch_model.bin in cache at /home/jovyan/.cache/huggingface/transformers/403ad776a219e74251dd29a4ebd75b74cecba2d52f4bcdd092af144b0ae6dd10.3dc5eb0973cd9baee8beb20ef034e6c4d40ed5e1af864a515c66fb131b285406
creating metadata file for /home/jovyan/.cache/huggingface/transformers/403ad776a219e74251dd29a4ebd75b74cecba2d52f4bcdd092af144b0ae6dd10.3dc5eb0973cd9baee8beb20ef034e6c4d40ed5e1af864a515c66fb131b285406
loading weights file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/pytorch_model.bin from cache at /home/jovyan/.cache/huggingface/transformers/403ad776a219e74251dd29a4ebd75b74cecba2d52f4bcdd092af144b0ae6dd10.3dc5eb0973cd9baee8beb20ef034e6c4d40ed5e1af864a515c66fb131b285406
Some weights of the model checkpoint at davidcechak/DNADebertaK8b were not used when initializing DebertaForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictio

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6021,0.545631,0.741995,0.766032,0.839718,0.704235,0.813686,0.813686,0.78528
1,0.4903,0.458061,0.791669,0.808471,0.874171,0.751956,0.880273,0.880273,0.874522
2,0.416,0.453284,0.797212,0.820246,0.919859,0.7401,0.90305,0.90305,0.899329
3,0.3131,0.380786,0.839621,0.848269,0.89129,0.809209,0.9229,0.9229,0.920151
4,0.2199,0.391281,0.850546,0.860669,0.917719,0.810298,0.933321,0.933321,0.929228
5,0.1472,0.448267,0.848878,0.859922,0.922213,0.805514,0.932588,0.932588,0.928124
6,0.1034,0.524661,0.845756,0.858901,0.93334,0.795459,0.934645,0.934645,0.931021
7,0.075,0.601627,0.841289,0.855618,0.934945,0.788699,0.933686,0.933686,0.925691
8,0.0581,0.786145,0.831817,0.849303,0.942221,0.773066,0.930071,0.930071,0.917719


***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1161
Configuration saved in outputs/checkpoint-1161/config.json
Model weights saved in outputs/checkpoint-1161/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1161/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1161/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2322
Configuration saved in outputs/checkpoint-2322/config.json
Model weights saved in outputs/checkpoint-2322/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2322/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2322/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 18581
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-3483
Configuration saved in outputs/checkpoint-3483/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/25284 [00:00<?, ?ex/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

  0%|          | 0/26 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/278836dd585e1bc9074b24351191c8c2f0e47de5d9fff14e48f9b11a5b92b512.b628e4d5e7f344695b21fd7ca542aa0f8188268bc6c6b3380dc1e19a16c2447e
Model config DebertaConfig {
  "_name_or_path": "davidcechak/DNADebertaK8b",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformer

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,No log,0.50381,0.771077,0.0,0.0,0.0,0.870998,0.870998,0.783391
1,No log,0.328387,0.908,0.791055,0.760753,0.823872,0.891964,0.891964,0.837386
2,0.488300,0.264519,0.925846,0.822924,0.752688,0.907618,0.91374,0.91374,0.872093
3,0.488300,0.22399,0.936923,0.852199,0.794355,0.919129,0.934418,0.934418,0.896273
4,0.229900,0.207222,0.943077,0.862658,0.780914,0.963516,0.948989,0.948989,0.925209
5,0.229900,0.20443,0.935692,0.861864,0.876344,0.847854,0.95215,0.95215,0.93239
6,0.229900,0.192288,0.946769,0.883502,0.88172,0.88529,0.960961,0.960961,0.939037
7,0.109700,0.172902,0.955077,0.897472,0.858871,0.939706,0.962786,0.962786,0.946159
8,0.109700,0.188073,0.956,0.900763,0.872312,0.931133,0.962021,0.962021,0.945786
9,0.053100,0.196102,0.955077,0.893895,0.826613,0.973101,0.963157,0.963157,0.94663


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to outputs/checkpoint-203
Configuration saved in outputs/checkpoint-203/config.json
Model weights saved in outputs/checkpoint-203/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-203/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-203/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-406
Configuration saved in outputs/checkpoint-406/config.json
Model weights saved in outputs/checkpoint-406/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-406/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-406/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-609
Configuration saved in outputs/che

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


  0%|          | 0/139804 [00:00<?, ?ex/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

  0%|          | 0/140 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/davidcechak/DNADebertaK8b/resolve/main/config.json from cache at /home/jovyan/.cache/huggingface/transformers/278836dd585e1bc9074b24351191c8c2f0e47de5d9fff14e48f9b11a5b92b512.b628e4d5e7f344695b21fd7ca542aa0f8188268bc6c6b3380dc1e19a16c2447e
Model config DebertaConfig {
  "_name_or_path": "davidcechak/DNADebertaK8b",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformer

Epoch,Training Loss,Validation Loss,Accuracy,F1,Recall,Precision,Rocauc 0 Roc Auc,Rocauc 1 Roc Auc,Pr Auc
0,0.6617,0.609639,0.673835,0.700656,0.754477,0.654003,0.733673,0.733673,0.706219
1,0.5719,0.567825,0.706261,0.74442,0.845523,0.664913,0.785535,0.785535,0.766002
2,0.5263,0.541593,0.729293,0.763409,0.863242,0.684273,0.815334,0.815334,0.802457
3,0.4401,0.535412,0.741929,0.778487,0.896324,0.688034,0.842835,0.842835,0.832886
4,0.3672,0.530817,0.758571,0.784177,0.866918,0.715853,0.848638,0.848638,0.840528
5,0.2864,0.642199,0.753421,0.778648,0.85721,0.713277,0.840705,0.840705,0.830182
6,0.2108,0.710671,0.749607,0.770408,0.830349,0.718538,0.83118,0.83118,0.817859
7,0.1497,0.833393,0.752945,0.770315,0.81885,0.727212,0.82466,0.82466,0.791684
8,0.1173,0.918585,0.746936,0.760849,0.795664,0.728953,0.809244,0.809244,0.768286
9,0.0964,0.968436,0.748701,0.760476,0.788501,0.734375,0.798489,0.798489,0.747134


***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-1310
Configuration saved in outputs/checkpoint-1310/config.json
Model weights saved in outputs/checkpoint-1310/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-1310/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-1310/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-2620
Configuration saved in outputs/checkpoint-2620/config.json
Model weights saved in outputs/checkpoint-2620/pytorch_model.bin
tokenizer config file saved in outputs/checkpoint-2620/tokenizer_config.json
Special tokens file saved in outputs/checkpoint-2620/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 20971
  Batch size = 16
Saving model checkpoint to outputs/checkpoint-3930
Configuration saved in outputs/checkpoint-3930/config.json
Model weights sa

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


## Outputs

In [9]:
# Collect the per-run metric rows into a table and display it as the cell output.
outputs_df = pd.DataFrame(data=outputs)
outputs_df

Unnamed: 0,dataset,test_acc,test_f1,test_loss,test_precision,test_recall,test_auroc_macro,test_auroc_weighted,test_pr_auc,min_valid_loss_epoch,min_valid_loss_log
0,demo_coding_vs_intergenomic_seqs,0.91148,0.908926,0.240552,0.935927,0.88344,0.971529,0.971529,0.972589,4.0,"{'eval_loss': 0.23495250940322876, 'eval_accur..."
1,demo_human_or_worm,0.95808,0.958557,0.122,0.947764,0.9696,0.993469,0.993469,0.993667,3.0,"{'eval_loss': 0.12259555608034134, 'eval_accur..."
2,human_enhancers_cohn,0.722798,0.750065,0.554903,0.682892,0.831894,0.811191,0.811191,0.796354,3.0,"{'eval_loss': 0.5483898520469666, 'eval_accura..."
3,human_enhancers_ensembl,0.83442,0.842516,0.396149,0.803244,0.885825,0.918401,0.918401,0.915678,4.0,"{'eval_loss': 0.38078615069389343, 'eval_accur..."
4,human_nontata_promoters,0.912553,0.913755,0.329312,0.985866,0.851475,0.966116,0.966116,0.977803,8.0,"{'eval_loss': 0.17290177941322327, 'eval_accur..."
5,human_ocr_ensembl,0.755579,0.779882,0.53776,0.709351,0.865988,0.845863,0.845863,0.833512,5.0,"{'eval_loss': 0.5308170318603516, 'eval_accura..."


In [10]:
# outputs_df.groupby('dataset').agg({'accuracy' : ['mean', 'sem'], 'f1' : ['mean','sem'], 'train_runtime': ['mean', 'sem']})

In [11]:
# Persist the metrics table as CSV at OUTPUT_PATH (the row index is not written).
outputs_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)