In [1]:
!pip install -qq transformers genomic-benchmarks datasets

[K     |████████████████████████████████| 4.4 MB 5.1 MB/s 
[K     |████████████████████████████████| 362 kB 73.7 MB/s 
[K     |████████████████████████████████| 6.6 MB 67.0 MB/s 
[K     |████████████████████████████████| 101 kB 14.8 MB/s 
[K     |████████████████████████████████| 596 kB 71.5 MB/s 
[K     |████████████████████████████████| 2.3 MB 78.4 MB/s 
[K     |████████████████████████████████| 271 kB 87.2 MB/s 
[K     |████████████████████████████████| 1.1 MB 54.5 MB/s 
[K     |████████████████████████████████| 140 kB 2.4 MB/s 
[K     |████████████████████████████████| 212 kB 69.7 MB/s 
[K     |████████████████████████████████| 127 kB 69.7 MB/s 
[K     |████████████████████████████████| 144 kB 68.1 MB/s 
[K     |████████████████████████████████| 94 kB 4.3 MB/s 
[?25h  Building wheel for genomic-benchmarks (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the

In [2]:
# Mount Google Drive at /content/drive; the results CSV (OUTPUT_PATH in the
# parameters cell) is written under drive/MyDrive, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
### Parameters
# Hugging Face checkpoint that is fine-tuned on every benchmark. The logged config
# identifies it as a DeBERTa model (DebertaForMaskedLM), not the original DNABert.
# The training loop re-initialises its encoder layers before each run.
MODEL_NAME = "Vlasta/DNADebertaK7"
# Base tokenizer; every K-mer over A/C/T/G is added to its vocabulary in a later cell.
# NOTE(review): the name suggests a 6-mer tokenizer while K is 7 — confirm that the
# resulting token ids line up with the embedding matrix of MODEL_NAME.
TOKENIZER_NAME = "armheb/DNA_bert_6"
# Length of the k-mers a sequence is split into.
K = 7
# 1 -> overlapping k-mers (sliding window); any other value -> non-overlapping k-mers of stride K.
STRIDE = 1

# If strictly between 0 and 1, each sequence file is kept with this probability
# (applies to both train and test); 0 or 1 keeps the full dataset.
# ("THINING" is a misspelling of "THINNING"; the name is kept because other cells use it.)
DATASET_THINING = 1

# Directory the benchmark loader reads the downloaded datasets from.
BENCHMARKS_FOLDER = '/root/.genomic_benchmarks'
# Alternative location on INFRA HUB: BENCHMARKS_FOLDER = '/home/jovyan/.genomic_benchmarks/'

# (dataset name, dataset version) pairs to download and benchmark.
DATASETS = [('demo_coding_vs_intergenomic_seqs', 0),
 ('demo_human_or_worm', 0), ('human_enhancers_cohn', 0), ('human_nontata_promoters', 0)]

# Per-device training batch size (evaluation uses twice this) and the number of
# gradient-accumulation steps.
BATCH_SIZE = 8
ACCUMULATION = 8

LEARNING_RATE = 1e-5
# Training epochs per run; the model is evaluated after every epoch.
EPOCHS = 4
# Number of independent fine-tuning runs per dataset.
RUNS = 5

# Results CSV, rewritten after every run. The path is relative and lives on Google Drive,
# so do not forget to mount the drive first.
OUTPUT_PATH = 'drive/MyDrive/genomic_benchmarks/RandomizedDNADebertaK7.csv'

## Download benchmark datasets and tokenizer

In [2]:
from transformers import AutoTokenizer

# Load the base tokenizer named in the parameters cell; the next cell extends its vocabulary.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

In [3]:
from itertools import product

# Enumerate every K-mer over the four nucleotides and register them as tokens.
alphabet = ('A', 'C', 'T', 'G')
vocab = [''.join(letters) for letters in product(alphabet, repeat=K)]

# Left as the last expression so the cell displays the call's return value
# (16384 in the recorded run, i.e. 4**7).
tokenizer.add_tokens(vocab)

16384

In [4]:
from genomic_benchmarks.loc2seq import download_dataset
from genomic_benchmarks.data_check.info import is_downloaded
from pathlib import Path
from tqdm.autonotebook import tqdm

# Fetch each benchmark listed in DATASETS unless it is already downloaded.
for dataset_name, dataset_version in tqdm(DATASETS):
    if is_downloaded(dataset_name):
        continue
    download_dataset(dataset_name, version=dataset_version, use_cloud_cache=True)

# Root directory the training loop reads the dataset files from.
benchmark_root = Path(BENCHMARKS_FOLDER)

  0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
def kmers_strideK(s, k=K):
    """Split ``s`` into consecutive, non-overlapping k-mers (stride ``k``).

    A trailing fragment shorter than ``k`` is dropped.
    """
    last_start = len(s) - k
    return [s[start:start + k] for start in range(0, last_start + 1, k)]

def kmers_stride1(s, k=K):
    """Return every overlapping k-mer of ``s`` (sliding window moved one position at a time)."""
    windows = []
    for start in range(len(s) - k + 1):
        windows.append(s[start:start + k])
    return windows

# STRIDE == 1 selects overlapping k-mers; any other value selects non-overlapping k-mers of stride K.
kmers = kmers_stride1 if STRIDE == 1 else kmers_strideK

# function used for the actual tokenization
def tok_func(x):
    """Tokenize one example.

    The string under ``x["seq"]`` is split into k-mers, the k-mers are joined
    with single spaces, and the resulting sentence is passed to the tokenizer,
    whose output is returned.
    """
    kmer_sentence = " ".join(kmers(x["seq"]))
    return tokenizer(kmer_sentence)

# example: a 20-base sequence
tok_func({'seq': 'ATGGAAAGAGGCACCATTCT'})

{'input_ids': [2, 7109, 16136, 19473, 16440, 4308, 4930, 7417, 17366, 8010, 19737, 17495, 8527, 5422, 9387, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

## Looping through datasets, fine-tuning the model for each of them, logging metrics

In [6]:
import pandas as pd
import numpy as np
from random import random, randrange
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric

def compute_metrics_binary(eval_preds):
    """Score an evaluation pass of a two-class model with the GLUE/MRPC metric.

    ``eval_preds`` unpacks into ``(logits, labels)``. The predicted class is the
    argmax over the last axis of the logits. Returns the result of the metric's
    ``compute`` call; the recorded runs report accuracy and F1 from it.
    """
    scores, references = eval_preds
    predicted = np.argmax(scores, axis=-1)
    mrpc_metric = load_metric("glue", "mrpc")
    return mrpc_metric.compute(predictions=predicted, references=references)

def compute_metrics_multi(eval_preds):
    """Score an evaluation pass of a model with more than two classes using the accuracy metric.

    ``eval_preds`` unpacks into ``(logits, labels)``. The predicted class is the
    argmax over the last axis of the logits. Returns the result of the metric's
    ``compute`` call.
    """
    scores, references = eval_preds
    predicted = np.argmax(scores, axis=-1)
    accuracy_metric = load_metric("accuracy")
    return accuracy_metric.compute(predictions=predicted, references=references)

# One (dataset, accuracy, f1, train_runtime) tuple per fine-tuning run; later cells
# build the results table from this list.
outputs = []

for dataset_name, dataset_version in tqdm(DATASETS):

    dataset_dir = benchmark_root / dataset_name

    # Class names are the sub-directories of the train split; sorting fixes the
    # label -> integer id mapping. Stray files are ignored, and the full directory
    # name is used so that a class directory containing a dot is found again below.
    labels = sorted(x.name for x in (dataset_dir / 'train').iterdir() if x.is_dir())

    # 0 (falsy) or 1 means "use everything"; otherwise each file is kept with
    # probability DATASET_THINING (in both splits).
    keep_all = not DATASET_THINING or DATASET_THINING == 1

    # One record per sequence file. A list (rather than a dict keyed by
    # "<label> <file stem>") cannot silently drop a sequence when the train and test
    # splits contain files with the same name under the same label.
    records = []
    for split in ['train', 'test']:
        for nlabel, label in enumerate(labels):
            for f in (dataset_dir / split / label).glob('*.txt'):
                txt = f.read_text()
                if keep_all or random() < DATASET_THINING:
                    records.append((split, nlabel, txt))

    df = pd.DataFrame(records, columns=["dset", "cat", "seq"])

    # The default integer index carries no information, so it is not exported as a column.
    ds = Dataset.from_pandas(df, preserve_index=False)

    # Tokenize each sequence, then rename the class column to the name the Trainer expects.
    tok_ds = ds.map(tok_func, batched=False, remove_columns=['seq'])
    tok_ds = tok_ds.rename_columns({'cat': 'labels'})

    dds = DatasetDict({
        'train': tok_ds.filter(lambda x: x["dset"] == "train").remove_columns('dset'),
        'test':  tok_ds.filter(lambda x: x["dset"] == "test").remove_columns('dset')
    })

    compute_metrics = compute_metrics_binary if len(labels) == 2 else compute_metrics_multi

    for run_idx in range(RUNS):

        # Start every run from the checkpoint, then re-initialise the encoder layers
        # with the model's own weight initialiser (the "randomized" baseline).
        # NOTE(review): presumably _init_weights only re-draws some module types;
        # confirm which encoder parameters keep their pretrained values.
        model_cls = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(labels))
        model_cls.deberta.encoder.layer.apply(model_cls.deberta._init_weights)

        # A fresh random seed per run; it is printed because it is not stored anywhere else.
        # NOTE(review): the re-initialisation above happens before the Trainer receives
        # this seed, so the seed alone may not reproduce a run — confirm.
        run_seed = randrange(1, 10001)
        print(f"{dataset_name}: run {run_idx + 1}/{RUNS}, seed={run_seed}")

        args = TrainingArguments(
            'outputs',
            learning_rate=LEARNING_RATE,
            warmup_ratio=0.1,
            lr_scheduler_type='cosine',
            fp16=True,
            evaluation_strategy="epoch",
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE*2,
            gradient_accumulation_steps=ACCUMULATION,
            num_train_epochs=EPOCHS,
            weight_decay=0.01,
            save_steps=100000,  # presumably large enough that no intermediate checkpoint is saved
            seed=run_seed,
            report_to='none',
        )

        trainer = Trainer(model_cls, args, train_dataset=dds['train'], eval_dataset=dds['test'],
                          tokenizer=tokenizer, compute_metrics=compute_metrics)
        trainer.train()

        # Best value over the per-epoch evaluations on the test split. Accuracy and F1
        # are maximised independently, so they may come from different epochs.
        # NOTE(review): picking the best epoch on the test split is an optimistic estimate.
        history = trainer.state.log_history
        max_accuracy = max(x['eval_accuracy'] for x in history if 'eval_accuracy' in x)
        max_f1 = max(x['eval_f1'] for x in history if 'eval_f1' in x) if len(labels) == 2 else np.nan
        train_runtime = max(x['train_runtime'] for x in history if 'train_runtime' in x)

        # Persist after every run so partial results survive an interrupted session.
        outputs.append((dataset_name, max_accuracy, max_f1, train_runtime))
        outputs_df = pd.DataFrame(outputs, columns = ['dataset', 'accuracy', 'f1', 'train_runtime'])
        outputs_df.to_csv(OUTPUT_PATH, index=False)




  0%|          | 0/4 [00:00<?, ?it/s]



  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

Some weights of the model checkpoint at Vlasta/DNADebertaK7 were not used when initializing DebertaForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at Vlasta/DNADeb

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.2653,0.265085,0.89224,0.897869
1,0.2196,0.234414,0.9082,0.907805
2,0.1955,0.245953,0.90608,0.903557
3,0.1797,0.241679,0.90876,0.908109


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.2678,0.244041,0.90368,0.901158
1,0.2204,0.232924,0.9086,0.908207
2,0.1939,0.239312,0.90856,0.907186
3,0.1787,0.24185,0.91036,0.91009


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.2696,0.240854,0.90384,0.90234
1,0.2203,0.232893,0.90744,0.906179
2,0.1952,0.236668,0.90836,0.906874
3,0.1769,0.24329,0.90928,0.908282


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.2646,0.245242,0.90284,0.905084
1,0.2247,0.244925,0.90832,0.907677
2,0.1916,0.234628,0.90872,0.907776
3,0.1806,0.241357,0.90876,0.908117


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.269,0.246154,0.90164,0.899052
1,0.2227,0.235092,0.90792,0.90708
2,0.1952,0.237297,0.90896,0.907742
3,0.1782,0.241432,0.90984,0.909054


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)




  0%|          | 0/100000 [00:00<?, ?ex/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

  0%|          | 0/100 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.1586,0.132229,0.95096,0.949701
1,0.1081,0.118773,0.95896,0.959305
2,0.0838,0.122939,0.9596,0.959949
3,0.0747,0.123608,0.96168,0.961766


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.1573,0.12648,0.95496,0.955036
1,0.1137,0.113123,0.95988,0.959926
2,0.0871,0.132585,0.9518,0.953009
3,0.0681,0.122409,0.9604,0.960605


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.1532,0.124681,0.956,0.956123
1,0.1147,0.115699,0.95904,0.959339
2,0.0869,0.117771,0.96052,0.9606
3,0.0712,0.121504,0.96032,0.960481


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.1609,0.137367,0.95488,0.955281
1,0.1107,0.123709,0.9558,0.956559
2,0.0869,0.121325,0.96068,0.960549
3,0.0717,0.123756,0.9602,0.960344


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings"

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,0.1569,0.12955,0.95608,0.956279
1,0.1107,0.116209,0.95904,0.959429
2,0.0869,0.115695,0.96052,0.960619
3,0.0741,0.123491,0.96068,0.960779


***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)




  0%|          | 0/27791 [00:00<?, ?ex/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

  0%|          | 0/28 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.562993,0.707686,0.672049
1,0.585400,0.537389,0.73057,0.72068
2,0.585400,0.544066,0.734312,0.740073
3,0.504500,0.543215,0.735463,0.73645


***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.547678,0.720063,0.723682
1,0.583800,0.536431,0.734312,0.736512
2,0.583800,0.538938,0.739206,0.742027
3,0.504700,0.541113,0.738198,0.742461


***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.550097,0.719919,0.721682
1,0.587000,0.550365,0.724813,0.701343
2,0.587000,0.540919,0.731721,0.728596
3,0.505100,0.543797,0.73388,0.735517


***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.553039,0.717185,0.724365
1,0.589300,0.538189,0.729131,0.73984
2,0.589300,0.540525,0.736039,0.734203
3,0.504100,0.542274,0.735176,0.734947


***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.549206,0.718768,0.712985
1,0.589700,0.543433,0.727116,0.745024
2,0.589700,0.538007,0.739637,0.746461
3,0.504800,0.540846,0.739925,0.740037


***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16
***** Running Evaluation *****
  Num examples = 6948
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)




  0%|          | 0/36131 [00:00<?, ?ex/s]

  0%|          | 0/37 [00:00<?, ?ba/s]

  0%|          | 0/37 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": null,
  "position_biased_input": true,
  "relative_attention": false,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.378704,0.842595,0.846071
1,0.473000,0.297007,0.881337,0.891212
2,0.283200,0.246867,0.908014,0.913356
3,0.202200,0.246045,0.909342,0.914643


***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.369879,0.844476,0.849394
1,0.468000,0.294582,0.88333,0.893406
2,0.285600,0.272325,0.896945,0.906272
3,0.198000,0.247967,0.910671,0.915771


***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.391437,0.828647,0.844172
1,0.465300,0.30009,0.879898,0.889477
2,0.293700,0.262425,0.90259,0.909762
3,0.207200,0.256532,0.907682,0.913718


***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.385674,0.840049,0.847911
1,0.472300,0.29225,0.886761,0.89235
2,0.301900,0.25409,0.90569,0.91099
3,0.206600,0.253161,0.908568,0.914298


***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


loading configuration file https://huggingface.co/Vlasta/DNADebertaK7/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3fa25099222ee8392148633f9da2ccd667d7c3946c0cb204e54570049d533aca.c59d09939f4655d9540dd690f39534d728c6afa978a255af7840e17b5db6c906
Model config DebertaConfig {
  "_name_or_path": "Vlasta/DNADebertaK7",
  "architectures": [
    "DebertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 51

Epoch,Training Loss,Validation Loss,Accuracy,F1
0,No log,0.385638,0.832743,0.829053
1,0.465900,0.281141,0.887868,0.892131
2,0.287700,0.246649,0.906686,0.911958
3,0.200900,0.249288,0.908457,0.914733


***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16
***** Running Evaluation *****
  Num examples = 9034
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)




## Outputs

In [7]:
# Collect the per-run results (one row per dataset and run) into a table and display it.
result_columns = ['dataset', 'accuracy', 'f1', 'train_runtime']
outputs_df = pd.DataFrame(data=outputs, columns=result_columns)
outputs_df

Unnamed: 0,dataset,accuracy,f1,train_runtime
0,demo_coding_vs_intergenomic_seqs,0.90876,0.908109,1593.71
1,demo_coding_vs_intergenomic_seqs,0.91036,0.91009,1597.5654
2,demo_coding_vs_intergenomic_seqs,0.90928,0.908282,1597.7165
3,demo_coding_vs_intergenomic_seqs,0.90876,0.908117,1596.3733
4,demo_coding_vs_intergenomic_seqs,0.90984,0.909054,1597.9557
5,demo_human_or_worm,0.96168,0.961766,1602.1379
6,demo_human_or_worm,0.9604,0.960605,1599.1343
7,demo_human_or_worm,0.96052,0.9606,1605.7688
8,demo_human_or_worm,0.96068,0.960549,1608.9437
9,demo_human_or_worm,0.96068,0.960779,1608.4374


In [8]:
# Mean and standard error of the mean over the runs of each dataset.
run_stats = ['mean', 'sem']
outputs_df.groupby('dataset').agg({metric: run_stats for metric in ('accuracy', 'f1', 'train_runtime')})

Unnamed: 0_level_0,accuracy,accuracy,f1,f1,train_runtime,train_runtime
Unnamed: 0_level_1,mean,sem,mean,sem,mean,sem
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
demo_coding_vs_intergenomic_seqs,0.9094,0.000312,0.908731,0.000382,1596.66418,0.787381
demo_human_or_worm,0.960792,0.000228,0.96086,0.00023,1604.88442,1.877376
human_enhancers_cohn,0.736903,0.001149,0.74087,0.001791,577.15112,0.044168
human_nontata_promoters,0.908944,0.000506,0.914633,0.000336,596.25504,0.862439


In [9]:
# Save the per-run results table to OUTPUT_PATH as CSV, without the index column.
# The training loop already writes the same file after every run, so this is a final re-save.
outputs_df.to_csv(OUTPUT_PATH, index=False)

In [10]:
# NOTE(review): this applies the model's weight initialiser to the encoder layers of
# whichever model_cls the training loop left behind (the last fine-tuned model), so it
# presumably overwrites trained weights. It looks like a leftover check of the
# randomisation step rather than part of the analysis — confirm before keeping.
model_cls.deberta.encoder.layer.apply(model_cls.deberta._init_weights)

ModuleList(
  (0): DebertaLayer(
    (attention): DebertaAttention(
      (self): DisentangledSelfAttention(
        (in_proj): Linear(in_features=768, out_features=2304, bias=False)
        (dropout): StableDropout()
      )
      (output): DebertaSelfOutput(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (LayerNorm): DebertaLayerNorm()
        (dropout): StableDropout()
      )
    )
    (intermediate): DebertaIntermediate(
      (dense): Linear(in_features=768, out_features=3072, bias=True)
      (intermediate_act_fn): GELUActivation()
    )
    (output): DebertaOutput(
      (dense): Linear(in_features=3072, out_features=768, bias=True)
      (LayerNorm): DebertaLayerNorm()
      (dropout): StableDropout()
    )
  )
  (1): DebertaLayer(
    (attention): DebertaAttention(
      (self): DisentangledSelfAttention(
        (in_proj): Linear(in_features=768, out_features=2304, bias=False)
        (dropout): StableDropout()
      )
      (output): DebertaSe

In [11]:
# Display the weight matrix of the intermediate dense layer in encoder layer 1.
# NOTE(review): presumably inspected to check the effect of the re-initialisation in
# the previous cell; it depends on model_cls left over from the training loop.
model_cls.deberta.encoder.layer[1].intermediate.dense.weight

Parameter containing:
tensor([[-0.0108, -0.0033,  0.0046,  ...,  0.0033,  0.0180,  0.0032],
        [-0.0306, -0.0175,  0.0011,  ..., -0.0134,  0.0182, -0.0247],
        [-0.0026, -0.0368, -0.0020,  ...,  0.0002, -0.0233,  0.0100],
        ...,
        [-0.0158, -0.0048,  0.0221,  ...,  0.0244,  0.0183,  0.0162],
        [ 0.0130,  0.0132, -0.0029,  ..., -0.0278,  0.0005, -0.0168],
        [ 0.0354,  0.0184, -0.0151,  ...,  0.0071,  0.0070,  0.0044]],
       device='cuda:0', requires_grad=True)