# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from tqdm import tqdm_notebook, tqdm
from collections import defaultdict
import numpy as np
from caduceus.sparse_vector.sparse_vector import SparseVector
import os
from joblib import load, dump, Parallel, delayed
from torch.utils import data

In [3]:
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import StratifiedKFold

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data Preparation & Dataset

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset, get_dataset_config_names
from transformers import DataCollatorWithPadding

def load_gue(add_special_tokens=False):
    model_name = "kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3"
    masked_lm_model_path = model_name
    tokenizer = AutoTokenizer.from_pretrained(masked_lm_model_path)
    
    config_names = get_dataset_config_names("leannmlindsey/GUE")
    dataset = load_dataset("leannmlindsey/GUE", name="prom_core_all")
    
    tokenized_dataset = dataset.map(
        lambda examples: tokenizer(examples["sequence"], 
            add_special_tokens=add_special_tokens
        ),
        batched=True
    )
    
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    return tokenizer, dataset, tokenized_dataset, data_collator

tokenizer, dataset, tokenized_dataset, data_collator = load_gue(add_special_tokens=True)

In [6]:
params = {'batch_size':128,
          'num_workers':4,
          'shuffle':True}

loader_train = data.DataLoader(tokenized_dataset['train'], **params)
loader_val = data.DataLoader(tokenized_dataset['dev'], **params)
loader_test = data.DataLoader(tokenized_dataset['test'], **params)

## Модель учитель

Архитектура - Caduceus-Ph

In [7]:
# gue dataset

model_name = "kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3"
teacher = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
teacher.score = nn.Sequential(teacher.score, nn.Softmax(dim=-1))

teacher.load_state_dict(torch.load('caduceus/gue_0.8295-ph-1k.pt'))

The repository for kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3 contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y
The repository for kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3 contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


Some weights of CaduceusForSequenceClassification were not initialized from the model checkpoint at kuleshov-group/caduceus-ph_seqlen-1k_d_model-256_n_layer-4_lr-8e-3 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

## Модель ученик

Архитектура - Mamba

In [7]:
from transformers import MambaConfig, MambaForCausalLM
import sys

from mamba_lrp.model.mamba_huggingface import ModifiedMambaForCausalLM
from mamba_lrp.model.utils import *
from mamba_lrp.lrp.utils import relevance_propagation
from mamba_lrp.dataset.general_dataset import get_sst_dataset

from hf_mamba_classification import MambaForSequenceClassification # https://github.com/getorca/mamba_for_sequence_classification/blob/main/src/hf_mamba_classification.py

In [8]:
model_path = 'state-spaces/mamba-130m-hf'

id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

student = MambaForSequenceClassification.from_pretrained(
    model_path, 
    num_labels=2, 
    id2label=id2label, 
    label2id=label2id,
    use_cache=False,  # This needs to be passed when using eval and training Mamba for sequence classification otherwise it will raise an error
    intermediate_size=512,
    hidden_size=256,
    num_hidden_layers=4,
    ignore_mismatched_sizes=True
)
resize_token_embeddings(student, len(tokenizer))
student.lm_head = torch.nn.Linear(256, 2, bias=True)

Some weights of MambaForSequenceClassification were not initialized from the model checkpoint at state-spaces/mamba-130m-hf and are newly initialized: ['classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of MambaForSequenceClassification were not initialized from the model checkpoint at state-spaces/mamba-130m-hf and are newly initialized because the shapes did not match:
- backbone.embeddings.weight: found shape torch.Size([50280, 768]) in the checkpoint and torch.Size([50280, 256]) in the model instantiated
- backbone.layers.0.mixer.A_log: found shape torch.Size([1536, 16]) in the checkpoint and torch.Size([512, 16]) in the model instantiated
- backbone.layers.0.mixer.D: found shape torch.Size([1536]) in the checkpoint and torch.Size([512]) in the model instantiated
- backbone.layers.0.mixer.conv1d.bias: found shape torch.Size([1536]) in the checkpoint and torch.Size([512]) in the model insta

## Train Student

In [23]:
def set_random_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [77]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import matthews_corrcoef, f1_score, accuracy_score
import gc
from IPython.display import clear_output
import time

def compute_metrics(predictions, labels):
    predictions = np.argmax(predictions, axis=1)
    return {'accuracy': accuracy_score(labels, predictions),
            'mcc': matthews_corrcoef(labels, predictions),
            'f1': f1_score(labels, predictions)
           }

def distillation_loss(student_logits, teacher_logits, temperature=2.0):
    """
    KL-дивергенция между student и teacher логитами.
    """
    student_log_probs = F.log_softmax(student_logits / temperature, dim=-1)
    teacher_probs = F.softmax(teacher_logits / temperature, dim=-1)
    return F.kl_div(student_log_probs, teacher_probs, reduction='batchmean') * (temperature ** 2)

def train_epoch(student_model, teacher_model, dataloader, optimizer, scheduler=None,
                                    device='cuda', temperature=2.0, alpha=0.5):
    """
    Обучает student модель с использованием дистилляции знаний от teacher модели.

    Args:
        student_model: torch.nn.Module
        teacher_model: torch.nn.Module (в режиме eval, не обучается)
        dataloader: DataLoader с батчами (input_ids, attention_mask, labels)
        optimizer: torch.optim.Optimizer
        device: 'cuda' или 'cpu'
        temperature: температура для сглаживания логитов
        alpha: вес дистилляционной потери (1 - alpha для обычной cross-entropy)
    """
    student_model.train()
    teacher_model.eval()

    total_loss = 0

    for batch in tqdm(dataloader):
        input_ids = torch.stack(batch['input_ids'], dim=1).to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()

        with torch.no_grad():
            teacher_logits = teacher_model(input_ids=input_ids).logits

        student_logits = student_model(input_ids=input_ids).logits
        student_logits = nn.Softmax(dim=0)(student_logits)
        
        # Классическая cross-entropy по логитам
        ce_loss = F.cross_entropy(student_logits, labels)

        # Дистилляция
        kd_loss = distillation_loss(student_logits, teacher_logits, temperature)
        
        # Общая потеря
        loss = (1 - alpha) * ce_loss + alpha * kd_loss

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)

def test(student_model, teacher_model, dataloader, device='cuda', temperature=2.0, alpha=0.5):

    student_model.eval()
    teacher_model.eval()
    
    f1_log, mcc_log, acc_log, loss_log = [], [], [], []
    
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids = torch.stack(batch['input_ids'], dim=1).to(device)
            labels = batch['label'].to(device)

            teacher_logits = teacher_model(input_ids=input_ids).logits
            student_logits = student_model(input_ids=input_ids).logits
            student_logits = nn.Softmax(dim=0)(student_logits)

            # Классическая cross-entropy по логитам
            ce_loss = F.cross_entropy(student_logits, labels)
            # Дистилляция
            kd_loss = distillation_loss(student_logits, teacher_logits, temperature)
            # Общая потеря
            loss = (1 - alpha) * ce_loss + alpha * kd_loss
            
            metrics = compute_metrics(student_logits.cpu(), labels.cpu())
            
            f1_log.append(metrics['f1'])
            acc_log.append(metrics['accuracy'])
            mcc_log.append(metrics['mcc'])
            loss_log.append(loss.item())
            torch.cuda.empty_cache()
            
    return f1_log, mcc_log, acc_log, loss_log

def plot_history(train_history, valid_history, title, BatchSize, epoch_to_show=20):
    plt.figure(figsize=(epoch_to_show, 4))
    plt.title(title)    
    
    epoch_num = len(valid_history)
    train_history = np.array([None] * (BatchSize * epoch_to_show) + train_history)
    valid_history = np.array([None] * epoch_to_show + valid_history)
    
    plt.plot(np.linspace(epoch_num-epoch_to_show+1, epoch_num+1, (epoch_to_show+1)*BatchSize), 
             train_history[-(epoch_to_show+1)*BatchSize:], c='red', label='train')
    plt.plot(np.linspace(epoch_num-epoch_to_show+1, epoch_num+1, epoch_to_show+1),
                valid_history[-epoch_to_show-1:], c='green', label='test')
    
    plt.ylim((0, 1))
    plt.yticks(np.linspace(0, 1, 11))
    plt.xticks(np.arange(epoch_num-epoch_to_show+1, epoch_num+2), 
              np.arange(epoch_num-epoch_to_show, epoch_num+1).astype(int))
    plt.xlabel('train steps')
    plt.legend(loc='best')
    plt.grid()
    plt.show()
    
def train(student_model, teacher_model, train_loader, val_loader, optimizer, n_epochs,
                                    device='cuda', temperature=2.0, alpha=0.5, scheduler=None):
    best_f1 = 0.
    
    train_f1_log, train_mcc_log, train_acc_log, train_loss_log = [], [], [], []
    val_f1_log, val_mcc_log, val_acc_log, val_loss_log = [], [], [], []
    
    for epoch in range(n_epochs):
        gc.collect()
        torch.cuda.empty_cache() 
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, n_epochs))
        train_loss = train_epoch(student_model, teacher_model, train_loader, opt, scheduler)
        val_f1, val_mcc, val_acc, val_loss = test(student_model, teacher_model, val_loader)
        
        end_time = time.time()
        BatchSize = 1#len(train_loss)
        
        # train_f1_log.extend(train_f1)
        # train_mcc_log.extend(train_mcc)
        # train_acc_log.extend(train_acc)
        train_loss_log.append(train_loss)

        val_f1_log.append(np.mean(val_f1))
        val_mcc_log.append(np.mean(val_mcc))
        val_acc_log.append(np.mean(val_acc))
        val_loss_log.append(np.mean(val_loss))
        
        if val_f1_log[-1] > best_f1:
            best_f1 = val_f1_log[-1]
            torch.save(student_model.state_dict(), f'gue_{epoch}_f1_{best_f1}.pt')
        
        if (epoch % 1) == 0:
            clear_output()
            # plot_history(train_loss_log, val_loss_log,     'Loss',     BatchSize)    
            # plot_history(train_acc_log, val_acc_log, 'Accuracy', BatchSize)
            # plot_history(train_mcc_log, val_mcc_log, 'MCC',      BatchSize)
            # plot_history(train_f1_log, val_f1_log,   'F1',       BatchSize)
            print("Time: ", end_time/60)
            print("Epoch {}: MCC = {:.3}".format(epoch+1, val_mcc_log[-1]))
            print("Epoch {}: F1-score = {:.3}".format(epoch+1, val_f1_log[-1]))
        # with open("metrics", "wb") as fp:   #Pickling
        #     pickle.dump({'train_loss_log':train_loss_log,
        #                 'val_loss_log': val_loss_log,
        #                 'train_f1_log': train_f1_log,
        #                 'val_f1_log': val_f1_log,
        #                 'train_mcc_log': train_mcc_log,
        #                 'val_mcc_log': val_mcc_log}, fp)

    print("Final Accuracy = {:.3}".format(val_acc_log[-1]))
    print("Final F1-score = {:.3}".format(val_f1_log[-1]))
    print("Final MCC-score = {:.3}".format(val_mcc_log[-1]))

    return val_f1_log, val_mcc_log, val_acc_log, val_loss_log

In [82]:
teacher.cuda()
student.cuda()

MambaForSequenceClassification(
  (backbone): MambaModel(
    (embeddings): Embedding(12, 256)
    (layers): ModuleList(
      (0-3): 4 x MambaBlock(
        (norm): MambaRMSNorm(256, eps=1e-05)
        (mixer): MambaMixer(
          (conv1d): Conv1d(512, 512, kernel_size=(4,), stride=(1,), padding=(3,), groups=512)
          (act): SiLU()
          (in_proj): Linear(in_features=256, out_features=1024, bias=False)
          (x_proj): Linear(in_features=512, out_features=80, bias=False)
          (dt_proj): Linear(in_features=48, out_features=512, bias=True)
          (out_proj): Linear(in_features=512, out_features=256, bias=False)
        )
      )
    )
    (norm_f): MambaRMSNorm(256, eps=1e-05)
  )
  (classifier): Linear(in_features=256, out_features=2, bias=False)
  (lm_head): Linear(in_features=256, out_features=2, bias=True)
)

In [62]:
teacher

CaduceusForSequenceClassification(
  (caduceus): Caduceus(
    (backbone): CaduceusMixerModel(
      (embeddings): CaduceusEmbeddings(
        (word_embeddings): Embedding(16, 256)
      )
      (layers): ModuleList(
        (0-3): 4 x Block(
          (mixer): BiMambaWrapper(
            (mamba_fwd): Mamba(
              (in_proj): Linear(in_features=256, out_features=1024, bias=False)
              (conv1d): Conv1d(512, 512, kernel_size=(4,), stride=(1,), padding=(3,), groups=512)
              (act): SiLU()
              (x_proj): Linear(in_features=512, out_features=48, bias=False)
              (dt_proj): Linear(in_features=16, out_features=512, bias=True)
              (out_proj): Linear(in_features=512, out_features=256, bias=False)
            )
            (mamba_rev): Mamba(
              (in_proj): Linear(in_features=256, out_features=1024, bias=False)
              (conv1d): Conv1d(512, 512, kernel_size=(4,), stride=(1,), padding=(3,), groups=512)
              (act): SiLU(

In [86]:
opt = torch.optim.Adam(student.parameters(), lr=1e-3, betas=(0.95, 0.9))

In [59]:
val_f1_log, val_mcc_log, val_acc_log, val_loss_log = train(student, teacher, loader_train, loader_val, opt, 5, alpha=0.8)

Time:  29122066.269212425
Epoch 5: MCC = 0.154
Epoch 5: F1-score = 0.592
Final Accuracy = 0.577
Final F1-score = 0.592
Final MCC-score = 0.154


In [88]:
val_f1_log, val_mcc_log, val_acc_log, val_loss_log = train(student, teacher, loader_train, loader_val, opt, 15, alpha=0.7)

Time:  29122096.84237338
Epoch 15: MCC = 0.557
Epoch 15: F1-score = 0.768
Final Accuracy = 0.777
Final F1-score = 0.768
Final MCC-score = 0.557


# Interpretation of Student

## LRP

In [9]:
student.load_state_dict(torch.load('gue_2_f1_0.791.pt'))

<All keys matched successfully>

In [13]:
student.cuda()

MambaForSequenceClassification(
  (backbone): MambaModel(
    (embeddings): Embedding(12, 256)
    (layers): ModuleList(
      (0-3): 4 x MambaBlock(
        (norm): MambaRMSNorm(256, eps=1e-05)
        (mixer): MambaMixer(
          (conv1d): Conv1d(512, 512, kernel_size=(4,), stride=(1,), padding=(3,), groups=512)
          (act): SiLU()
          (in_proj): Linear(in_features=256, out_features=1024, bias=False)
          (x_proj): Linear(in_features=512, out_features=80, bias=False)
          (dt_proj): Linear(in_features=48, out_features=512, bias=True)
          (out_proj): Linear(in_features=512, out_features=256, bias=False)
        )
      )
    )
    (norm_f): MambaRMSNorm(256, eps=1e-05)
  )
  (classifier): Linear(in_features=256, out_features=2, bias=False)
  (lm_head): Linear(in_features=256, out_features=2, bias=True)
)

In [10]:
modified_model = ModifiedMambaForCausalLM(student, is_fast_forward_available=False)
modified_model.eval()
pretrained_embeddings = student.backbone.embeddings

In [21]:
from captum.attr import visualization as viz

def interpret(seq):
    input_ids = torch.Tensor(seq['input_ids']).unsqueeze(0).long().to(device)
    label = torch.tensor(seq['label']).unsqueeze(0).long().to(device)
    
    embeddings = pretrained_embeddings(input_ids)
    
    R, prediction = relevance_propagation(
        model=modified_model,
        embeddings=embeddings,
        targets=label,
        n_classes=2
    )

    tokens = []
    for id in input_ids[0][: -1]:
        tokens.append(tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens([id.item()])))
    attributions = R[0][: -1]
    attributions = attributions / attributions.max()
    
    # Visualize the attributions
    viz.visualize_text([viz.VisualizationDataRecord(
        attributions,
        torch.max(student(input_ids).logits, dim=1).values.item(),
        torch.argmax(student(input_ids).logits, dim=1).item(),
        true_class=label.item(),
        attr_class=label.item(),
        attr_score=attributions.sum(),
        raw_input_ids=tokens,
        convergence_score=None
    )])
    return attributions

In [22]:
interpret(tokenized_dataset['dev'][0])

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,0 (8.25),0.0,5.84,C A T G C G G G T C G A T A T C C T A T C T G A A T C T C T C A G C C C A A G A G G G A G T C C G C T C A T C T A T T C G G C A G T A C T G
,,,,


array([ 6.72210334e-03,  2.15818435e-02,  3.86709832e-02, -2.13353261e-02,
        1.01793945e-01,  1.07518800e-01, -3.04763461e-03, -1.64330602e-02,
        6.65676668e-02,  4.84690480e-02,  1.14426471e-01,  1.34323686e-01,
        1.68689638e-01,  1.86531827e-01,  1.45841554e-01,  2.54637092e-01,
        3.64330739e-01,  2.78281188e-03,  1.00265265e-01,  5.58791272e-02,
        1.66581720e-01,  1.03888646e-01,  2.90230047e-02,  7.84010515e-02,
        9.77228582e-02,  2.53225081e-02,  2.12155655e-01,  1.26610205e-01,
        2.77987886e-02, -3.17472965e-02, -9.53893084e-03,  2.23059184e-03,
       -1.71525236e-02, -1.39998337e-02,  2.03353688e-02, -4.45991568e-03,
        2.07689195e-03,  4.04050713e-03, -2.91609243e-02,  3.99324894e-02,
       -1.51179582e-02, -2.00845730e-02, -1.03130294e-02, -3.60995182e-03,
       -1.43598430e-02,  6.18170109e-03, -2.27786135e-02,  1.48677509e-02,
        3.91354263e-02, -2.70854402e-02,  2.21197633e-03, -1.11815082e-02,
        9.99522381e-05,  

In [29]:
attributions = interpret(tokenized_dataset['dev'][2])
attributions

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (11.89),1.0,5.6,G C C C A G C C C C G C C C C G C C C C G C C T C C G G G T G T T G C T C T G G A G T T G C G G G G G C C A C G G G C G G G C C A C C G A G
,,,,


array([-0.03609234, -0.00835373, -0.00382846,  0.00534417,  0.00153251,
        0.01494397,  0.01478018,  0.00198679,  0.00885551,  0.00702265,
        0.00558628,  0.01392846,  0.01200448,  0.01006189,  0.007658  ,
        0.0074113 ,  0.01461757,  0.01269516,  0.01230921,  0.0092136 ,
        0.00960317,  0.02015091,  0.00977878,  0.02145576,  0.02785897,
        0.01579092, -0.0011204 ,  0.03129504,  0.02773942,  0.00921753,
        0.03633547,  0.01452735, -0.01961955,  0.03155735, -0.06700066,
        0.04113669,  0.02057458, -0.00149072,  0.10439104,  0.07376799,
       -0.01259244,  0.03321167, -0.00234799, -0.02270972,  0.07544976,
       -0.08090506, -0.00297617,  0.04621961,  0.03180333,  0.03363496,
        0.05094346,  0.02093089,  0.02530858,  0.00639113,  0.04032256,
        0.05341797,  0.08154644,  0.11904973,  0.05179458,  0.11345383,
        0.17656948,  0.22456512,  0.18765053,  0.24774458,  0.0219623 ,
        0.4937292 ,  1.        ,  0.99354744,  0.3445358 ,  0.72

In [173]:
attributions = interpret(tokenized_dataset['dev'][6])

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.96),1.0,1.78,G G G C T C C T C C T G C T G C T G G G A C A G T G C T C T A G T A G A A C A G A C A G A C C T A C T G A C A C A G G G G A G G T G A G A A
,,,,


In [126]:
lrp_scores = []
seqs = []

i = -1
while True:
    i += 1
    if len(lrp_scores) == 300:
        break
    if tokenized_dataset['test'][i]['label'] == 0:
        continue
    with torch.no_grad():
        outputs = student(**tokenizer(tokenized_dataset['test'][i]['sequence'], return_tensors='pt').to(device)).logits.argmax().item
    if outputs == 0:
        continue
    scores = interpret(tokenized_dataset['test'][i])
    lrp_scores.append(scores)
    seqs.append(tokenized_dataset['test'][i])

# [interpret(tokenized_dataset['dev'][i]['label']) for i in range(500)]

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.63),1.0,-7.55,A G G T T A A A T A G G G G T T G A G A T A T G A T G C T C A G G A G A A G C G C T T T C T T T C G C G A G C A C C C T G A A C C A G A C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.83),1.0,-0.54,G G C T A T T T A A G T C A A G G G C C G G C T G G C A A C C C C A G C A A G C T G T C C T G T G A G C C G C C A G C A T G G A T G A C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.09),1.0,-16.17,G A T T A C A A A T G A G A C T G G G A A A C C C T C T T C A A T A A G A C C T G T G T G A T G A T A G A T T G T G T C C T G A G C C C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.81),1.0,-0.29,G A G T A T A T A A G T T C C A G T A C C A G C A A C A G C A G C A G A A G A A A C A A C A T C T G T T T C A G G G C C A T T G G A C T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (11.21),1.0,-114.53,T G C T A C T T A A G G C G T C G T G G C C T C C C C T G C C C C G C C T T A G C T C C C G C G C T A G A G A G A A A C A T G T A T C G T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.59),1.0,-1.14,C T G T A C A T A A G C T G C C C A T T C C C C C T C C A G C C T G T G G T A C C C A G T C C T C A G G T G C A A C C C C C T G C G T G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.00),1.0,2.77,C A C T T T A T A A A C A T C C C C A G C C T G T G A G A G C A G A G G G C A G G G A G A T A G T G T G A G A C A G G A G C C C A G G G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.32),1.0,-47.55,C T A A T T T A A A G A C A G T G A G G G G G T G A G T G G T T T G G C G G G A A A A A T G G T T A C C G C A G A G C A G G A A A T C G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.92),1.0,-8.02,A T G C A T A A A A A G C C A G T T G G C T G G G A A C A C T A C A C C A G T T C T A A G G G A C C A T A C A G A G T A T T C C T C T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.35),1.0,-3.53,A A A T A T A A C A T C A G G T G A T A C C A C A A C T A T C C T G C C T G C T G C T T G C T G C A C C A T G A A G T C T G C C A A G C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.63),1.0,-2.36,C G C C T A T A T A G G G C G T C G G C G C G C G G G G C C G G T G T C C G C G C C A G C C C G G G A C G C G C T T G G C C T T G C C C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.63),1.0,-2.51,G T C T A T A A A T A G A G C A G C C A G T T G C A G G G C T C C A T T C T G C T T T C C A A C T G C C T G A C T G C T T G T T C G T C T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.05),1.0,0.47,A G A T A A A T G G G C C C T G A A G C G G G G T A G A G G G T C A G A C C A C A G G A C A G T A G T G C C T G G C C C C A G C C C C A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.08),1.0,-2.26,G G T A T A T A A G C G G G G C G C G A G G G C G C T G C T G C T G C C A C C G C T C C T G C C A C T G C A G T G C T C G A G C C C C G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.61),1.0,-1.43,T A T A T A A A A G G G C C C C G G C C G C G C G G G G T C T C T A A T C T G C C A T T T T C T G T C C C T G A G T G A G T C T C T G G C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.32),1.0,0.73,C A G T A T A T A T A A A C C T C T C T G G A G C T C G G G C A T G A G C C A G C A A G G C C A C C C A T C C A G G C A C C T C T C A G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.32),1.0,-7.15,G C C C A A T A A A T C T G C A A C C C A C A A T C A C G A G C T G C T C C C G T A A G C C C C A A G G C G A C C T C C A G C T G T C A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.06),1.0,1.44,G C T T T T T A A G A C G G C C G G G A G C G C C T G C G A G C T G G A T C T G G T G G A G G A T G C T G C G G C A G G T G C T T C G C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.47),1.0,-13.96,C T G C T T T T A A G G C C A C T G C C C G C C C C G T C C C G C C T G C C T G T G C T G T G C C T G C C T C C T A G A G C T C A T T C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.42),1.0,-148.04,C T A T A T A A A A G C G C C T T G T C A T A C C C T G C T C A C G C T G T T T T T C C T T T T C G T T G G C G C T T T A T A G C T A C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.07),1.0,-126.43,C T A T A A A T G A A C A G G G C C T C G G C G G G A G T G A T T A T T T T C T C A G G T G T T T G C A A C A G T G T T C T A A C T A T T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.10),1.0,-61.97,G G G A T A A A T G C T G T C T A G A T T C C A T G G G A A G G A G T G G G C T G C T G C T T T C C T G A T T G C A G T T T T G G T T G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.17),1.0,1.88,G G G A T A A A G T T C C C G G A G A A A G G A A A G G A G A G C G T G G G A T A G T A A A A G A G A A G A C G C G G A G A A G A G G A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.29),1.0,-18.1,G C A A A T A A A A T G C T C T T A G A G G G A A G G A A A G G G A A A T A C T C G T C T C T G G T A A A G T C T G A G C A G G A C A G G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.34),1.0,-3.27,T C C C T A G A T A T C G C G A G A G G G C G G G T C C G C T T G G C T T C G G C G T C G C G T C G C T C C T G C G C T G G A G C T C T A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.47),1.0,0.17,G A T A T A A A T T T C C T G G C G G T G T C T C C T G C A G A C A G T G C A T G A A G T A T G C T C A G T G T G C C A G C A A G G G C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.18),1.0,0.19,T T T A A A A G G A A G T T A T C T G G A C T C A A G A G G G T C A C A G C A C C C T C C T G A A A A C T G C A G C T T C C T T C T C A C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.93),1.0,1.05,T A T A A G G A G G C C A G G G C G G C G G G C G C G G C C C C C A G A G C A C G T C A G G C G G C G C C A T G C T C A G C G C C C A G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.51),1.0,1.93,T G T A T A A A G G C C G C G C T T G G G C C C G C C C T C C T C A C A G C C C C G G A G C G C G G C C T G C C G G G G A G G T G G A T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.29),1.0,-5.1,T G A G A T A A A A G C A G G G C A A G G T T C T G C A A C T C C A A A T C A G G G A G G C G C A G C T C C T A C A C C A A C G C C T T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.78),1.0,-3.64,C A T T T A A A C C A G C G A A T C C G C G T T C A C C T T G T C C T T C C A C C T C T G C C C T A T C T C G G C A G A T G C T C C A C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.91),1.0,-6.93,C A G C T A T A A A A G C A T G T T G G G C C A G T C C T C A G C A T C C T A G T T C G C C A C T G T C T G C T G C C A C A C G A T G C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.52),1.0,-5.51,C A A T A A G A G G C A G A G A C A G C A G C C A G A G G A A C C G A G A G G C T G A G A C T A A C C C A G A A A C A T C C A A T T C T C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.09),1.0,-5.87,T A T A A A G T C C C C A T C C G G A C T C A A G A A G T T C T C A G G A C T C A G A G G C T G G G A T C A T G G T A G A T G G A A C C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.13),1.0,3.35,A A A G T T T A A A A T C T C T C C T C C T T C C T T C A C T C C A G A C A C T G C C C G C T C T C C G G G A C T G C C G C G C C G C T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.07),1.0,1.51,C G C T T T A A G G A C G G G C G G G G C G G G C T G G G C G A C A G C G C T G G A C A C C T G G A G C T G C C C G A G G A C G C G G A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (10.05),1.0,2.05,T C T A T A A A A G A G G A G C T A G A T G C A G A A G C C A C T G C A T T T T C C G G C A A G C C A A G G G C T G T C T G T G C C T C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.98),1.0,-12.01,T T C T A T A A A A A C G C C C A A G C A G G G A T T T A A G G C A T C T C C T G C A T G C A C A G T T G C A G T T A G T T A T T C C A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.39),1.0,-1.51,T G G G A A T A A A G G G C T A A A G C T A T T G G C A T T G G C A G C T G T C T C C A G G C G T C G T C C G C G G A G C C T G A C C C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.52),1.0,2.58,A T A T A T A C A G C C C C T G A G A C C A G G T C T G G C T C C A C A G C T C T G T C C T G C T C T G T G T C T T T C C C T G C T G C T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.23),1.0,-2.83,T A T A A A A G C T T C C A G C C A A A C G G C A T T G A A G T T G A A G A T A C A A C C T G A C A G C A C A G C C T G A G A T C T T G G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.63),1.0,3.13,G G C A T A A A G G G G C G C G G C G C G G G G C C C C G G A G C C T G G C T C C C G C G C A G C A T G C C C G C C A G C G C C C C G C C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.97),1.0,-9.9,C T A C T A T A T A G A G A C G T T T C C G C C T C C T G C T T G A A A C T A A C C C C T C T T T T T C T C C A A A G G A G T G C T T G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.69),1.0,-34.31,A G C A A T A A A T A T G C T C C C A G C T A A T C T T G T G G G G A T T T T A T T G G C A C T C G G A G C A G A G G T T C C T T T A C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.56),1.0,-9.52,C A A T A A G A G G G G G C G G T G G C A A A G G G G G C G G G A C G T C C G T G G T C C T T G T C G C A C G T C G C A G C G C C T G G C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.99),1.0,3.21,C T G G A A T A A A A G A G G G C A G A A A A G G C G C C G G G C G G G C C C G A C A C A C G C C G G A G G A G C C G G G T G A G C T G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.83),1.0,-2.05,T T G C A T A A A G G T G G T T A G G A G A G T G G T T T C C T T A G G C A A A A G G A A A G A G A A T G A G A C G G G G T A G A G C T G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.58),1.0,-3.02,A A T C T A T A A G A A G C G T C G T T C A G C G A G T T C G C T G C T C A G A A G C G C C G A G A G C G C G G C C G G G A C G G T T G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.53),1.0,2.22,C C T T T A A G A G C G G G A G G G G C G C C C C C T G G C G G C G G A G C G G T G C G T G C G G C C G G A G C C G G A G C G G A T C C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.14),1.0,0.55,G T T A T A A A T A T C C C G G T G C C A G C G C G G A G A T C C G C T C G G G T G G C C T C T C T C T T C C C C T C T C C C C T T C T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.57),1.0,0.71,C C C T A T A A A A G G C A G G C A G A G C C A C C G A G G A G C A G A G A G G T T G A G A A C A A C C C A G A A A C C T T C A C C T C T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.81),1.0,1.29,C G C C T T A A A G G G C C C T G C G C T T G G C T G G C C G C C A G C C C T G C G T G A A G C T A C A G G A C C A T G T G C C G C G C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.36),1.0,-16.74,A T C C T A T A A A G G C A G T A G C T C C A A G C A C A T T A C A G A G G G A C C C A A C T C C A T T A A A C C A C C A C C A G C T C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.61),1.0,-3.99,G C T T A T A A G G G G C A T G A G C A T C T C A G G G C T G C C A G A A T G G C T T T T G C T G A G T G C A T A G C A C C A G C G T G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.31),1.0,-19.49,C T A T T T A A G G A G G A C T G G G T T T G T T G T G A A G T T G C G G T G A T C C A G C G C A G A G C C C C G T C C T G A T T G A T C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.01),1.0,-36.91,G G T A T A A A T A C C A G C A G C C A G A G G A G G T G C A G C A C A T T G T T C T G A T C A T C T G A A G A T C A G C T A T T A G A A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.21),1.0,0.13,G C G G A T A A A G G C T G A G G G C C G C G C G C T T G G C C C A G A C C G G C C C G G C C A G C G C G C A T T C G G C C C C G G A C G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.88),1.0,1.92,A T T T T A A A T G T C C C G C T C T G A G C C G G G C G C A G G A G C A G C C G G C G C G G C C G C C A G C G C G G T G T A G G G G G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.38),1.0,1.12,C C A T A A A G A T C T G T G A C C G G C A G C C C C A G A C C T G C C T G C C T T C C T G A C T T C T G T T C C A G A G C A A A G G T C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.83),1.0,-21.44,G G T A T T T A A G A C C T G G G T G T A T A C A C C C T C A G T C T G G G G A G T A A T A G C A T G G G C A A C C A T T A T C C T G T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.94),1.0,-11.23,C C T C T T T A A A A C A A C A T T T A A T C G C C G T T T T C T G T T C T T G G A C A T C A A A G C T G G G G C T T A G G C G C T G T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.26),1.0,-0.54,C T C T A T A A G A G A G A T C C A G C T T G C C T C C T C T T G A G C A G T C A G C A A C A G G G T C C C G T C C T T G A C A C C T C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.20),1.0,-12.26,T A T A A A A C A G T T G C T T T T C T G T T T A C T T A G G T G G A A G T A A A T G A G C T A A A A A G T G A A A A G C A A G A A A A T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.66),1.0,-1.66,T C C A A T T A A A G C G G C C C A G C T G C G C C T G G C T G C G C A C A G A G C T C C C T C C C A G G C C C G C G A A C T T G G C C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.52),1.0,1.65,C A A T A A A A T T C T G G G C T T C T G C A C T T C T A C C C T G T G T C T G C A T C T G C A G G A G C C T A G C A G C C T G G G G G T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.78),1.0,-18.13,G C C C T T A A A A G G A A C A G G G A T T G C T C A C T C G G G G A G C T C G G C T C T T G A G A C A G G A A T C T T G C C C A T T C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.79),1.0,-0.05,G G C T T A T A A T G C A G G G T G G G G C C A C C T G C C G G T A G G T G T G C G G T A G G C T T T T C T C C G T C G C A G G A C G C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.52),1.0,-8.74,T A T T T A A T G T T G T C A C A A T G G A G A A T C C A A G G C A G A G T G G A A C T T C C C A G C C T C A G A A A G G C A G A A A C C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (11.40),1.0,-38.95,G G A T A A A T C A A G G A G C C G G C A C G T A T G G A T T C C G T T A T A G G G C A G T A C T G G T T G G A G A G A G G A G C T T A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.91),1.0,-91.72,C A G T T A T A T A G A C C G G C G G C G G A G C A C G C G T G T G T G C G G A C G C A G T T G C G T G A G G G G T T T G T A C T A T C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.80),1.0,-19.56,C T T T A A A A G G A A G A A G G G G C T T A T C G T T A A G T C G C T T G T G A T C T T T T C A G T T T C T C C A G C T G C T G G C T T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.11),1.0,-0.38,A G G C A G A T A A A G A G A G A T C A A A G A G A T G A G C A T G A G A T A C A G T C C C C T C G T G C C C A A G G A G A C A G G G T G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.92),1.0,-3.81,T G C A A A T A A A T G C C T T A C C A G A C C T G C C C T G C C A C C C C A C T C G C A G C C A C C C A G C A A G A G C A G C A T G T C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.63),1.0,1.56,G C T T T G A A A A G C C G G C G G G T C G G G C C T T G A G G T G G A A G G C G G G A A A A T G G C G G A T T C C T C G G G G C G A G G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.83),1.0,0.05,C T T T A T A A A G G G A G T T G C C A C G T T C C T C C C A G C A C A G A G T T G G G A G T G A C T C C A G A G C C T C C A G C G A G A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.83),1.0,2.45,C T C C T T T A T A A A G G C C G G A A C A G C T G A A A G G G T G G C A A C T T C T C C T C C T G C A G C C G G G A G C G G C C T G C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.33),1.0,-17.55,G A C T A T A A G A C T A T A C C T G A G A C T G G T C A T C T C A G T T C T T T T C T C A C C T T G A C T G C A A G A T G A A A C T C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.82),1.0,-5.09,G G T T T T A T A A A G G G C A G T T C C C C T G C A C A T G A T C T C T T G C C T G C T G A C A T G T A A G A G A T G A C T T T G C T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.29),1.0,2.49,C C T C A T A A A G C C T G G G G C G A G G G G C A C A A C G G C C T T G G G A A G G A G C C C T G C T G G G G C C G T C C A G T C C C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.23),1.0,-22.91,T A A A T A A A A T A C C G G T A T G C T C T T G A G A G T C T T G C A A G A A A C A T A C T T A A C T T T A A G G T G G A G T G G A A T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.18),1.0,-17.67,C T G G T T T A T A G G T C C C G G C C C G A G C C T C C G G C C G C C C G C C G G G T T T G T C C C G C G A T C C C C G A C C A T G C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.90),1.0,-3.7,G G T T T T A A A A A C C G G A G T T G C T C T A C A C A A G C T C T C T T T T T G C C T G C T G C C A T T C C A G T A G G A T G T G A C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.88),1.0,-16.69,C G C G G T T A T A A G G C G G G G A G T T C C C T G C G C C G C G A G C C G G G A G G C G C A C G C T C G C T C G T A C G G C G G C C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.59),1.0,-32.15,G T A T T T A A G G A A C T G T T T C A G T T C A T A C C T T C C A C T G C G A T A G G A A T C A T G T C T G G T C G C G G C A A A G G C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (11.18),1.0,-101.48,C T A A A T A A A T A T G A T C C C C A A A A T G T G T T C T C T A A A G A A T T T C T C A G G C T C A A A A T C C A A T A C A G G A G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.07),1.0,-4.13,C G G C T T T A T A A G G C G C T C T T C C C C C T T T G C T T T C T C T T C T C C C T C C T G C C G C C T T A T G A A C A G G G T G A C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.46),1.0,-117.47,G G G T T A A A T A C T C C G G C C C A G C A C T C C C C A G G C C T C T A G T C C C A G T A G G A G G T T T G A C T A A G A T C A A T C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.18),1.0,-1.93,G G G C T A T A A A A T A C C C G A G C C G G G G C G C C G G G C G G G G G A C G T G A G G A C C A G C C C T C T C C G G G G A C C C C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.77),1.0,-1.77,C G C C T A T A A G G A G C T G T C C G C C A C C C G G G T G C T G A T T C C A G C T C T C G C G C C C G A C G A G G T G G A T T T G G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.12),1.0,2.24,G T A T A A G T G G G A G G C C G G G C C T G C T G A G C A C C T G T C A G G G G C A G C T C C G G G G T G C A G G T G C C A G G C A G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.37),1.0,-22.3,G T A T A T A T A C A G G G T G G G C A A A A G C T C T G G G A C A G C C C A C T G G A A A G C T T C A A T A C A G C T G T G G A A A T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (0.72),1.0,-0.9,G C C A T A T A T A C C C G G G G C G C T G C G C T C C A C C T G G C C G C C G C C T C C A G C C C A G C A C C T G C G G A G G G A G C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.57),1.0,2.78,C T A T A T A A A A G G C G G T G C C C G C C G G G G C C G A G T G G G A G T C C G C G G C G A G C G C A G C A G C A G G G C C C G G T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.50),1.0,-3.23,C T T T A A G A C C C C C G C C T C C G C C C C T G T C C C G A C A C T C G G C C T A G G A A T T T C C C T T A T C T C C T T C G C A G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (12.64),1.0,-155.1,T A T A T A A A G A G G A A C A G T T C A G G A A C T T A G G C T A G A A A G G A A C A C A G T A A A C T G A A T T G A T C C G T T T A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.71),1.0,-146.3,T C C T T A A A A G G G C C C A G C A A T T T C A G C A T C C T T A T T C C C C A G A C C T T C T G C A G A T T C T G T G G T T A T A C T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.20),1.0,-18.11,T C C A A T A A A G A C C T C G T G A G C A T G G G C C T G T C C A C C G T C T T T G T T T G G C T G C T G T T T T A A T C T T T C A T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.02),1.0,-7.7,G C T A T A A A T A G G G C A T C G T G A C C C G G C C G G G G G A A G A A G C T G C C G T T G T T C T G G G T A C T A C A G C A G A A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.65),1.0,-4.31,A T A T A A A T T G T T T A T T T C T G G A A T T T T T T A T T T A A T A T T T T T G T A C A G C A G T T G A C T G A G G T A A G T G A A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.13),1.0,-15.88,T A T A A G A A G G A A A C C A G A G A T C T G G T G C T A T T A C G T C C C A G C G T C T G A G A G A A C G A G T A A G C A C A G A A T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.41),1.0,-14.87,A A A C T T A A A T C C C G A G G C G G G C G A A C C T G C A C C A G A C C G C G G A C G T C T G T A A T C T C A G A G G C T T G T T T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.38),1.0,1.11,G G C A T A A A A A G G C C C C T G G C T G A G A A C T T C C T T C T T C A T T C T G C A G T T G G T G C C A G A A C T C T G G A T C C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.54),1.0,-1.18,C G A T A T T A A C C C G G G A G G C G G C G G C G G G G A G G G G A G A G G C T C T G A G A G G C G A G G C C G G G T G A G G C G G C G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.51),1.0,-3.74,G C C A C A T A A A A G T G A A G T G G A G G A A A G G G A A G A G A T G A T T T C T C C A T C C T G A A C G T G C A G C G G T A A G A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.16),1.0,-1.03,T A T A A A T C C T T G A T G T G A G G C T G G C T A C C T C T C A T C A C T T C T G A G C A C G G A G C A A T G G C C T C T C G C T G G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.61),1.0,-37.49,G T A T A A A G C A C C G T G A C C C T C A G G T G A C G C A C C A G G G C C G G C T G C C G T C G G G G A C A G G G C T T T C C A T A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.84),1.0,1.67,C C G C T A C A A A C G G C G G G A C C G C G G C G C C T G G G C G T C A C T G A G G C A G T A G C C G G C C G G G T G A G G A G G G C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.17),1.0,-4.13,G T G T A T A T T T A T C T G T A A G T G A G C C G T T G G G G A A G G A T T G A A T A C A G A G A C G C T G T C T G C T T G C T G C C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.01),1.0,-13.48,C C T A T A A A A T C C A G G T A A A T A T C T G A T A C T G G C A C A C A G G T T G G A G C A G A G A A A G A G G A A A C A T A G A G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.29),1.0,-10.34,C T G C T T T T A A G G C A G G T C T G G A G G T G A A C T G G C G C C T C T C C T G A C T C T T C A C C T G G A C T C T G A G T C G T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.52),1.0,-201.44,T G A G T A C A A A A G T G G T G T A T T G G A G A G T T G G G A G G G A T T A C A G A G G T A A A G C C C T T G C C C T T A T A G A G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.80),1.0,0.32,T A T A T T T A A A G A G G A C A A G A G C C C C T C A G A C T C A G T T G A G C T G A A C G G A G T C C A C A C A G G C A G G T G A G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.15),1.0,-0.98,T C C A A T A A A T A A C G C T G C T G A T C T T C A C G C C T C C T C T T C T G T C T G C C C A G C T T C A A C T C C A G T C C T T C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.79),1.0,2.4,A C T T T A A G A G G C G A G C C G G G G G G T C A G G G T C C C A G A T G C A C A G G A G G A G A A G C A G G A G C T G T C G G G A A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.34),1.0,-0.23,G A A A C T A T A A A A G G G A G G A A G T G G G G T G G G C G T G G G A G C C C C G T T T G C G C A A G G C A G G C G C G G G G G A C T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.49),1.0,-115.02,C C C T T T T T A A G G A C A G A G C C T A G G C C C T T C C T C G C T T T T C C A A G T C T T T T G C C A A C C A G T T T G T T A G A C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.33),1.0,0.67,C T A T T A A A G C A G C T C C A G C C C T G C G C A C T C C C T G C T G G G G T G A G C A G C A C T G T A A A G A T G A A G C T G G C T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.14),1.0,-0.43,C A G A T A T A A G C A C C A T C C T T A C A G G C T G G C T T C C T G A C T G A G A G C A G G G A G C A G C A G G C A T G G G G C A T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.73),1.0,-3.2,G T G T A T A A A A G G C C C A A G A A G A C A A G G T T C A G A G A G C T T C C G G A T A G C T G A A C G C A T G G A G G C T G A C A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.69),1.0,-0.31,G A C T A T C A A A G C A G C G G C C G G C T G T T G G G G T C C A C C A C G C C T T C C A C C T G C C C C A C T G C T T C T T C G C T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.90),1.0,-43.4,C C T A T A T A C G G T T G T G A T T C A T A C C A A G G C A G T C T C A T G C T T T T A G C C C A G A C C C A C C A A C C T A C C A A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.79),1.0,2.36,A T T A A A A A C T G C C C A T G C A A G G G T C A G G T C T C C A G C A G A C C C T G A A A G C T G A G C T G C C C T G A C C C C C A A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.95),1.0,3.44,G G C T A T A A A A G G G G C G G G A C C G C G G C G G G C C G G A A G C C G C G A G G A G C G C G G A C G G C T G G G C T G C T G C T G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.41),1.0,-54.62,G C T G A T A A A T T C C T G A G A A A A G T T T C T C T T C T T A A G A A T T C T A G G T C A A G A A G T A A A A T A T G G C A C A G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.97),1.0,-246.23,G T T A T A A G A C A A A T G G A C A G G G A G G G C A T G C T T C T C A A A A A A T G T G A G C T A T T G T C T A A G A C T A A C A A T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.10),1.0,2.24,A G A G A A T A A A A G C A G G C T G C C C G A G C C A G C A G T G G C A A C C C G C T T C G G T C C C C T T C C A C A C T G T G G A A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.99),1.0,0.31,A T C A T T T A A A G A C A A A G C A A A A C C A G T G A G G C T G A A A G A A C G G C T G C C C T G G T G C A C A C A G A T G G C A C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.89),1.0,-6.21,G T T A T A A A A G G C T T G G A G T G C A A G C T C A T G G T T G T C T T A A C A A G A A G A G A A G G C T T C A A T G G A T T C T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.67),1.0,-15.04,A T A T T A A A G C A A G A T G G C C G C G C C C T G C A G A T T G T C T C T T G T T G C G T A A G T T T T T T T G A C C G T C A C T C G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.45),1.0,-3.84,C G G G A T A A A T A G G G T C C C G C A A T G G C C G T G G C T G G C T G C G C T C C G A G C T G C G G A G T C C G G G A C T G G A G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.85),1.0,0.86,G T C A C T A T A A C A C G A G G G T G T G A G C C G G G C G C C A G T G C C T G C A G C C G G T G C T G T C C A C A G G G A G C T C C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.72),1.0,1.89,G G A A A T A A A A G C T G T G G C C C C C A G G A G T T C T G G A C A C T G G G G G A G A G T G G G G T G A C A T G A G T G A C T C C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.94),1.0,-0.25,G C A T C T A T A A A G G C G G G C G G C G G C A G A G G C G C C A T T T T G C G A A C G G C G A G C A G C G G C G G C G G C G C G G A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.08),1.0,1.74,G A T A A A A G G C C C A G T T G G A G G C T G C A G C A G G G T G C A G G G C A G T C A G A C C A G G A C C A T G G A A C T C A G C G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.32),1.0,1.0,G C T A T A A A A G C G G C C T C T C G G C T G A G G G C C G G G G A G C T A G C G C T C A A G C A G A G C C C A G C G C G G T G C T A T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.66),1.0,-6.74,C C T A T A A A A T C A G A A G C C C A A G T C T C C A C T G C C A G T G T G A A A T C T T C A G A G A A G A A T T T C T C T T T A G T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.60),1.0,-1.2,T A T T T A A A G C T G C A T C G C T T G A A A A A A G T T T T C G C A G A C T G T G C T G G A G C T G G T G C T G A A A A A G G G G G T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.80),1.0,-5.93,A G C A A T A A A A A G G A A G T T A A A A A A A A G C T T G T C A C A G G G C A G T G T A G T T C C A G A A A A T A G G A C T G A C C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.31),1.0,-4.0,C C C T A A A A A T A G G C C T C T G T T G C C A G T G G G G G T G G C T G G G C G T G C G G C T G C T A C A T G C C C C A C G G A C C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.84),1.0,-7.82,C T A T A T A A G C G C A C T C G C G G C G G C G T T G G T G T T A T G C T C T G G G C C A T T C C T G A G C T C G G G T A A G G C T C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.73),1.0,1.77,G G T A T A A A A G C A G C T T G C C T G C G A A G G T T C T T C A C A C T G C T C A G G G A A G A G C C T G C T A C G G T G G A C T G T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.04),1.0,1.14,G A G T T A T A A A A C C C A A G A C T G G A A A G G A A A A C C A G C A T T T G C T C A G G C A G C C T C T C T G G G A A G A T G C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.90),1.0,2.49,G A G A T T T A A A A G T T G G C G G C T C G C C G G G C G C T C A G T C C T G T G T C C G G G C C C C G A G G C A C A G C C A G G G C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.94),1.0,1.44,A A G T T T A A A A A C T C G C C G T G G A G A A G G G C G C G G G G G A A G A G G A G A G A G G G A G A G C G C T G A C C C A G G C G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.34),1.0,-8.44,T G C A T A A T A G A G C T C G T A A A C T G T A G G A A T T C T G A T G T G C T T C A G T G C A C A G A A C A G T A A C A G A T G A G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.51),1.0,-3.81,A G C C C T T A A A A C C C T T G T G T T G T T T C T T C C T G C A G A C C T G G T C G A G A A C A C T A C T T C A G G A C A G G G A A T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.70),1.0,-79.6,T C C A G A T A A A T G A T C C A G G C T C C A T T A G A A C C C A T C A C T G A C C T T G G A A G C T G C T G G A G C C A C G A T T C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.51),1.0,1.38,T G A T T T A A A A A A A A T A C A C A C A G A A C T G C C C C C A G G C T T T T C T G T C G G G C T T T T T C C C T C C A A G C A G C G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.05),1.0,-2.3,T C C T T T A A A G G T T G G A A G T G G C C C C G A G G C G C C G G C A G C C C T G G G C G G T C C C G G G A A T C G C A C A G G A C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.08),1.0,-4.8,G A A T A A A T A G A G C A G G G C T G G G T A C T C A C C T C C A C A G C A A C T T C C T T G A T C C C T G C C A C G C A C G A C T G A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.45),1.0,-8.16,C T A G A A A A G A A T C G T G A C G G G C A G G A A A C C A T T A C A C C A C C A C C T G G G C T G T G C T C T C C G G C T C C C G C C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.84),1.0,-15.28,G T A T A T A T A T A G C C A G C A T G T C A T C T T T T C A C C A A T G G A T T T G C T T A A A A T T T C T C A C A C A A A G C T A C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.40),1.0,-5.27,C T C T A T T T A G T C A G G G G A A A A C A A A G A A A A A G C A T G T G A A T G G A G G A C A G T A C T G A G A C C G T G G T A G A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.67),1.0,-9.17,A G C A T T A A A A C G C T G C A G G T C G C A G G T G A G A C T A A C A G C T G G G A G A G C T G C T C C A G G C A T T T A G G A C C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.65),1.0,-0.87,A C C C C A T A A A T C C G G G A T C G C A G A G A G G G A G G A G G G T C A G A G A G G G C T C A G C T T G T C A G A G C G G C T C T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.67),1.0,-20.25,C T T A T T A A A T A G G A G C C A A C T G G C T G G G T C G G G G C T C A A T A C C C C A A G C A A T A C C T G C A A C T G A G G A T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.23),1.0,-3.95,G C T A T A T A G G A G G G C C C T G C C A G G C A C C G T C T C C T C T C T C C G G T C C G T G C C T C C A A G A T G G T G A G T C T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.54),1.0,-0.74,C T C T A T A A T T G G C T G C C G G C G G G C C T T C C T T T C C T C T T A T C T G A T C C T G G G C T C C C A G C T G G A G A G G C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.71),1.0,-5.81,T T T A A A A T A A C G G C A G C C C T G A A C A T T T G A G T C A G T T C T T A A A A C T G C C C T G C T A T T G G T A G G G A C G C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.08),1.0,-13.44,G C T T A T A A A A T A G C T T G G G A G A G G C C A G T C A C C A A G A C A G G C A T C T C A A A T C G G C T G A T T C T G C A T C T G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (0.95),1.0,-6.8,A T T T A T T T A T A C C C T T C G A G A G A A A A A C G T C T C A G C T G T C A C A G G A A G C T G C T T C G G G G G G T G A G C A A A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.66),1.0,0.82,G C T T T A A A A A G G G A G C T T T T T C G A G A A G A A A G C A C A T C G G G G G C T C T T A G A G G C C A G C G T G G A C G C C G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.80),1.0,-0.37,G C T T A A A A A A A G G C C C A G A G A G G C A G T C T T G A C A C C C T A G A T C C C A A G A T C T C C A A G G A T T T G G T G G C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.16),1.0,-16.26,C T G A T A T A T G T T G G G T A C C A T A G A G T G A A T C T C A G A A C A G G A A G C G G A G G C A T A A G C A G A G A G G A T T C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.89),1.0,2.76,T G G A T A A A A A A A A A G T G A G C A C T G C A A A T T T C T A G A A G A A A A C A T C A G G A G A A G A A A G A G A G A G G G G G A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.98),1.0,-135.73,A T A T A T A A A C A C A T G T A T T T T C C T G T T T A A A T T G A G C G A A T T G G T C C C C T G C C T G T G C C T T G A T T T A G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (12.63),1.0,-39.2,T T A T A T A G T G T C G A G A C A G A T G A G T A A A T A T C C A T C T G G T G A G A G A G T A C G T T A T C A A T G T A G G A G C C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.17),1.0,2.37,G G C G C A T A A A A G C G C G C G G G G C T G C G G C A G G G C A C T G C C A C C A T G T C C C C G C T G C G C C C G C T G C T G C T G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.21),1.0,-0.5,G T G T A T A A A A G G T G T C C A C T C T G G G G A A G A G C C A C A G T C C T C G G C C C A G G C C A A G C A A G C T T C T A T C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.07),1.0,-3.03,G C A T A A A G C A G C C T G T C C T G G C A A G G G A T G G T C A T C C T C T C A A C C T A C A G A C C C A G T G A G C C C T A G G C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.66),1.0,-2.14,A G C A C A A A A G C C A A T C G C T A G C A A A C T C C C T G C C T A G C A A G G C C C A G C C T G G G G C A G A A A T G G C T G C A A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.08),1.0,-7.7,G C A T T T A T A C C A T T A G C C T C T G A A C A G A T T T C A A G T T G C T G C T G C C A A C T C T C G C G A G C T T T G T C A G T A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.33),1.0,1.92,G A C A T A A A G G A G G C T G G G C C C T G C C A C C A C G A C A G C A G C C A C A C C T C T G C A G A G A G A A T G G T G A G T G C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.68),1.0,-9.74,G A A A T A A A A C A G G A C T T T A G A T T C G G T T A C A A T A A A A T A T C A G A T G C A C C A G A G A C A C A A G G C T T G A A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.68),1.0,-0.36,C T T A A A A G C A G C C T C A A G G T C A C T G T A A A G G G A A G A G A T T C C G T C A G A G T C A G A G C C C A G C A C G T G C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.71),1.0,1.17,A C A T A A A G A C T C A C A G G T C C G C C T C C C A G G C T C A A A G C T G G C T C T G C A G G G G A C A T G A G A G G C A C A C C G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.57),1.0,-18.58,C A G C A T A A A T T T G G A G A G A T G C A T C T A A A C T G C G T G T G G G T T C G G G C T C T T T T A G G T G A A T A G G G C T C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.61),1.0,-40.62,A A C G T T T A A A A T C C A A G G A C A A A A A C C G G A G T C G G T C T C A A A A T C C G C G C C G T T C A G T G C T C C A C T C G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.60),1.0,-0.26,T C C A T A A A A G C C C T G T C G C G A C C C G C T C T C T G C A C C C C A T C C G C T G G C T C T C A C C C C T C G G A G A C G C T C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.92),1.0,-1.08,C T G T A G A A A A A G A A G A A G A A A A G C C A C A A G A C T G C T T A A A A A T T T C T G T G T C T T A C A C A G A A G A T A G A A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.81),1.0,-66.28,G G T A T A A G A G A G G G G G G A A A A T C C C T T A A A C T C C C G T T C A G T C T C T T A T C T G A C C A T C T C A T T C T C T A T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.38),1.0,-3.24,G G G T A T A A A A A G G G C C C A C A A G A G A C C A G C T C A A G G A T C C C A A G G C C C A A C T C C C C G A A C C A C T C A G G G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.05),1.0,-3.79,C G T T T A T A A G A G A T C G G T G T T T G C C A A G G C C G T G C C G G A T T C T C G A G A G C C A A G G C C T G A G A G A A G A G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.08),1.0,-0.84,G G G T A T T T A A A C C C C A G A A A A T T C T G T A A C G G G G C T C T T G A G C C C C T G T G C T C G G G G C C G C T C C C A T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.27),1.0,-30.28,G A A T A T A A A T G C A T G T G T A A A C A C A A G C T G T T T G T T T T G C T T A G T G T T C C T T G T C A T T C T A C A C G C T T G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.63),1.0,-5.66,C A G A T A A A G G C G G C C C C T C C A C C G G G C G C T C C T A G C G G T C T C C C G G A C C C T G C C G C C C T G C C A C T A T G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.24),1.0,-186.36,A G C T A T A A T A T A G T G C T A A A G G C A G C C T C T C T C A C A A G C T C T C C A G G C T T G C T A C C A T T T A A A A T C A G A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.91),1.0,-7.87,A T T A T A A G A T G C T C T G A A A A C T C T T C A G A C A C T G A G G G G C A C C A G A G G A G C A G A C T A C A A G A A T G G C A C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.27),1.0,-271.88,G T G G T A T A T A G T G G T A T T A C A C T G G T G G T A G G G A A A G T C T A C A G G A A A T G G C A T A G G A T A C A C T C A A G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.96),1.0,-2.24,A G T A T T T A A G A C C G G G C G G A G T T G G A G G T G G C C A A G G G C A G A A T G A G C G G G A T T C A G G G C A C C A G G A C C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.18),1.0,2.32,T G G A A T A A A G G G A C G C G G G G C G C C G G A G G C T G C A C A G A A G C G A G T C C G A C T G T G C T C G C T G C T C A G C G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.53),1.0,-2.16,T T T T A T T T A A G G T C C C C A G C A G G C C C C A C C A C C A C G G C T G C C C A A C C C G G T C C C A G C C A T G T C C G T G A G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.93),1.0,-1.03,G T T A C A A A A A G A G A T A G G G T C C C A G A C C G C A C C G A A G C T T C A T G A G G C C G C T C C T C A T C T G T G C A C G G A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.76),1.0,-48.85,C T T T A T A A A C C A C A A C T C T G G G C C C G C A A T G G C A G T C C A C T G C C T T G C T G C A G T C A C A G A A T G G A A A T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.82),1.0,0.31,C G C G G A T A A A A G G C G C G G A G T G T C C C C G A G G T C A G C G A G T G C G C G C T C C T C C T C G C C C G C C G C T A G G T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.87),1.0,-1.02,A C T T T A A A T A G A C A G G T C T G A G T G C C T G A A C T T G C C T T T T C A T T T T A C T T C A T C C T C C A A G G A G T T C A A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.30),1.0,-5.06,T T T T T A T A G C A G T C C T T T C T C T C T G A A A A T C T C A G G T T A C T T G A C T G G G A G T T C T C A G A C C T C C A G T T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.08),1.0,-4.0,A G G T T A A A T A G G T C A C T G C C A C C C T C G A C T C T C A G C A G G G T G T C T C C C T G A G C A G A G G G A C C T G C A C A C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.97),1.0,-0.85,G T T T T T A A G T C C G C C C C C A G T C A G T C C C C A C T C A G T C T T C G C A G C A G C T C T C A T C C T C C A C T T G G C C T C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (11.20),1.0,-42.75,A T T T T A T A T A A A G G G C A C A A A A T G A A T A C T G T T A T T G A T G A A A A T T G G G T T A C C C T G A G G A C T C T T G C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.44),1.0,-0.68,G C A A G A T A A A A G G A T T G C A G C T G A A C A G G G T G G A G G G A G C A T T G G A A T G G C A C T C A G G G C A A A G G C A G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.84),1.0,1.76,A G A G A A T A A A A G C A G G C T G C C C G A G C C A G C A T T G G C A A C T C G C T C G G G T C C C C T T C C A C G C T G T G G G A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.61),1.0,-16.8,A A A T A A A A C C T C G G G C G G C G G C G G C T G G T G G G A A G A C T T G A A C T T G A A T C T C G A A C C A C T G C A T C T C C G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.46),1.0,-3.7,G G C T T T A A A G T C T C T G C C A G G A T C C A T G C T C A C A T G T T A C T T C C T G T A T G G A G G C A T G G C C A G T T T C C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.28),1.0,-13.29,A C A A A A T A A A T A T C T G T G C A A T A T A T C T G C T T T A T G C A C T C A A G C A G A G A A G A A A T C C A C A A A G A C T C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.71),1.0,-7.46,G G T A T A A A T G C C A C C T C C C G C T G G C C G A G C T T C A C G G C A C T C G C A G G G G C T G G T G T C A C T G G T A A G A T T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.01),1.0,2.33,G G A T A A A G G G T G C G G G G C T G C T G G C G G C T C T G C A G A G T C G A G A G T G G G A G A A G A G C G G A G C G T G T G A G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.86),1.0,-0.06,G C T A T A A G A G G A G C C G G G C G G G C A C G G A G G G G C A G A G A C C C C G G A G C C C C A G C C C C A C C A T G A C C C T C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.44),1.0,0.22,C T A T A A A T C C T G G G G C C A T C A G G C T A G G G T C C T G C A G C T G C C T G A A G G A G C C A T C T C A T C C A C A G C T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.60),1.0,-8.73,C T A T A A A G C G G A C T G T G G C C C T G G G G T T C A T C C G A T T G A G T C A C C A G T C A C T G G A G C T T T A C A G G A G A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.31),1.0,-9.21,G T A T A A A T A C T T C T T G G C T G C C A G T G T G T T C A T A A C T T T G T A G C G A G T C G A A A A C T G A G G C T C C G G C C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.88),1.0,1.07,C T C T T T A T A A G T T C A T C C T C C A G C T G G A A A G G A A G T C A G C G G C G A C G C G C A G G G C C T G C G G C C G A G T C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.03),1.0,-3.93,A G G G T T A A A A G C C C G G G T A T G A T T C A C C A A G C G A C C T A T T C C C T G C C A C A T G A T C C T G G G G G C A G A T G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.60),1.0,-36.73,T C A C T A T A T A G A G A G C T C A G T G A G C T G A T C G C G G A G A A G C C A C T T C T G C C A G C C C C G G C G C C T A T A A A T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.48),1.0,-31.67,A G A T T T A A A G C G C G G C T G C G C C C G G C T T C T G A C G T C C A T T G A A T C G C G C G G G C G G C C G G C G G C G A G C G C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.00),1.0,-28.23,T A T A A A A G G C C T G G G T G G G G C G G G C G C G G C G G C A G G A C A G C C G A G T T C A G G T G A G C G G T T G C T C G T C G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (0.90),1.0,-18.64,C T A C A A A A C G G C T T C T T A A A G G A A G C G G C C C T C A G A C A G G A C A G A G T T G A G G G A A A G G A C A G A G G T T A T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.08),1.0,-2.82,T G T A T A A A T A A A C G C G C T G C T A C C G C T G C T G G C G A G C T G T G C C C C A C G C T C C C G C T G C A A C A G T C C C G G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.18),1.0,-143.94,G G G T A T A T A A G C A A G G C T C C C C T C C C G C C A T T T G A T T T A A A A G A A T G A A A A G G C G G A T C T G G T C A T T C G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.90),1.0,-10.22,T T T A A A A A A A A A A C T C C G G C A A C G T T G G C C T T T G G G A G G C C G C G T A T T G T T A C C T A G G G G T G G T T T C T T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (10.41),1.0,2.84,G C T T C A A A A A C A T G T G A G G A G G G A A G A G T G T G C A G A C G G A A C T T C A G C C G C T G C C T C T G T T C T C A G C G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.01),1.0,2.02,G T A T A T A G C A G G C T G C C A G C G A C T C C T G C T C T T G C T T C T G G A T C T G C A G G G C A G T C C C A G C A G G A C C C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.06),1.0,-6.44,A C C T A T A T A A G G C T C A C A G T C T G C G C T C C T G G T A C A C G C G C T T C A A C T T C G G T T G G T G T G T G T C G A A G A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.56),1.0,-36.24,A G A T T A T A T A T A A G A G G A C C C A G G A G T T A C A T C A G G A C T C A G G A A G A G A T A G A C C C A T A A T G A T G C T G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.00),1.0,1.98,C C G G A C A A A A G T C C C G C C T G C C C A C G G C T T T T T G C C C G C C G C T C G T G A C C G A G A C G C C T C G C C G C G G C C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.52),1.0,3.91,T G G T T A A A A A G G C G G C T G G G C G G A G C T G C G G G C C T C C G A C G G C G G G A G G A C A G C G C T G C G A G G A G G C G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.03),1.0,-2.32,G C T A T A T A T A G A G G C G G C G G C G G C A G C C G C G G C G A C G G C G G T C C G G T G C G A G G C A G A G T G C T A G C G G G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (10.59),1.0,-48.4,T T T T A A A T C A C A G A T G A C T G C C C C T G T G C C A A C G A A T T C T T C T C C A G C C A C C C A T C T A G C G T T A T G C T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (1.94),1.0,-31.67,T G T A T A A A T G C G G T G G C G C C C G G C G T A G G G A C A C T T C G G T C C T G A G C G C T T G G G A G T T A G G T T G T T T G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.72),1.0,-14.05,G G G T A T A C A T A G G G A G G G C A A G A G C T C C G G G C C A C T G A G A A G A T T C A A A A C C T A C A A A A G C C T G C C G C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.96),1.0,-100.69,T T T T T A A A A G C C A T T T T A C T G G T T T G G C A T G C G G T A T G G A A A T T C T A A G A G A G A A A G T T T T A A G G C A A T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.36),1.0,2.09,C A G G C A T A A A A A G G C C A G G C C G G A G A G A C C G C C A C C A G T C A C G G A C C C T G G A C C C A G C G C A C C C G C A C C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.14),1.0,-3.23,G A A T A T A A A C A G G C A G A A A A A T G T G A A A A G G C T G G A C T A G C C T A G C C T C C C A G A C T C C A T C T T T C T C C C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.63),1.0,-3.79,A T T T A A T A A A T A A G C G C A A G C G C C A G C C G C T C C A C C A G T C C G A G G T C C A G A G G A G C A G A C A C A C A G A C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.90),1.0,-4.94,G C T A T A T A T C T G C C C A G C A G C G G T A C T C T C G G G A C A G A G A T G G C A C T G A T G C A G G A A C T G T A T A G C A C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (0.47),1.0,-5.54,C C C T T T A A G A G C A A C T G C T C T A A G C C A G G A G C C A G A G A T T C G A G C C G G C C T C G C C C A G C C A G C C C T C T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.89),1.0,0.73,T T T A T A G G G C G C C G C G G C G G C G C T G C A G A G C C C A C A G C A G T C C G T G C C G C C G T C C C G C C C G C C A G C G C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.29),1.0,-3.4,C C T A T A A A A A G G A G A G A C A A C A G C T C A T A C C C C A G A A G G A G G C C A G G A G T T G T G A G T T T C C A A G C C C C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.12),1.0,-4.97,G C T A T A A A A G A A G C T C A T T C C A G T G T A T T C T C C C T G C A G C C T G C T C C T T G C C A T C C A C C A C A A A C C C T C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.99),1.0,-30.42,C T A C G C A T A A A A A C T G G A G G G A C T C G T C C C A G A G T T T G C T G C C G C C G C C G C C G C T G C C A T T A G A G C G G T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.71),1.0,-8.54,A A A T A A A T A G T T C C C C T C T C A A A A C T T T C A G T T A A G A C T G A A A A C G C A A C T G G G C C T G A A G A A C T T G G A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.13),1.0,-4.58,C C C T A T A T A A G T C C C T C A C C T T A A A C A C C C T G G G C T G C C T C C T C T G A C T G T T A T G C A G C A G G C C G G C A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.09),1.0,-5.27,A A C T T T A A G A A C T T G T G T T T G T C A A G C A T T T T C A A T T G T A T T T C C G T T C A T T T A C A A G T T A T T T T C T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.84),1.0,1.52,G A C A C A T A A A G G C C C T G G G G T C A G A C C T G G A C C G A C T T C C T G C C T C A T C A C C C A C C T G G C T T G T G A C T G T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.65),1.0,-2.62,T G A A T A T A A G G G C A G G C C A G A C C C T G G C T C T T G A G C A G T C G T G C A T T C C C A G C C T C G C C T C G G G T G T A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.59),1.0,0.26,G G G C T T A A A T A G G A G C T C C G G G C T C T G G C T G G G A C C C G A C C G C T G C C G G C C G C G C T C C C G C T G C T C C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.85),1.0,-28.95,T T T A A A G G T T G G T C T G A A C A G T G A G G C A C T C C C A T A C A A A G G A A C T C C A T C A T A C C G G A T G C C A C C G T T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.50),1.0,-28.82,G C T T A T T T A T A G C A A G T T G T T T G G C T C T T T A A T A C C A C C C C A T T T C C T C A A A G C T C A G C T T T C T G T T T A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.57),1.0,2.78,G T T T T A A A A G G C A G T G G T A G G G T A G A G G G T G G C A G T G G A G A C A G C T G G A G A G C A C T G A G G A C G C G G A C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.16),1.0,-18.94,C T A G T T A A A A G T A A G G G G G A A A A G A G T A A A C G C G C G A C T C C A G C G C G C G G C T A C C T A C G C T T G G T G C T T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.63),1.0,1.99,C G A G C T T A T A A A G G C C C G C G G G C G G A G G A G G G C G G G A G C C G C C G T C C G G C C C A G C T C G G C C C C A G T G A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.62),1.0,-0.11,G C C A G A T A A A A G C A G A G C A G G A C C T G G A A A G C T G G T T T G T A T G G G C T G C A G C C T G C C G C T G A G C T G C A T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.33),1.0,0.87,G G C T A T A A A G A A C A T C T C G A G A G G A G C C A G C A C A G C C T T G T T C A G A C G C C C A G T G A C C T G C C G A G G T C G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.40),1.0,-30.35,C A C T C T A A A T A G A A T G T A T T G T A A T C T T T G C T C A G T C C A A C G T G G T C C C T T C A C C C G G G C T C C G C T C T T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.14),1.0,-10.7,T G T A T A T A A G T A T G A G A A A G A A A C A G T G A C T G G A A A A G A A T T A T C A C A T T G C A C A A G G A T G G C T C T G A A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.47),1.0,-6.62,T C T A T A T A T A A G C G C G G G G A G C C T G C G T C C T T T C C C T G G T G T G A T T C C G T C C T G C G C G G T T G T T C T C T G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.05),1.0,-19.0,G C T T T A T A A A G C A A G C T A G T G G C T G C C C T T T C C A A T A C C T C A C T C A G C A C A C C G T C T G T C A C C C A A A C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.60),1.0,2.01,A G A T T T A A A G G C T G C T G G A G T G A G G G G T C G C C C G T G C A C C C T G T C C C A G C C G T C C T G T C C T G G C T G C T C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.68),1.0,-6.54,G T A T A T A G T A A A G G C C G G G C G C C G C A C G C A G A C A C A C A C T C G C C A G G A C A C A G G C G C A C A C A C G C T C A C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.57),1.0,3.0,T C T T A T A A A A G A G G T C C C A G G G A A C T T C C C A G C C C C T C C C A C C A T G T G A A G A C A C A G C A A G A A G A C A G C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.55),1.0,0.83,G A T A T A A A G G G T C G G C C C C A C A T C C C A G G G A C C A G C G A G C G G C C T T G A G A G G C T C T G G C T C T T G C T T C T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.95),1.0,-14.94,G T T A T A A A T A G T T T C A G G A G G A A C C T G C T G G T C A G A C T T T G C T C A G C C G A T T T C A C G C A C C T T A C T C A G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.59),1.0,-19.98,T G C T A T T T A T A C T T T G A G T T A C T C T T C A A A A G T G G T A T A C C T C T A G T T T G G A G C T G T G C T G T A A A A A C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.27),1.0,-2.47,T T T T A T A A G T G T T T G G T A G T T C C T C C T G T G T T C G T T C T C T T T C T T G C T G C C T T G T G A A G A A G G T G C C T T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.91),1.0,0.47,C G A A T A A A G C C C G G G C G G C G C C C C G C G C C C C T C G C G G A A G C C C A C A C T C C G C G C G A C T C C A G G C G C A C G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.94),1.0,0.86,C A A A A A A A G C T C T G T G C T G G C T G G A G C C C C C T C A G T G T G C A G G C T T A G A G G G A C T A G G C T G G G T G T G G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.33),1.0,2.26,C A T A A A T G T T C C C A C C C C T G G G C A A G G T G G C T C A C T C T G G C A G G T A G G A A C A G G G G A G A G T G C A C C T G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.80),1.0,1.96,G G C C T A T A A A G C C C T G T G C C A A G G G G C A G A C T C A C T G G C T C A G A G G G A G G A C G C A C C C G C C A G C C A G C C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.84),1.0,3.72,A G G C T A C A A A G C C C G G G G G C G G T G C C G G G G C G C A G A G T C C C C G C A G C G C C G G T C G G G A G C G C A G C G C G G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.09),1.0,-3.86,C A T A A A A C C A G G G C C T T A T C C A G G G C C A C G C T T A C A G A A C T C C C A C G G A C A C A C C A T G A T A A G G A C G C T G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.17),1.0,-9.21,C C A G A A T A A A G A C C G A G G T C T T C G C T G T G C T C T A T C G G C C T C T G C A T G A C C T G G C C C C T C T C A C A C C A C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (4.42),1.0,-12.39,G T A T A A A T T G G C T A G G A G G C A C C T G G G C G A G A C A A G G A T C G A G G A A G A C A T C A G G A T G T A C C A T C T G C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.21),1.0,-1.64,T T T A A G A G C A G C T G G A A T G C A G T T C C C C T G A T C A G C G T A G C C A G T T G T T G C C T G T C T G A A C C T C T G C C A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.71),1.0,-0.09,G A A C T A T A A A T G T G A G T T C C C T T T C T G A C C C C C A G G T T T G G C G A G G G C T C C T C C T G T T T G G G C A G C A G C T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (2.21),1.0,-10.53,T C C T T A A A G A C A A G C C C C T T A G C A A A C T C T C A C A T C T T C T G G T C C C C C A A A C A T A C A A C T T C A C C T G T T A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.25),1.0,-1.6,C T A T A A A G G C A G C G G A A G G G T A G T C C G C G G G G C A T T C C G G G C G G G G C G C G A G C A G A G A C A G G T G A G T T C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (5.74),1.0,-1.61,G T T A C A A A A A G A G C T A G G G T C C C A G G C C C A G C C G A A G C T T C A G G A G A C C T A T C C T C A T C T G T G C A A G G A G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.76),1.0,-2.56,A G G T A T A A A A G C T C A G G C T G G C A G T C C T T G A A C A C T G G T G C T G A A C C C C G C C G C G C G C G C T T T G A A T T T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.75),1.0,-4.94,C A T A T T T A T T G A G T T G A G C T G C A T G T C T G G C C T G A G T C A C C C T C A G G C C T G A C T T C A G A T G C T G T A A G T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.46),1.0,-70.75,A G T A T A T A A A G G G G T C G G C C A C A G A C G G G C A A A A G G A A C A G A T T C A C C G C T T C G G A G T G A A T T C C A T G A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.38),1.0,1.23,C C G T T A A A A G G G G A A G C C C G T G C C C C A T C A G G T C C G C T C T T G C T G A G C C C A G A G C C A T C C C G C G C T C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.05),1.0,0.1,T G A T A T A A A G C T C C T A C A G C T A C C T G G C C T G A G A A G C C A A C T C A G A C T C A G C C A A C A G G T A A G T G G G C A T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (7.13),1.0,-78.26,G G G A A T A A A A C G T G C C G G C G A G G C T C A G G A G T C A T T G G C C A C A G A G A C C C A G C C C G A G T T T C C C A T C G C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (6.36),1.0,2.05,C A G G A T A A A T G C A C A G C T G C C T G C T G G T C T G G G A T C C C C G C C T C A G G C T C T C A A C C T C C T C T C C T G C A G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.60),1.0,0.45,G A G G T T A A A T A G A A A G A C C A G A G C A A C C C G A G A G A G C C T G A G A G C A C T G G G T G A T G A G A A G A C C T G C A G G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (8.54),1.0,-106.37,G G T A T G A A T A A G G A G A A G G C G G G A G A G A A G G G G A A G G A A A G C C G G A G G G G A A A G A A G G T G T G T T A C G T C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (6.83),1.0,-33.66,G C T A C A T A A A A G G C A G C T G T A G A A C A T C T G G G A A G G T C A A T G A T A G C A T C T G C C T A G A G T C A A A C C T C C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.78),1.0,-0.9,C A T A T A A A C G G G T C T C C A G G G C C T G G A G G G A C T G C A C A T C C T G G G C C T G C G G C G C A G T G T A G A C C T G G G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (5.76),1.0,-88.14,C T T A T A T A A A A G G C C T A C A G A G G T G C A A G T A G T G A A C G C C T G A C G C C C C G A C C A C T G T G C T C T C C A T T C G
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (3.22),1.0,-9.44,A A T A A A T A T A C A G T G C T T G T C C T T A G C C T T T C T G T G G G C A T A C C A G T G T C A G C T G C A C T T G T A G G G G C C C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.95),1.0,-8.9,G T T T A T A A G G G G A G C T G T G A C A A T C T T C T T G C C A G C C C T C T T C C T C C C A C T C G G C T C C T C T T A C G G A G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (7.51),1.0,1.76,A C A C T T T A A A A G G A T G C T T G T C C A G A G A T T T C T C T C T C C T T C A G T G A G G A C C C A C T G G A T T T G T G G C T G C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (8.58),1.0,1.6,A G T G T A T A A A A G G G G A A G G G C T A A G G G A G C C A C A G A A C C T C A G T G G A T C T C A G A G A G A G C C C C A G A C T G A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.41),1.0,-2.86,G A G T A T A A A G A G A G C T G G G G G G T C A C A C T C C T G A A A T T G C T G T G A C C G C A G C A T C T C T A G G T A A G T G C T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (9.79),1.0,2.01,G A G T A G A A A A G C A G C C G C A G G C T G T G G C G C T C C A C C A C G C C G T C C G G G T G G G C C T A G C A G T C G C T C C A T T
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (4.24),1.0,-2.28,T T T C T A C A T A A A C T G G C T C T A A C T T T C A G G T G T G T C T T C A C A T G C C C C G T T T G C T G C C T G A A C C T C T C C A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (2.15),1.0,-2.98,T G G T A T A A G A G G C A G C C A G G G C A C C G A G G C A A T G A G C T A T C T G C T C A G C T T A A T A G C A G G A C G C T G G C A A
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,0 (9.92),1.0,-26.12,G C A G A T A A A A C G G T C A T T T A T G G T A C C C A C C C T A A C A G A T T A G T C A G T C A G A C C C A G G A G G A G T G A G G T C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.34),1.0,-4.31,C T T A T A A A A G A G G C T T C A C A A G A C A T T T G G A C T A T T T C A C C C T T C C A T T C A T T C C A C C A T G T G A G G A C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (3.95),1.0,-2.49,C T T A T A A A A A G G T T C T C T G G C T G G A C G C T G C T C A A T C C A C T G C C T A G C A G G T G G C C C A T T C C A G T T G G A G
,,,,


In [161]:
def sum_of_windows_np(arr):
    # Создаем 2D массив с окнами размером 7xN, где каждый столбец - это окно
    windows = np.lib.stride_tricks.sliding_window_view(arr[0], window_shape=7)
    
    # Шаг 2: суммируем каждое окно по строкам (оси 1) и возвращаем результат
    window_sums = np.sum(windows[::1, :], axis=1)
    
    return window_sums
    
def top_attr_score(all_scores, all_seqs, k, n_top):

    counts = defaultdict(int)
    score_sums = defaultdict(float)
    
    for scores, seq in zip(all_scores, all_seqs):
        sums = sum_of_windows_np(scores)
        for i, score in enumerate(sums):
            kmer = seq[i:i+k]
            counts[kmer] += 1
            score_sums[kmer] += score
            
    importances = defaultdict(float)
    for key in counts:
        importances[key] = score_sums[key] / counts[key]
        
    result = [(k, v, counts[k]) for k, v in sorted(importances.items(), key=lambda item: item[1])][-n_top:][::-1]
    # print(f'Top-{n_top} most important {k}mers:')
    return result

In [129]:
tp_seqs = []
for seq in seqs:
    tp_seqs.append(seq['sequence'])

In [90]:
mer3 = top_attr_score(lrp_scores[:100], tp_seqs[:100], 3, 5)
mer2 = top_attr_score(lrp_scores[:100], tp_seqs[:100], 2, 5)
mer1 = top_attr_score(lrp_scores[:100], tp_seqs[:100], 1, 5)
top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)




In [157]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt})

Unnamed: 0,kmer,Score,Count
0,GCC,0.080427,184
1,GAG,0.019399,168
2,CTG,-0.045958,175
3,TGC,-0.094487,136
4,CGG,-0.12844,121
5,GC,-0.278866,597
6,GG,-0.479605,551
7,CC,-0.568345,543
8,CA,-0.725476,401
9,AG,-0.758879,543


## SHAP

In [None]:
import shap
import transformers

classifier = transformers.pipeline("text-classification", top_k=None, model=student, tokenizer=tokenizer)

In [49]:
explainer = shap.Explainer(classifier, tokenizer)

In [143]:
shap_values = explainer(tp_seqs[:100])

PartitionExplainer explainer: 101it [02:10,  1.41s/it]                          


In [144]:
# model scores for TP sequences
scores = []
for val in shap_values:
    scores.append(val.values[:, 1])

In [154]:
mer3 = top_attr_score(scores, tp_seqs[:100], 3, 5)
mer2 = top_attr_score(scores, tp_seqs[:100], 2, 5)
mer1 = top_attr_score(scores, tp_seqs[:100], 1, 5)
top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)




In [155]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt}) # 100 from test

Unnamed: 0,kmer,Score,Count
0,TTC,2.948153e-08,90
1,GTT,2.786983e-08,80
2,CGT,2.506472e-08,46
3,CCG,2.206902e-08,112
4,CGG,1.40077e-08,123
5,CG,1.009704e-08,322
6,CC,5.040594e-09,553
7,TC,3.203104e-10,346
8,TT,2.908715e-10,349
9,GG,-7.183778e-09,562


In [54]:
shap.plots.text(shap_values)

## Integrated Gradients

In [110]:
from transformers_interpret import SequenceClassificationExplainer

cls_explainer = SequenceClassificationExplainer(model=student, tokenizer=tokenizer_trunc)

cls_explainer(text=tokenized_dataset["test"][0]['sequence'])

[('G', 0.0020596207483561993),
 ('C', -0.0003802532383775007),
 ('A', -0.00440157753961326),
 ('A', -0.0029835699811859373),
 ('T', -0.015863297004601894),
 ('A', -0.0008023940074261023),
 ('A', 0.009594203966061544),
 ('A', 0.006107401986125816),
 ('A', -0.010224080398650512),
 ('G', 0.037114302781969664),
 ('G', 0.004887322874535199),
 ('C', 0.019753670172447738),
 ('T', 0.0006453859658875439),
 ('T', 3.0590645925541204e-06),
 ('A', -0.00893307328550077),
 ('G', 0.019276701923606462),
 ('C', -0.000810499025868952),
 ('C', 0.020880984279230262),
 ('A', 0.005710285685256842),
 ('C', 0.010734844904841813),
 ('A', -0.039340951830356685),
 ('T', -0.04388842041396035),
 ('A', -0.03542239763252787),
 ('G', 0.05078919719444281),
 ('T', 0.011144081666473757),
 ('G', 0.02226013829873419),
 ('C', 0.029932931209744426),
 ('A', -0.007395757142623885),
 ('T', -0.0036078280364048045),
 ('G', -0.007698166666664917),
 ('C', -0.013118873566202479),
 ('A', -0.006963222186675395),
 ('G', -0.005301528369

In [111]:
cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,NEGATIVE (0.78),NEGATIVE,1.58,G C A A T A A A A G G C T T A G C C A C A T A G T G C A T G C A G A G C T C T G A C T T C T A G G G C A A A A C A T A A G C A T G T A C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,NEGATIVE (0.78),NEGATIVE,1.58,G C A A T A A A A G G C T T A G C C A C A T A G T G C A T G C A G A G C T C T G A C T T C T A G G G C A A A A C A T A A G C A T G T A C A C
,,,,


In [159]:
raw_scores = []
i = 0
for data in tqdm(seqs):
    if i == 100:
        break
    i += 1
    
    expl_seq = cls_explainer(text=data['sequence'])
    seq, attr_score = zip(*expl_seq)
    raw_scores.append([attr_score])

 33%|█████████████▋                           | 100/300 [00:04<00:08, 24.89it/s]


In [162]:
mer3 = top_attr_score(raw_scores, tp_seqs[:100], 3, 5)
mer2 = top_attr_score(raw_scores, tp_seqs[:100], 2, 5)
mer1 = top_attr_score(raw_scores, tp_seqs[:100], 1, 5)

top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)

Top-5 most important 3mers:
Top-5 most important 2mers:
Top-5 most important 1mers:


In [163]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt})

Unnamed: 0,kmer,Score,Count
0,CAT,0.099243,65
1,TCG,0.087065,41
2,ATG,0.069443,54
3,GGA,0.068183,133
4,ATT,0.065082,57
5,CC,0.050274,543
6,GA,0.042288,381
7,GG,0.038817,551
8,G,0.030486,1805
9,C,0.028874,1719


# Interpretation of teacher

## Integrated Gradients

In [108]:
from typing import List, Optional, Dict, Sequence, Tuple

from transformers import PreTrainedTokenizer
from transformers_interpret import SequenceClassificationExplainer
from tqdm import tqdm

class CaduceusTokenizerTrunc(PreTrainedTokenizer): # dummy class that removes SEP token from tokenized sequence 
    model_input_names = ["input_ids"]

    def __init__(self,
                 model_max_length: int,
                 characters: Sequence[str] = ("A", "C", "G", "T", "N"),
                 complement_map=None,
                 bos_token="[BOS]",
                 eos_token="[SEP]",
                 sep_token="[SEP]",
                 cls_token="[CLS]",
                 pad_token="[PAD]",
                 mask_token="[MASK]",
                 unk_token="[UNK]",
                 **kwargs):
        """Character tokenizer for Hugging Face transformers.

        Adapted from https://huggingface.co/LongSafari/hyenadna-tiny-1k-seqlen-hf/blob/main/tokenization_hyena.py
        Args:
            model_max_length (int): Model maximum sequence length.
            characters (Sequence[str]): List of desired characters. Any character which
                is not included in this list will be replaced by a special token called
                [UNK] with id=6. Following is a list of the special tokens with
                their corresponding ids:
                    "[CLS]": 0
                    "[SEP]": 1
                    "[BOS]": 2
                    "[MASK]": 3
                    "[PAD]": 4
                    "[RESERVED]": 5
                    "[UNK]": 6
                an id (starting at 7) will be assigned to each character.
            complement_map (Optional[Dict[str, str]]): Dictionary with string complements for each character.
        """
        if complement_map is None:
            complement_map = {"A": "T", "C": "G", "G": "C", "T": "A", "N": "N"}
        self.characters = characters
        self.model_max_length = model_max_length

        self._vocab_str_to_int = {
            "[CLS]": 0,
            "[SEP]": 1,
            "[BOS]": 2,
            "[MASK]": 3,
            "[PAD]": 4,
            "[RESERVED]": 5,
            "[UNK]": 6,
            **{ch: i + 7 for i, ch in enumerate(self.characters)},
        }
        self._vocab_int_to_str = {v: k for k, v in self._vocab_str_to_int.items()}
        add_prefix_space = kwargs.pop("add_prefix_space", False)
        padding_side = kwargs.pop("padding_side", "left")

        self._complement_map = {}
        for k, v in self._vocab_str_to_int.items():
            complement_id = self._vocab_str_to_int[complement_map[k]] if k in complement_map.keys() else v
            self._complement_map[self._vocab_str_to_int[k]] = complement_id

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            sep_token=sep_token,
            cls_token=cls_token,
            pad_token=pad_token,
            mask_token=mask_token,
            unk_token=unk_token,
            add_prefix_space=add_prefix_space,
            model_max_length=model_max_length,
            padding_side=padding_side,
            **kwargs,
        )

    @property
    def vocab_size(self) -> int:
        return len(self._vocab_str_to_int)

    @property
    def complement_map(self) -> Dict[int, int]:
        return self._complement_map

    def _tokenize(self, text: str, **kwargs) -> List[str]:
        return list(text.upper())  # Convert all base pairs to uppercase

    def _convert_token_to_id(self, token: str) -> int:
        return self._vocab_str_to_int.get(token, self._vocab_str_to_int["[UNK]"])

    def _convert_id_to_token(self, index: int) -> str:
        return self._vocab_int_to_str[index]

    def convert_tokens_to_string(self, tokens):
        return "".join(tokens)  # Note: this operation has lost info about which base pairs were originally lowercase

    def get_special_tokens_mask(
        self,
        token_ids_0: List[int],
        token_ids_1: Optional[List[int]] = None,
        already_has_special_tokens: bool = False,
    ) -> List[int]:
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0,
                token_ids_1=token_ids_1,
                already_has_special_tokens=True,
            )

        result = ([0] * len(token_ids_0)) + [1]
        if token_ids_1 is not None:
            result += ([0] * len(token_ids_1)) + [1]
        return result

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        sep = [self.sep_token_id]
        # cls = [self.cls_token_id]
        result = token_ids_0# + sep
        if token_ids_1 is not None:
            result += token_ids_1 + sep
        return result

    def get_vocab(self) -> Dict[str, int]:
        return self._vocab_str_to_int

    # Fixed vocabulary with no vocab file
    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple:
        return ()

In [109]:
tokenizer_trunc = CaduceusTokenizerTrunc(
    model_max_length=131072,
    complement_map = tokenizer.complement_map
)

In [43]:
from transformers_interpret import SequenceClassificationExplainer

cls_explainer = SequenceClassificationExplainer(model=teacher, tokenizer=tokenizer_trunc)

cls_explainer(text=tokenized_dataset["test"][0]['sequence'])

[('G', -0.02272874780077807),
 ('C', 0.020332691117615546),
 ('A', -0.019477094306320768),
 ('A', 0.09589136748433773),
 ('T', -0.0599355409352968),
 ('A', 0.05281295133943184),
 ('A', 0.03305630671031337),
 ('A', 0.04089035775597921),
 ('A', 0.09401061757626873),
 ('G', -0.01581770532093722),
 ('G', 0.07625482445372932),
 ('C', -0.2922637637939151),
 ('T', -0.01730136130445904),
 ('T', 0.19015506699283385),
 ('A', 0.0880629834694845),
 ('G', 0.03751968139555241),
 ('C', -0.14580838708043123),
 ('C', -0.2664387000980375),
 ('A', -0.001787385286473923),
 ('C', -0.27507493490571505),
 ('A', 0.10232435821865996),
 ('T', 0.22811316558358752),
 ('A', 0.10842382930436646),
 ('G', -0.14053195709265),
 ('T', -0.000706333359591897),
 ('G', -0.1742380503082306),
 ('C', -0.23884437465317537),
 ('A', 0.11248097342024713),
 ('T', -0.003283601215186013),
 ('G', -0.01684023465526359),
 ('C', -0.2883384708030539),
 ('A', 0.08469908272924048),
 ('G', -0.05970732971595813),
 ('A', -0.0020430762832530825

In [44]:
cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.54),LABEL_0,0.24,G C A A T A A A A G G C T T A G C C A C A T A G T G C A T G C A G A G C T C T G A C T T C T A G G G C A A A A C A T A A G C A T G T A C A C
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.54),LABEL_0,0.24,G C A A T A A A A G G C T T A G C C A C A T A G T G C A T G C A G A G C T C T G A C T T C T A G G G C A A A A C A T A A G C A T G T A C A C
,,,,


In [63]:
import numpy as np
npzfile = np.load('caduceus/test_attr_scores_new1.npz')

labels=npzfile['label']
preds=npzfile['pred']
raw_scores=npzfile['raw_scores']

In [69]:
seqs = []
i = 0
for data in tqdm(tokenized_dataset["test"]):
    if i==3777:
        break
    seqs += [data['sequence']]
    i += 1

 64%|██████████████████████▉             | 3777/5920 [00:00<00:00, 25663.60it/s]


In [70]:
tp_seqs = np.array(seqs)[(np.array(labels)==1) & (np.array(preds)==1)]
tp_scores = np.array(raw_scores)[(np.array(labels)==1) & (np.array(preds)==1)]

In [73]:
tp_seqs

array(['GGCTATTTAAGTCAAGGGCCGGCTGGCAACCCCAGCAAGCTGTCCTGTGAGCCGCCAGCATGGATGACAT',
       'GATTACAAATGAGACTGGGAAACCCTCTTCAATAAGACCTGTGTGATGATAGATTGTGTCCTGAGCCCGC',
       'GAGTATATAAGTTCCAGTACCAGCAACAGCAGCAGAAGAAACAACATCTGTTTCAGGGCCATTGGACTCT',
       ...,
       'AGGTGGGACGGCGGCGCGCTGAGGAGGCGGTGCGCTCAGGCGGCGCTCCCGGAGATGCCCCGCGGCAGCC',
       'TTCACGTTGTCCCGTTGAACGTCACTTCCGCTTAGGGGCGGACACGTCCCAAACCTGTGGCGGGAGTTGG',
       'CCCCCGACCCCGCCTCCTCTCCCCCCACCCCTCCTTTCCTCTCCCTCCCCCGCCGAGGCCTGGCTGCCCC'],
      dtype='<U70')

In [181]:
from collections import defaultdict
import numpy as np

def sum_of_windows_np(arr, k):
    # Создаем 2D массив с окнами размером 5xN, где каждый столбец - это окно
    windows = np.lib.stride_tricks.sliding_window_view(arr, window_shape=k)
    
    # Шаг 2: mean каждое окно по строкам (оси 1) и возвращаем результат
    window_sums = np.mean(windows[::1, :], axis=1)
    
    return window_sums
    
def top_attr_score(all_scores, all_seqs, k, n_top):

    counts = defaultdict(int)
    score_sums = defaultdict(float)
    
    for scores, seq in zip(all_scores, all_seqs):
        sums = sum_of_windows_np(scores, k)
        for i, score in enumerate(sums):
            kmer = seq[i:i+k]
            counts[kmer] += 1
            score_sums[kmer] += score
            
    importances = defaultdict(float)
    for key in counts:
        importances[key] = score_sums[key] / counts[key]
        
    result = [(k, v, counts[k]) for k, v in sorted(importances.items(), key=lambda item: item[1])][-n_top:][::-1]
    print(f'Top-{n_top} most important {k}mers:')
    #print(result[0])
    return result

In [111]:
mer3 = top_attr_score(tp_scores, tp_seqs, 3, 5)
mer2 = top_attr_score(tp_scores, tp_seqs, 2, 5)
mer1 = top_attr_score(tp_scores, tp_seqs, 1, 5)

Top-5 most important 3mers:
Top-5 most important 2mers:
Top-5 most important 1mers:


In [112]:
top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)

In [113]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt})

Unnamed: 0,kmer,Score,Count
0,GCG,0.082989,3894
1,GC,0.079525,12850
2,CGC,0.077936,3462
3,CGG,0.072569,3498
4,GCC,0.071867,3958
5,GGC,0.071304,4241
6,G,0.069405,37695
7,CG,0.067275,8978
8,GG,0.057543,12398
9,C,0.057237,35549


In [175]:
# to match results with shap:

tmp_scores = np.vstack([tp_scores[:300], tp_scores[-200:]])
tmp_seqs = np.concatenate([tp_seqs[:300], tp_seqs[-200:]])

mer3 = top_attr_score(tmp_scores, tmp_seqs, 3, 5)
mer2 = top_attr_score(tmp_scores, tmp_seqs, 2, 5)
mer1 = top_attr_score(tmp_scores, tmp_seqs, 1, 5)

Top-5 most important 3mers:
Top-5 most important 2mers:
Top-5 most important 1mers:


In [176]:
top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)

In [177]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt})

Unnamed: 0,kmer,Score,Count
0,GCG,0.081281,875
1,CGC,0.074222,800
2,GC,0.071823,3535
3,CGG,0.064753,872
4,GCC,0.06393,1084
5,CG,0.062027,2153
6,CCG,0.061412,770
7,G,0.057628,10775
8,C,0.053403,10254
9,CC,0.048623,3215


## SHAP

In [114]:
import shap
import transformers

classifier = transformers.pipeline("text-classification", top_k=None, model=teacher, tokenizer=tokenizer_trunc) # or top_k=1

Device set to use cuda:0


In [115]:
explainer = shap.Explainer(classifier)

In [122]:
import numpy as np
np.savez('tp_seqs.npz', tp_seq=tp_seqs)

In [147]:
shap_values = explainer(tp_seqs[0:300])
shap_scores = shap_values[:, :, 'LABEL_1'].values[:,:70]

all_shap_scores = np.vstack([all_shap_scores, shap_scores])

PartitionExplainer explainer: 151it [05:09,  2.12s/it]                          


In [155]:
shap_values = explainer(tp_seqs[-200:])
shap_scores = shap_values[:, :, 'LABEL_1'].values[:,:70]

PartitionExplainer explainer: 201it [06:49,  2.09s/it]                          


In [156]:
top_attr_score(shap_scores, tp_seqs[-200:], 3, 8)

Top-8 most important 3mers:


[('GGG', 0.003356151090107915, 476),
 ('CGG', 0.0028650444784636777, 448),
 ('TTT', 0.002653445934672126, 170),
 ('CCG', 0.0023996710436010936, 409),
 ('CCC', 0.0023600335443187985, 432),
 ('GCC', 0.0016350185272009862, 495),
 ('GGC', 0.0015769195227130401, 527),
 ('GCG', 0.0014734388901906205, 442)]

In [157]:
all_shap_scores = np.vstack([all_shap_scores, shap_scores])
np.savez('tp_shap_scores.npz', scores=all_shap_scores)

In [182]:
mer3 = top_attr_score(all_shap_scores, tmp_seqs, 3, 5)
mer2 = top_attr_score(all_shap_scores, tmp_seqs, 2, 5)
mer1 = top_attr_score(all_shap_scores, tmp_seqs, 1, 5)

top = mer3 + mer2 + mer1
tmp = sorted(top, key=lambda x: -x[1])

kmer, score, cnt = [], [], []
for k, s, c in tmp:
    kmer.append(k)
    score.append(s)
    cnt.append(c)

Top-5 most important 3mers:
Top-5 most important 2mers:
Top-5 most important 1mers:


In [185]:
import pandas as pd
pd.DataFrame({'kmer': kmer, 'Score': score, 'Count': cnt})

Unnamed: 0,kmer,Score,Count
0,GGG,0.004029,1032
1,CGG,0.003227,872
2,GG,0.003215,3444
3,CCG,0.003078,770
4,TTT,0.00285,459
5,CCC,0.002093,933
6,CG,0.001814,2153
7,CC,0.001162,3215
8,G,0.000295,10775
9,TT,-0.00042,1525
