In [1]:
import torch
import numpy as np
import psutil
import logging
import time
import random
from helical.models.hyena_dna import HyenaDNA, HyenaDNAConfig

  from .autonotebook import tqdm as notebook_tqdm

2026-01-13 19:52:43,215 - INFO:datasets:PyTorch version 2.7.0 available.


In [2]:
# Describe the `hyenadna-tiny-1k-seqlen-d256` checkpoint in a config object,
# then build the HyenaDNA wrapper from that config.
hyena_config = HyenaDNAConfig(model_name="hyenadna-tiny-1k-seqlen-d256")
model = HyenaDNA(configurer=hyena_config)

2026-01-13 19:52:45,543 - INFO:helical.models.hyena_dna.pretrained_model:Loaded pretrained weights ok!
2026-01-13 19:52:45,545 - INFO:helical.models.hyena_dna.model:Model finished initializing.
2026-01-13 19:52:45,545 - INFO:helical.models.hyena_dna.model:'hyenadna-tiny-1k-seqlen-d256' model is in 'eval' mode, on device 'cpu'.


In [3]:
# Prefer CUDA when PyTorch reports a usable GPU; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
DEVICE

'cpu'

In [4]:
# Move the inner `model.model` object to the GPU when one was detected.
# When DEVICE is "cpu" nothing happens and the model stays where it was loaded.
if DEVICE == "cuda":
    model.model.to(DEVICE)

In [5]:
# Download the dataset
from datasets import load_dataset

# Name of the downstream task whose rows are kept from the multi-task benchmark.
label = "promoter_tata"

# NOTE(review): trust_remote_code=True lets the dataset repository's loading
# script run locally — confirm the source is trusted before re-running.
# The filter reads `label` so the task name is defined in exactly one place.
dataset = load_dataset(
    "InstaDeepAI/nucleotide_transformer_downstream_tasks",
    trust_remote_code=True,
).filter(lambda x: x["task"] == label)

In [6]:
# Show the splits, columns and row counts that remain after the task filter.
dataset

DatasetDict({
    train: Dataset({
        features: ['sequence', 'name', 'label', 'task'],
        num_rows: 5509
    })
    test: Dataset({
        features: ['sequence', 'name', 'label', 'task'],
        num_rows: 621
    })
})

In [7]:
# Pull the two splits out of the DatasetDict in one step.
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [8]:
# Column names and dtypes of the training split.
train_dataset.features

{'sequence': Value(dtype='string', id=None),
 'name': Value(dtype='string', id=None),
 'label': Value(dtype='int32', id=None),
 'task': Value(dtype='string', id=None)}

In [9]:
# Raw sequence strings of the training split; display the first one, its
# length, and how many sequences there are in total.
sequences = train_dataset["sequence"]
(sequences[0], len(sequences[0]), len(sequences))

('CGCTCCCCCAGGAGTGTACTCCTGGTCAAAAGAGCGACATCACACGACGTAGGCCCGCCCGGCTTATCGAAGTCGAGCTGGGATTTGGGGGGGAACCTGACAGTATAGGTTGGGGGCCAGGACATTTATAGAACAACGGGAAAGACCTGCGCCAGCAGCTGAGAAGGAGGCCCCGTGATCAGCTCCAGCCATTTGCCAGCAACCGAAGCCCAGGAGCTTACATAATTTGCCAGGGCAGCACTGAGAGGTGACAGTTAGAGTTAAGTCGCTCTCGGAGCTCCGGGCTACCAGCGATTCTCT',
 300,
 5509)

In [29]:
# Profile on a small prefix of the training sequences.
SAMPLE_SIZE = 10
sample_sequences = sequences[0:SAMPLE_SIZE]
(len(sample_sequences), sample_sequences[0])

(10,
 'CGCTCCCCCAGGAGTGTACTCCTGGTCAAAAGAGCGACATCACACGACGTAGGCCCGCCCGGCTTATCGAAGTCGAGCTGGGATTTGGGGGGGAACCTGACAGTATAGGTTGGGGGCCAGGACATTTATAGAACAACGGGAAAGACCTGCGCCAGCAGCTGAGAAGGAGGCCCCGTGATCAGCTCCAGCCATTTGCCAGCAACCGAAGCCCAGGAGCTTACATAATTTGCCAGGGCAGCACTGAGAGGTGACAGTTAGAGTTAAGTCGCTCTCGGAGCTCCGGGCTACCAGCGATTCTCT')

In [30]:
# Raise the threshold of the HyenaDNA model logger to WARNING so that its INFO
# messages are not printed on every iteration of the inference loops.
_hyena_logger = logging.getLogger("helical.models.hyena_dna.model")
_hyena_logger.setLevel(logging.WARNING)

In [31]:
# Unbatched inference: one sequence per forward pass (BATCH_SIZE = 1).
# Per-sequence wall time goes into `latencies`; the whole loop is bracketed by
# overall_start / overall_end for the profiling cell that follows.
sample_embeddings = []
latencies = []
# Resident memory of this process before the run, in MiB.
start_rss = psutil.Process().memory_info().rss / (1024 * 1024)
overall_start = time.time()
for i in range(SAMPLE_SIZE):
  t_loop_in = time.time()
  tokens = model.process_data(sample_sequences[i])
  with torch.no_grad():
    embeddings = model.get_embeddings(tokens)
  t_loop_out = time.time()
  latencies.append(t_loop_out - t_loop_in)
  # Store every result exactly once. The previous version appended inside the
  # no_grad block and then again whenever the result was a torch.Tensor, which
  # would have duplicated tensor outputs in `sample_embeddings`.
  sample_embeddings.append(embeddings)
overall_end = time.time()

Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 19.71it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 35.75it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 30.79it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 34.93it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 38.82it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 29.81it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 30.12it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 35.39it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 30.66it/s]
Getting embeddings: 100%|██████████| 1/1 [00:00<00:00, 30.94it/s]


In [32]:
# Profiling (without Batching)
# Derives the summary figures from the timestamps and latencies recorded by
# the unbatched inference loop.
overall_time = overall_end - overall_start
avg_latency = np.mean(latencies)
throughput = SAMPLE_SIZE/overall_time
# Resident memory now, in MiB; the report prints the delta against start_rss.
end_rss = psutil.Process().memory_info().rss / (1024 * 1024)
peak_gpu = torch.cuda.max_memory_allocated() / (1024 * 1024) if torch.cuda.is_available() else 0

# The RAM line uses ".2f" (two decimals); the earlier "2f" only set a minimum
# field width and therefore printed six decimals.
print(f"""
------------ Profile : BATCH_SIZE = 1 ------------
Overall time taken to inference: {overall_time:.2f} s
Avg Latency per Batch: {1000*avg_latency:.2f} ms
Throughput Processed: {throughput:.2f} samples/s
CPU RAM Usage: {end_rss - start_rss:.2f} MB
GPU Memory Peak: {peak_gpu:.2f} MB
---------------------------------------------------
""")


------------ Profile : BATCH_SIZE = 1 ------------
Overall time taken to inference: 0.39 s
Avg Latency per Batch: 39.37 ms
Throughput Processed: 25.39 samples/s
CPU RAM Usage: 35.203125 MB
GPU Memory Peak: 0.00 MB
---------------------------------------------------



In [33]:
def force_move(data, device):
    """Move `data` to `device`, descending into plain dictionaries.

    Objects exposing a ``to`` attribute are moved by calling ``data.to(device)``.
    Dictionaries without such an attribute are rebuilt with every value moved
    recursively. Anything else is handed back unchanged.
    """
    # Anything with a .to() method (tensors, HF BatchEncoding, ...) moves itself.
    if hasattr(data, "to"):
        return data.to(device)
    # Neither movable nor a mapping: nothing to do.
    if not isinstance(data, dict):
        return data
    # Plain dict: move each value, keeping the keys as they are.
    moved = {}
    for key, value in data.items():
        moved[key] = force_move(value, device)
    return moved

In [34]:
# Inference time with GPU (BATCH)
# Sequences are tokenized and pushed through `model.model` BATCH_SIZE at a
# time; per-batch wall time goes into `latencies`.
sample_embeddings = []
latencies = []
if DEVICE == "cuda":
  # Restart peak-memory tracking so the figure reported afterwards describes
  # this run only, not allocations made by earlier cells.
  torch.cuda.reset_peak_memory_stats()
# Resident memory of this process before the run, in MiB.
start_rss = psutil.Process().memory_info().rss / (1024 * 1024)
BATCH_SIZE = 32
overall_start = time.time()
for i in range(0, SAMPLE_SIZE, BATCH_SIZE):
  t_loop_in = time.time()
  raw_tokens = model.process_data(sample_sequences[i:i + BATCH_SIZE])
  input_ids_tensor = torch.tensor(raw_tokens["input_ids"]).to(DEVICE)

  with torch.no_grad():
    outputs = model.model(input_ids=input_ids_tensor)
    embeddings = outputs
  if DEVICE == "cuda":
    # CUDA work is queued asynchronously; wait for it to finish so the
    # timestamp below covers the whole forward pass.
    torch.cuda.synchronize()
  t_loop_out = time.time()
  latencies.append(t_loop_out - t_loop_in)
  # Only tensor outputs are collected; any other return type is skipped.
  if isinstance(embeddings, torch.Tensor):
    sample_embeddings.append(embeddings)
overall_end = time.time()

In [35]:
# Inspect the tokenizer output left over from the last batch of the loop above.
raw_tokens

Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 10
})

In [36]:
# NOTE(review): this renders the full tensor contents; looking at the shapes
# (e.g. [e.shape for e in sample_embeddings]) is usually enough to check a run.
sample_embeddings

[tensor([[[ 0.2221, -1.1488, -0.3061,  ...,  0.1818,  0.3753,  0.3343],
          [ 0.4377, -2.3777, -0.9752,  ..., -0.0337, -0.5294, -0.2220],
          [ 0.3123, -2.7216, -1.0941,  ..., -1.2870, -0.4325, -0.4745],
          ...,
          [ 0.6696, -1.9989, -1.2394,  ..., -0.6132, -0.6580, -0.2012],
          [ 0.7257, -2.2866, -1.3089,  ..., -1.1637, -0.5974, -0.5252],
          [ 0.7874, -1.5952, -1.1560,  ..., -0.6187, -1.1269,  0.5691]],
 
         [[ 0.2221, -1.1488, -0.3061,  ...,  0.1818,  0.3753,  0.3343],
          [ 0.4377, -2.3777, -0.9752,  ..., -0.0337, -0.5294, -0.2220],
          [ 0.3821, -2.6401, -1.1393,  ..., -0.8344, -0.4520, -0.3291],
          ...,
          [ 0.4790, -2.5562, -1.5859,  ..., -0.7552, -0.7874,  0.5136],
          [ 0.4910, -2.3967, -1.1183,  ..., -0.3889, -0.7296, -0.2873],
          [ 0.5855, -2.3533,  0.0256,  ..., -0.0341, -0.3571, -0.1109]],
 
         [[ 0.2221, -1.1488, -0.3061,  ...,  0.1818,  0.3753,  0.3343],
          [ 0.2844, -2.3120,

In [37]:
# input_ids is a plain Python list here, which is why the batch loop wraps it
# in torch.tensor(...) before the forward pass.
type(raw_tokens['input_ids'])

list

In [38]:
# Profiling (with batching)
# Derives the summary figures from the timestamps and latencies recorded by
# the batched inference loop. numpy and psutil come from the import cell at
# the top of the notebook.
overall_time = overall_end - overall_start
avg_latency = np.mean(latencies)
throughput = SAMPLE_SIZE/overall_time
# Resident memory now, in MiB; the report prints the delta against start_rss.
end_rss = psutil.Process().memory_info().rss / (1024 * 1024)
peak_gpu = torch.cuda.max_memory_allocated() / (1024 * 1024) if torch.cuda.is_available() else 0

# The RAM line uses ".2f" (two decimals); the earlier "2f" only set a minimum
# field width and therefore printed six decimals.
print(f"""
------------ Profile : BATCH_SIZE = 32 ------------
Overall time taken to inference: {overall_time:.2f}
Avg Latency per Batch: {1000*avg_latency:.2f} ms
Throughput Processed: {throughput:.2f} samples/s
CPU RAM Usage: {end_rss - start_rss:.2f} MB
GPU Memory Peak: {peak_gpu:.2f} MB
---------------------------------------------------
""")


------------ Profile : BATCH_SIZE = 32 ------------
Overall time taken to inference: 0.15
Avg Latency per Batch: 149.04 ms
Throughput Processed: 67.00 samples/s
CPU RAM Usage: 123.375000 MB
GPU Memory Peak: 0.00 MB
---------------------------------------------------



# Perturbation-based inference

In [45]:
def add_pertubations(sequence_string, num_of_pertubations):
  """Return a copy of a nucleotide sequence with random point substitutions.

  Each perturbation picks one position uniformly at random and replaces the
  character there with a different nucleotide drawn from A/G/T/C. Positions
  are drawn independently, so the same position can be hit more than once and
  the result may differ from the input in fewer than `num_of_pertubations`
  places.

  Args:
    sequence_string: The sequence to perturb.
    num_of_pertubations: How many substitutions to apply.

  Returns:
    A string of the same length as `sequence_string`. The input is returned
    unchanged when it is empty or when `num_of_pertubations` is not positive.
  """
  nucleotides = ["A", "G", "T", "C"]
  length = len(sequence_string)
  # Nothing to mutate: avoid asking the RNG for an index into an empty range.
  if length == 0 or num_of_pertubations <= 0:
    return sequence_string
  seq_list = list(sequence_string)

  for _ in range(num_of_pertubations):
    # np.random.randint excludes its upper bound, so passing `length` covers
    # every index 0..length-1. The previous `length - 1` never touched the last
    # position and raised ValueError for single-character sequences.
    random_idx = np.random.randint(0, length)

    original_nucleotide = seq_list[random_idx]
    possible_pertubations = [n for n in nucleotides if n != original_nucleotide]
    new_nucleotide = random.choice(possible_pertubations)

    # apply the perturbation to mutate
    seq_list[random_idx] = new_nucleotide

  # return perturbed sequence
  return "".join(seq_list)


In [46]:
# NOTE(review): PERTURBATION_PER_SEQUENCE is not read by any visible cell — the
# cell that fills `preturbed_sequences` passes num_of_pertubations=1 explicitly.
# Confirm whether this constant was meant to drive that call.
PERTURBATION_PER_SEQUENCE = 10
# Holds the perturbed copies of the entries in `sample_sequences`.
preturbed_sequences = []

In [47]:
# Rebuild the list from scratch so that re-running this cell cannot append a
# second set of perturbed copies on top of the ones from a previous run.
# Each sampled sequence receives exactly one random substitution.
preturbed_sequences = [
    add_pertubations(sequence, num_of_pertubations=1)
    for sequence in sample_sequences
]

In [48]:
# Sanity check: there should be one perturbed sequence per sampled input.
len(preturbed_sequences)

10

In [54]:
# Batched inference over the perturbed sequences.
# The batches are cut from `preturbed_sequences`; the previous version sliced
# `sample_sequences`, so the stored "perturbed" embeddings were actually those
# of the unperturbed inputs.
perturbed_embeddings = []
latencies = []
if DEVICE == "cuda":
  # Restart peak-memory tracking so the figure reported afterwards describes
  # this run only, not allocations made by earlier cells.
  torch.cuda.reset_peak_memory_stats()
# Resident memory of this process before the run, in MiB.
start_rss = psutil.Process().memory_info().rss / (1024 * 1024)
BATCH_SIZE = 32
overall_start = time.time()
for i in range(0, len(preturbed_sequences), BATCH_SIZE):
  t_loop_in = time.time()
  raw_tokens = model.process_data(preturbed_sequences[i:i + BATCH_SIZE])
  input_ids_tensor = torch.tensor(raw_tokens["input_ids"]).to(DEVICE)

  with torch.no_grad():
    outputs = model.model(input_ids=input_ids_tensor)
    embeddings = outputs
  if DEVICE == "cuda":
    # CUDA work is queued asynchronously; wait for it to finish so the
    # timestamp below covers the whole forward pass.
    torch.cuda.synchronize()
  t_loop_out = time.time()
  latencies.append(t_loop_out - t_loop_in)
  # Only tensor outputs are collected; any other return type is skipped.
  if isinstance(embeddings, torch.Tensor):
    perturbed_embeddings.append(embeddings)
overall_end = time.time()

In [None]:
# Number of collected batches (the loop stores one tensor per batch); this
# avoids printing the full tensor contents.
len(perturbed_embeddings)

1

In [58]:
# Profiling (with batching) for the perturbed-sequence run.
# numpy and psutil come from the import cell at the top of the notebook.
overall_time = overall_end - overall_start
avg_latency = np.mean(latencies)
# Throughput is based on the same count that the report prints as
# "Number of Samples processed".
throughput = len(preturbed_sequences)/overall_time
# Resident memory now, in MiB; the report prints the delta against start_rss.
end_rss = psutil.Process().memory_info().rss / (1024 * 1024)
peak_gpu = torch.cuda.max_memory_allocated() / (1024 * 1024) if torch.cuda.is_available() else 0

# The RAM line uses ".2f" (two decimals); the earlier "2f" only set a minimum
# field width and therefore printed six decimals.
print(f"""
------------ Profile : BATCH_SIZE = 32 ------------
Number of Samples processed: {len(preturbed_sequences)}
Overall time taken to inference: {overall_time:.2f}
Avg Latency per Batch: {1000*avg_latency:.2f} ms
Throughput Processed: {throughput:.2f} samples/s
CPU RAM Usage: {end_rss - start_rss:.2f} MB
GPU Memory Peak: {peak_gpu:.2f} MB
---------------------------------------------------
""")


------------ Profile : BATCH_SIZE = 32 ------------
Number of Samples processed: 10
Overall time taken to inference: 0.14
Avg Latency per Batch: 141.59 ms
Throughput Processed: 70.46 samples/s
CPU RAM Usage: 83.406250 MB
GPU Memory Peak: 0.00 MB
---------------------------------------------------

