# BioNeMo Container Interactive Access

This notebook demonstrates how to interact with the BioNeMo container from a Jupyter notebook running on a SageMaker notebook instance.

## Setup - Helper Functions

In [12]:
import subprocess
import tempfile
import os

CONTAINER_NAME = "bionemo"

def run_in_container(command):
    """Run a shell command inside the BioNeMo container.

    The command is handed to ``bash -c`` in the container as a single argv
    entry, so it never passes through a shell on the host. Quotes, ``$VAR``
    references, backticks and pipes in ``command`` are therefore interpreted
    only by the container's bash.

    Args:
        command: Shell command line to execute inside ``CONTAINER_NAME``.

    Returns:
        The command's captured stdout as text. On a non-zero exit status the
        captured stderr is printed with an ``Error:`` prefix and whatever
        stdout was produced is still returned. If the ``docker`` executable
        cannot be found, the error is printed and ``""`` is returned.
    """
    docker_cmd = ["docker", "exec", CONTAINER_NAME, "bash", "-c", command]
    try:
        result = subprocess.run(docker_cmd, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # Keep the "report, don't raise" behaviour of the previous shell-based
        # call when the docker CLI is not installed.
        print(f"Error: {exc}")
        return ""
    if result.returncode != 0:
        print(f"Error: {result.stderr}")
    return result.stdout

def run_python_in_container(python_code, host_dir='/home/ec2-user/SageMaker', container_dir='/workspace'):
    """Run Python code inside the BioNeMo container by writing to a temp file.

    The code is written to a uniquely named, dot-prefixed ``.py`` file in
    ``host_dir``, executed in the container through ``run_in_container`` using
    the matching path under ``container_dir``, and the file is removed
    afterwards (also when the run fails).

    Args:
        python_code: Source text of the script to execute.
        host_dir: Directory on the notebook host in which the script is
            written. Must be the directory that is visible inside the
            container as ``container_dir``.
        container_dir: Path under which ``host_dir`` appears in the container.
            Assumed to contain no shell metacharacters, since it is embedded
            in the command line unquoted.

    Returns:
        Whatever ``run_in_container`` returns for the ``python <script>`` call.
    """
    # A unique name per call means concurrent calls (or other notebooks sharing
    # the directory) cannot overwrite each other's script.
    fd, host_path = tempfile.mkstemp(prefix='.temp_script_', suffix='.py', dir=host_dir)
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(python_code)
        # mkstemp creates the file owner-only (0600); relax it in case the
        # container process runs as a different, non-root user.
        os.chmod(host_path, 0o644)
        container_path = f"{container_dir.rstrip('/')}/{os.path.basename(host_path)}"
        return run_in_container(f"python {container_path}")
    finally:
        # Do not leave generated scripts behind in the shared workspace.
        os.remove(host_path)

print("Helper functions loaded!")

Helper functions loaded!


In [13]:
# Probe PyTorch inside the container: report the torch version, whether CUDA
# is available, the CUDA version, and the name of every visible GPU.
# NOTE(review): this cell is identical to the one under "Check PyTorch & CUDA"
# further down; consider keeping only one of them.
python_code = """
import torch
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
print(f'CUDA version: {torch.version.cuda}')
print(f'Number of GPUs: {torch.cuda.device_count()}')
for i in range(torch.cuda.device_count()):
    print(f'  GPU {i}: {torch.cuda.get_device_name(i)}')
"""

print(run_python_in_container(python_code))

PyTorch version: 2.8.0a0+5228986c39.nv25.06
CUDA available: True
CUDA version: 12.9
Number of GPUs: 4
  GPU 0: NVIDIA A10G
  GPU 1: NVIDIA A10G
  GPU 2: NVIDIA A10G
  GPU 3: NVIDIA A10G



## Verify Container is Running

In [None]:
# Ask Docker for the status of the helper container. The name is taken from
# CONTAINER_NAME (defined with the helper functions) so that this check, the
# start-up hint below and run_in_container() all refer to the same container.
# NOTE(review): Docker's `name` filter also matches partial names, so a
# container called e.g. "bionemo2" would satisfy this check as well.
try:
    status = subprocess.run(
        ["docker", "ps", "--filter", f"name={CONTAINER_NAME}", "--format", "{{.Status}}"],
        capture_output=True,
        text=True,
    ).stdout.strip()
except FileNotFoundError:
    # docker CLI not installed: report "not running", as the shell-based check did.
    status = ""

if status:
    print(f"✅ Container is running: {status}")
else:
    print("❌ Container is not running!")
    print("\nStart it with this command in the terminal:")
    # The image reference is a placeholder the reader must fill in.
    print(f"""
docker run -d \\
    --name {CONTAINER_NAME} \\
    --gpus all \\
    --ipc=host \\
    --ulimit memlock=-1 \\
    --ulimit stack=67108864 \\
    -e TRITON_LIBCUDA_PATH=/usr/lib/x86_64-linux-gnu/libcuda.so.1 \\
    -v /home/ec2-user/SageMaker:/workspace \\
    <YOUR_AWS_ACCOUNT_ID>.dkr.ecr.<REGION>.amazonaws.com/<IMAGE>:<TAG> \\
    tail -f /dev/null
""")

✅ Container is running: Up 32 minutes


## Check GPU Status

In [3]:
# Print the full `nvidia-smi` report produced inside the container.
print(run_in_container("nvidia-smi"))

Wed Feb  4 02:11:51 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.211.01             Driver Version: 570.211.01     CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10G                    On  |   00000000:00:1B.0 Off |                    0 |
|  0%   21C    P8             15W /  300W |       0MiB /  23028MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A10G                    On  |   00

## Check BioNeMo Packages

In [4]:
# List the container's pip packages, keeping only lines that contain
# "bionemo" (case-insensitive grep).
print("BioNeMo packages installed:")
print(run_in_container("pip list | grep -i bionemo"))

BioNeMo packages installed:
bionemo-amplify                  0.0.1
bionemo-core                     2.4.5
bionemo-esm2                     2.4
bionemo-evo2                     2.4
bionemo-example_model            0.0.0
bionemo-fw                       0.0.0
bionemo-geneformer               2.4
bionemo-llm                      2.4.5
bionemo-moco                     0.0.2.2
bionemo-noodles                  0.1.2
bionemo-scdl                     0.1.3
bionemo-scspeedtest              0.0.1
bionemo-size-aware-batching      1.0.0
bionemo-testing                  2.4.1
bionemo-webdatamodule            1.0.0



## Check PyTorch & CUDA

In [14]:
# Script executed inside the container: prints the torch version, CUDA
# availability, CUDA version, GPU count and the device name of each GPU.
python_code = """
import torch
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
print(f'CUDA version: {torch.version.cuda}')
print(f'Number of GPUs: {torch.cuda.device_count()}')
for i in range(torch.cuda.device_count()):
    print(f'  GPU {i}: {torch.cuda.get_device_name(i)}')
"""

print(run_python_in_container(python_code))

PyTorch version: 2.8.0a0+5228986c39.nv25.06
CUDA available: True
CUDA version: 12.9
Number of GPUs: 4
  GPU 0: NVIDIA A10G
  GPU 1: NVIDIA A10G
  GPU 2: NVIDIA A10G
  GPU 3: NVIDIA A10G



## List Available Evo2 Model Sizes

In [15]:
# Script executed inside the container: prints every key of
# MAMBA_MODEL_OPTIONS from bionemo.evo2.models.mamba.
# The 'nemo.utils.import_utils' logger is raised to ERROR before the import,
# presumably to hide the INFO-level "Import of ... failed" messages that
# appear in the training cell's output -- TODO confirm.
python_code = """
import logging
logging.getLogger('nemo.utils.import_utils').setLevel(logging.ERROR)

from bionemo.evo2.models.mamba import MAMBA_MODEL_OPTIONS

print('Available Evo2 Mamba model sizes:')
for name in MAMBA_MODEL_OPTIONS.keys():
    print(f'  - {name}')
"""

print(run_python_in_container(python_code))

Available Evo2 Mamba model sizes:
  - hybrid_mamba_8b



## Evo2 Tokenization Example

In [17]:
# Script executed inside the container: builds NeMo's 'byte-level' tokenizer
# via get_nmt_tokenizer, converts three sample DNA strings to ids with
# text_to_ids, converts them back with ids_to_text, and prints
# "sequence -> ids -> decoded" for each one.
python_code = """
import logging
logging.getLogger('nemo.utils.import_utils').setLevel(logging.ERROR)

# This is how train_evo2 imports it
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer

tokenizer = get_nmt_tokenizer('byte-level')

sequences = [
    'ATCGATCGATCG',
    'GCTAGCTAGCTA', 
    'AAACCCGGGTTTT',
]

print('Tokenizing DNA sequences:')
for seq in sequences:
    tokens = tokenizer.text_to_ids(seq)
    decoded = tokenizer.ids_to_text(tokens)
    print(f'  {seq} -> {tokens} -> {decoded}')
"""
print(run_python_in_container(python_code))

[NeMo I 2026-02-04 02:18:08 nemo_logging:393] Using byte-level tokenization
Tokenizing DNA sequences:
  ATCGATCGATCG -> [65, 84, 67, 71, 65, 84, 67, 71, 65, 84, 67, 71] -> ATCGATCGATCG
  GCTAGCTAGCTA -> [71, 67, 84, 65, 71, 67, 84, 65, 71, 67, 84, 65] -> GCTAGCTAGCTA
  AAACCCGGGTTTT -> [65, 65, 65, 67, 67, 67, 71, 71, 71, 84, 84, 84, 84] -> AAACCCGGGTTTT



## Helper Class for Easy Access

In [18]:
class BioNeMoContainer:
    """Helper class for interacting with the BioNeMo container.

    Every method sends its command with ``docker exec`` to the container
    named by ``container_name``, so several instances can target different
    containers.
    """

    def __init__(self, container_name="bionemo"):
        """Remember which container the other methods should talk to."""
        self.container_name = container_name

    def _exec(self, argv):
        """Run ``argv`` in the container; return stdout and print stderr on failure."""
        try:
            result = subprocess.run(
                ["docker", "exec", self.container_name, *argv],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError as exc:
            # docker CLI missing: report it instead of raising, like a failed command.
            print(f"Error: {exc}")
            return ""
        if result.returncode != 0:
            print(f"Error: {result.stderr}")
        return result.stdout

    def run(self, command):
        """Run a shell command (interpreted by ``bash -c`` inside the container)."""
        return self._exec(["bash", "-c", command])

    def python(self, code):
        """Run Python code."""
        # Same shared-file hand-off as run_python_in_container(), but executed
        # in this instance's container rather than the module-level one.
        with open('/home/ec2-user/SageMaker/.temp_script.py', 'w') as f:
            f.write(code)
        return self.run("python /workspace/.temp_script.py")

    def script(self, path):
        """Run a Python script (use /workspace path for files in SageMaker dir)."""
        # The path is passed as its own argv entry, so it needs no shell quoting.
        return self._exec(["python", path])

    def gpu_memory(self):
        """Get GPU memory usage.

        Prints one ``GPU <index>: <used>MB / <total>MB`` line per row reported
        by ``nvidia-smi``; prints nothing when the query returns no rows.
        """
        output = self.run("nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits")
        # Skip blank lines so an empty result (e.g. the command failed) does
        # not blow up on tuple unpacking.
        rows = [line for line in output.splitlines() if line.strip()]
        for i, line in enumerate(rows):
            used, total = (field.strip() for field in line.split(','))
            print(f"GPU {i}: {used}MB / {total}MB")

    def list_files(self, path="/workspace"):
        """List files in a directory (``ls -la`` inside the container)."""
        return self.run(f"ls -la {path}")

# Shared handle used by the cells that follow.
bionemo = BioNeMoContainer()

# Confirm the helper exists, then show a cheat-sheet of example calls.
print("BioNeMoContainer helper created!")
print("\nUsage:")
print(
    '  bionemo.run("nvidia-smi")',
    '  bionemo.python("import torch; print(torch.cuda.is_available())")',
    '  bionemo.gpu_memory()',
    '  bionemo.list_files("/workspace")',
    '  bionemo.script("/workspace/my_script.py")',
    sep="\n",
)

BioNeMoContainer helper created!

Usage:
  bionemo.run("nvidia-smi")
  bionemo.python("import torch; print(torch.cuda.is_available())")
  bionemo.gpu_memory()
  bionemo.list_files("/workspace")
  bionemo.script("/workspace/my_script.py")


## Check GPU Memory Usage

In [19]:
bionemo.gpu_memory()

GPU 0: 0MB / 23028MB
GPU 1: 0MB / 23028MB
GPU 2: 0MB / 23028MB
GPU 3: 0MB / 23028MB


## List Workspace Files

In [20]:
print(bionemo.list_files("/workspace"))

total 96
drwxr-xr-x 10 ubuntu ubuntu  4096 Feb  4 02:17 .
drwxr-xr-x  1 root   root      84 Feb  4 01:39 ..
drwx------  4 ubuntu ubuntu  4096 Feb  4 01:04 .Trash-1000
drwxrwxr-x  2 ubuntu ubuntu  4096 Feb  4 02:11 .ipynb_checkpoints
drwxr-xr-x  2 ubuntu ubuntu  4096 Feb  4 00:15 .sparkmagic
-rw-rw-r--  1 ubuntu ubuntu   517 Feb  4 02:17 .temp_script.py
drwxrwxr-x  2 ubuntu ubuntu  4096 Feb  4 02:11 .virtual_documents
-rw-rw-r--  1 ubuntu ubuntu  4398 Feb  4 01:37 README.md
drwxr-xr-x  3 root   root    4096 Feb  4 01:40 bionemo2
-rw-rw-r--  1 ubuntu ubuntu 17246 Feb  4 02:17 bionemo_interactive.ipynb
drwxr-xr-x  3 root   root    4096 Feb  4 01:40 evo2_test
-rw-rw-r--  1 ubuntu ubuntu  1856 Feb  4 01:58 launch_training.py
drwx------  2 root   root   16384 Feb  4 00:15 lost+found
drwxrwxr-x  3 ubuntu ubuntu  4096 Feb  4 01:38 src
-rw-rw-r--  1 ubuntu ubuntu   253 Feb  4 02:13 temp_script.py
-rw-rw-r--  1 ubuntu ubuntu  1196 Feb  4 01:37 test_cli.sh
-rw-rw-r--  1 ubuntu ubuntu  1973 Feb  4

## Run Custom Python Script

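Any Python file you save in `/home/ec2-user/SageMaker/` on the notebook instance is accessible inside the container at `/workspace/`, because the container is started with that directory bind-mounted (`-v /home/ec2-user/SageMaker:/workspace`).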

In [21]:
# Create a script
# The script text silences the 'nemo.utils.import_utils' logger below ERROR,
# then prints a greeting and the number of GPUs torch can see.
script = '''
import logging
logging.getLogger('nemo.utils.import_utils').setLevel(logging.ERROR)

import torch
print(f"Hello from BioNeMo container!")
print(f"GPUs available: {torch.cuda.device_count()}")
'''

# Written on the host side; the call below refers to the same file by its
# /workspace path.
with open('/home/ec2-user/SageMaker/hello_bionemo.py', 'w') as f:
    f.write(script)

print("Script created. Running in container:")
print(bionemo.script("/workspace/hello_bionemo.py"))

Script created. Running in container:
Hello from BioNeMo container!
GPUs available: 4



## Run Evo2 Training

In [23]:
# Launch `train_evo2` in the `bionemo` container via `docker exec`: mock data,
# model size `test`, 1 node / 1 device, sequence length 128, micro and global
# batch size 1, at most 5 steps, checkpointing disabled, 1 validation batch.
# Results are directed to /workspace/evo2_notebook_test.
# NOTE(review): the container name is hard-coded here rather than taken from
# CONTAINER_NAME.
!docker exec bionemo train_evo2 \
     --mock-data \
     --model-size test \
     --num-nodes 1 \
     --devices 1 \
     --seq-length 128 \
     --micro-batch-size 1 \
     --global-batch-size 1 \
     --max-steps 5 \
     --result-dir /workspace/evo2_notebook_test \
     --experiment-name notebook_test \
     --disable-checkpointing \
     --limit-val-batches 1

Import of quick_gelu from megatron.core.fusions.fused_bias_geglu failed with: Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/nemo/utils/import_utils.py", line 319, in safe_import_from
    return getattr(imported_module, symbol), True
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: module 'megatron.core.fusions.fused_bias_geglu' has no attribute 'quick_gelu'

INFO:nemo.utils.import_utils:Import of quick_gelu from megatron.core.fusions.fused_bias_geglu failed with: Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/nemo/utils/import_utils.py", line 319, in safe_import_from
    return getattr(imported_module, symbol), True
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: module 'megatron.core.fusions.fused_bias_geglu' has no attribute 'quick_gelu'

[NeMo I 2026-02-04 02:19:42 nemo_logging:393] Using byte-level tokenization
[NeMo W 2026-02-04 02:19:42 nemo_logging:405] WandB is currently turned off.
