In [1]:
import torch
import transformers
import random
import numpy as np
def create_fixed_short_dataset(tokenizer, num_samples=8192):
    # Fixed short context - all sequences 512 tokens
    tokens = torch.randint(100, 16000, (num_samples, 512), dtype=torch.long)
    mask = torch.ones(num_samples, 512, dtype=torch.long)
    return {
        'input_ids': tokens.long(),
        'attention_mask': mask.long()
    }

def create_fixed_long_dataset(tokenizer, num_samples=8192):
    # Fixed long context - all sequences 8192 tokens
    tokens = torch.randint(100, 16000, (num_samples, 8192), dtype=torch.long)
    mask = torch.ones(num_samples, 8192, dtype=torch.long)
    return {
        'input_ids': tokens.long(),
        'attention_mask': mask.long()
    }

def create_variable_short_dataset(tokenizer, num_samples=8192):
    # Variable short context - normal dist around 256 tokens
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    np.random.seed(42)
    random.seed(42)
    lengths = torch.normal(mean=256, std=64, size=(num_samples,)).int().clamp(32, 512)
    tokens_list = []
    masks_list = []
    for length in lengths:
        # Generate random tokens of the specified length
        tokens = torch.randint(100, 16000, (length.item(),))
        # Create attention mask of 1s for the actual tokens
        mask = torch.ones(length.item())
        # Pad both tokens and mask to max length
        padded_tokens = torch.full((512,), tokenizer.pad_token_id, dtype=torch.long)
        padded_mask = torch.zeros(512, dtype=torch.long)
        padded_tokens[:length] = tokens
        padded_mask[:length] = mask
        tokens_list.append(padded_tokens)
        masks_list.append(padded_mask)
    
    tokens = torch.stack(tokens_list)
    masks = torch.stack(masks_list)
    
    return {
        'input_ids': tokens.long(),
        'attention_mask': masks.long()
    }

def create_variable_long_dataset(tokenizer, num_samples=8192):
    # Variable long context - normal dist around 4096 tokens
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    np.random.seed(42)
    random.seed(42)
    lengths = torch.normal(mean=4096, std=1024, size=(num_samples,)).int().clamp(32, 8192)
    tokens_list = []
    masks_list = []
    for length in lengths:
        # Generate random tokens of the specified length
        tokens = torch.randint(100, 16000, (length.item(),))
        # Create attention mask of 1s for the actual tokens
        mask = torch.ones(length.item())
        # Pad both tokens and mask to max length
        padded_tokens = torch.full((int(8192),), tokenizer.pad_token_id, dtype=torch.long)
        padded_mask = torch.zeros(int(8192), dtype=torch.long)
        padded_tokens[:length] = tokens
        padded_mask[:length] = mask
        tokens_list.append(padded_tokens)
        masks_list.append(padded_mask)
    
    tokens = torch.stack(tokens_list)
    masks = torch.stack(masks_list)
    
    return {
        'input_ids': tokens.long(),
        'attention_mask': masks.long()
    }

# Create all datasets
def create_all_datasets(tokenizer, num_samples=8192):
    datasets = {
        'fixed_short': create_fixed_short_dataset(tokenizer, num_samples),
        'variable_short': create_variable_short_dataset(tokenizer, num_samples),
        'fixed_long': create_fixed_long_dataset(tokenizer, num_samples), 
        'variable_long': create_variable_long_dataset(tokenizer, num_samples)
    }
    return datasets

In [2]:
import transformers
model = transformers.AutoModel.from_pretrained("nomic-ai/nomic-bert-2048", trust_remote_code=True).to('cuda:0')
tokenizer = transformers.AutoTokenizer.from_pretrained("nomic-ai/nomic-bert-2048")
datasets = create_all_datasets(tokenizer, 8192)

  state_dict = loader(resolved_archive_file)
<All keys matched successfully>


In [3]:
import time

# Speed tests all ran at standardised batch sizes for the notebook -- impact on speed at 512 is not significant.
batch_size = 512
n_iters = 10
times = []

with torch.inference_mode():
    for dataset_name, dataset in datasets.items():
        batch_size = 512
        print(f"\nTesting {dataset_name}...")
        batch_times = []

        if 'long' in dataset_name:
            batch_size = 32
        
        # Create DataLoader
        dataloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(
                dataset['input_ids'].to('cuda:0'),
                dataset['attention_mask'].to('cuda:0')
            ),
            batch_size=batch_size,
            shuffle=False
        )
        
        # Warmup
        for batch in dataloader:
            model(input_ids=batch[0], attention_mask=batch[1])
            break
            
        torch.cuda.synchronize()
        
        # Timing runs
        for i in range(n_iters):
            start = time.perf_counter()
            for batch in dataloader:
                model(input_ids=batch[0], attention_mask=batch[1])
            torch.cuda.synchronize()
            end = time.perf_counter()
            batch_times.append(end - start)
            
        mean_time = np.mean(batch_times)
        std_time = np.std(batch_times)
        print(f"{dataset_name} -> {mean_time:.2f} ± {std_time:.2f} sec (batch_size: {batch_size})")
        times.append((dataset_name, mean_time, std_time))

    print("\nProcessing Time Summary:")
    print("-" * 50 + "\n")
    print("bert-base-uncased Model:")
    for name, mean, std in times:
        print(f"{name}: {mean:.2f} ± {std:.2f} seconds (batch_size: {batch_size})")




Testing fixed_short...


fixed_short -> 35.83 ± 0.00 sec (batch_size: 512)

Testing variable_short...
variable_short -> 35.83 ± 0.01 sec (batch_size: 512)

Testing fixed_long...
fixed_long -> 1458.79 ± 0.52 sec (batch_size: 32)

Testing variable_long...
variable_long -> 1455.46 ± 0.31 sec (batch_size: 32)

Processing Time Summary:
--------------------------------------------------

bert-base-uncased Model:
fixed_short: 35.83 ± 0.00 seconds (batch_size: 32)
variable_short: 35.83 ± 0.01 seconds (batch_size: 32)
fixed_long: 1458.79 ± 0.52 seconds (batch_size: 32)
variable_long: 1455.46 ± 0.31 seconds (batch_size: 32)


In [4]:
# Save timing results to file
with open('runtime.txt', 'w') as f:
    for name, mean, std in times:
        f.write(f"{name}: {mean:.2f} ± {std:.2f} seconds (batch_size: {batch_size})\n")
