In [1]:
import torch
import transformers
import random
import numpy as np
def create_fixed_short_dataset(tokenizer, num_samples=8192):
    # Fixed short context - all sequences 512 tokens
    tokens = torch.randint(100, 16000, (num_samples, 512), dtype=torch.long)
    mask = torch.ones(num_samples, 512, dtype=torch.long)
    return {
        'input_ids': tokens.long(),
        'attention_mask': mask.long()
    }

def create_fixed_long_dataset(tokenizer, num_samples=8192):
    # Fixed long context - all sequences 8192 tokens
    tokens = torch.randint(100, 16000, (num_samples, 8192), dtype=torch.long)
    mask = torch.ones(num_samples, 8192, dtype=torch.long)
    return {
        'input_ids': tokens.long(),
        'attention_mask': mask.long()
    }

def create_variable_short_dataset(tokenizer, num_samples=8192):
    # Variable short context - normal dist around 256 tokens
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    np.random.seed(42)
    random.seed(42)
    lengths = torch.normal(mean=256, std=64, size=(num_samples,)).int().clamp(32, 512)
    tokens_list = []
    masks_list = []
    for length in lengths:
        # Generate random tokens of the specified length
        tokens = torch.randint(100, 16000, (length.item(),))
        # Create attention mask of 1s for the actual tokens
        mask = torch.ones(length.item())
        # Pad both tokens and mask to max length
        padded_tokens = torch.full((512,), tokenizer.pad_token_id, dtype=torch.long)
        padded_mask = torch.zeros(512, dtype=torch.long)
        padded_tokens[:length] = tokens
        padded_mask[:length] = mask
        tokens_list.append(padded_tokens)
        masks_list.append(padded_mask)
    
    tokens = torch.stack(tokens_list)
    masks = torch.stack(masks_list)
    
    return {
        'input_ids': tokens.long(),
        'attention_mask': masks.long()
    }

def create_variable_long_dataset(tokenizer, num_samples=8192):
    # Variable long context - normal dist around 4096 tokens
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    np.random.seed(42)
    random.seed(42)
    lengths = torch.normal(mean=4096, std=1024, size=(num_samples,)).int().clamp(32, 8192)
    tokens_list = []
    masks_list = []
    for length in lengths:
        # Generate random tokens of the specified length
        tokens = torch.randint(100, 16000, (length.item(),))
        # Create attention mask of 1s for the actual tokens
        mask = torch.ones(length.item())
        # Pad both tokens and mask to max length
        padded_tokens = torch.full((int(8192),), tokenizer.pad_token_id, dtype=torch.long)
        padded_mask = torch.zeros(int(8192), dtype=torch.long)
        padded_tokens[:length] = tokens
        padded_mask[:length] = mask
        tokens_list.append(padded_tokens)
        masks_list.append(padded_mask)
    
    tokens = torch.stack(tokens_list)
    masks = torch.stack(masks_list)
    
    return {
        'input_ids': tokens.long(),
        'attention_mask': masks.long()
    }

# Create all datasets
def create_all_datasets(tokenizer, num_samples=8192):
    datasets = {
        'fixed_short': create_fixed_short_dataset(tokenizer, num_samples),
        'fixed_long': create_fixed_long_dataset(tokenizer, num_samples), 
        'variable_short': create_variable_short_dataset(tokenizer, num_samples),
        'variable_long': create_variable_long_dataset(tokenizer, num_samples)
    }
    return datasets

In [2]:
model = transformers.AutoModel.from_pretrained("roberta-large", trust_remote_code=True).to('cuda:2')
tokenizer = transformers.AutoTokenizer.from_pretrained("roberta-large")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
gte_datasets = create_all_datasets(tokenizer, 4096)

In [4]:
bsize = 462
fixed_short_batch = {
    'input_ids': gte_datasets['fixed_short']['input_ids'][:bsize].to('cuda:2'),
    'attention_mask': gte_datasets['fixed_short']['attention_mask'][:bsize].to('cuda:2')
}


In [5]:
with torch.inference_mode():
    outputs = model(**fixed_short_batch)

OutOfMemoryError: CUDA out of memory. Tried to allocate 924.00 MiB. GPU 2 has a total capacity of 23.43 GiB of which 908.06 MiB is free. Including non-PyTorch memory, this process has 22.54 GiB memory in use. Of the allocated memory 21.19 GiB is allocated by PyTorch, and 925.58 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)