In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local basin_volume package) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Expose only GPU index 0 to CUDA. This cell runs before the cell that imports
# torch, so the setting is in place before any CUDA initialisation.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from basin_volume import VolumeConfig, VolumeEstimator

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from basin_volume import ImplicitParamVector, ImplicitRandomVector, ImplicitVector
from basin_volume import print_gpu_memory

In [5]:
# Single source of truth for the checkpoint id, so the model and its tokenizer
# are always loaded from the same repository.
MODEL_NAME = "EleutherAI/pythia-14m"

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.cuda()  # move the weights onto the visible GPU
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token_id = 1  # pythia-specific
tokenizer.eos_token_id = 0  # pythia-specific

# NOTE(review): trust_remote_code=True permits code shipped with the dataset
# repository to execute locally; confirm it is still required for this dataset.
dataset = load_dataset("EleutherAI/lambada_openai", name="en", split="test", trust_remote_code=True)


The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`attribute of the `GPTNeoXAttention` class! It will be removed in v4.48


In [6]:

# Assemble the estimation settings, then build the estimator from them.
cfg = VolumeConfig(
    model=model,
    tokenizer=tokenizer,
    dataset=dataset,
    # name of the dataset field holding the text (has to match the dataset)
    text_key="text",
    # how many Monte Carlo samples to draw
    n_samples=10,
    # cutoff on KL divergence, in nats
    cutoff=1e-2,
    # length of each sequence chunk cut from the dataset
    max_seq_len=8,
    # how many sequences/chunks the estimation uses
    val_size=10,
    data_batch_size=1,
    cache_mode=None,
    chunking=True,
    implicit_vectors=False,
    debug=False,
)
estimator = VolumeEstimator.from_config(cfg)

tokens.shape=torch.Size([52947, 8])


In [7]:
# Run the volume estimation (progress bar counts 10 iterations).
# NOTE(review): in the recorded output, "Current GPU memory allocated" rises by
# roughly 53.7 MB per completed iteration (85.80 -> 139.47 -> 193.16 -> 246.86 MB)
# and the run was stopped with KeyboardInterrupt after 3 of 10 iterations.
# This looks like one parameter-sized tensor being retained per iteration --
# confirm inside the estimator.
result = estimator.run()

  0%|          | 0/10 [00:00<?, ?it/s]

after randn

Current GPU memory allocated: 85.80 MB
Max GPU memory allocated: 85.80 MB
Current GPU memory reserved: 98.00 MB
Max GPU memory reserved: 98.00 MB

Current GPU memory allocated: 85.80 MB
Max GPU memory allocated: 101.80 MB
Current GPU memory reserved: 118.00 MB
Max GPU memory reserved: 118.00 MB


 10%|█         | 1/10 [00:16<02:26, 16.29s/it]


Current GPU memory allocated: 139.47 MB
Max GPU memory allocated: 139.47 MB
Current GPU memory reserved: 164.00 MB
Max GPU memory reserved: 164.00 MB
after randn

Current GPU memory allocated: 139.49 MB
Max GPU memory allocated: 139.52 MB
Current GPU memory reserved: 164.00 MB
Max GPU memory reserved: 164.00 MB

Current GPU memory allocated: 139.49 MB
Max GPU memory allocated: 155.49 MB
Current GPU memory reserved: 164.00 MB
Max GPU memory reserved: 164.00 MB


 20%|██        | 2/10 [00:32<02:09, 16.24s/it]


Current GPU memory allocated: 193.16 MB
Max GPU memory allocated: 193.16 MB
Current GPU memory reserved: 208.00 MB
Max GPU memory reserved: 208.00 MB
after randn

Current GPU memory allocated: 193.19 MB
Max GPU memory allocated: 193.22 MB
Current GPU memory reserved: 208.00 MB
Max GPU memory reserved: 208.00 MB

Current GPU memory allocated: 193.19 MB
Max GPU memory allocated: 209.19 MB
Current GPU memory reserved: 230.00 MB
Max GPU memory reserved: 230.00 MB


 30%|███       | 3/10 [00:49<01:56, 16.63s/it]


Current GPU memory allocated: 246.86 MB
Max GPU memory allocated: 246.86 MB
Current GPU memory reserved: 254.00 MB
Max GPU memory reserved: 254.00 MB
after randn

Current GPU memory allocated: 246.88 MB
Max GPU memory allocated: 246.91 MB
Current GPU memory reserved: 254.00 MB
Max GPU memory reserved: 254.00 MB

Current GPU memory allocated: 246.88 MB
Max GPU memory allocated: 262.88 MB
Current GPU memory reserved: 276.00 MB
Max GPU memory reserved: 276.00 MB


 30%|███       | 3/10 [00:57<02:13, 19.03s/it]


KeyboardInterrupt: 

In [7]:
import gc

In [10]:
def list_largest_tensors(device_type="cuda"):
    """Print a table of live tensors on one device type, largest first.

    Every object tracked by the garbage collector is inspected; those accepted
    by ``torch.is_tensor`` whose ``device.type`` equals ``device_type`` are
    grouped by ``data_ptr()``, so tensors starting at the same address appear
    as a single row (the ``Aliases`` column is the size of that group).

    Limitations:
        * The size, shape and dtype shown for a group are those of the first
          tensor found at that address.
        * Tensors that share storage but start at different offsets have
          different ``data_ptr()`` values and are listed as separate rows.

    Args:
        device_type: Device type string to report on, compared against
            ``tensor.device.type``. Defaults to ``"cuda"``.

    Returns:
        None. The table is written to stdout. The ``Cumul.(x)`` column is the
        running total of sizes divided by the largest row's size.
    """
    # Group matching tensors by the address of their first element.
    by_address = {}
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) and obj.device.type == device_type:
                by_address.setdefault(obj.data_ptr(), []).append(obj)
        except Exception:
            # Best-effort scan: inspecting arbitrary GC-tracked objects can
            # raise, and one bad object must not abort the report. Unlike a
            # bare ``except``, KeyboardInterrupt/SystemExit still propagate.
            continue

    # One row per address: (size in MB, shape, dtype, number of aliases).
    rows = []
    for aliases in by_address.values():
        first = aliases[0]
        size_mb = first.nelement() * first.element_size() / (1024 * 1024)
        rows.append((size_mb, first.size(), first.dtype, len(aliases)))

    # Sort on the size only. Comparing whole tuples falls through to the dtype
    # when size and shape tie, and torch.dtype objects are not orderable.
    rows.sort(key=lambda row: row[0], reverse=True)

    largest_size = rows[0][0] if rows else 0.0
    cumulative = 0.0

    # Print results
    print(f"{'Size (MB)':>10} {'Cumul.(x)':>10} {'Shape':>20} {'Type':>10} {'Aliases':>8}")
    print("-" * 60)
    for size, shape, dtype, num_tensors in rows:
        cumulative += size
        # Guard against every listed tensor being empty (largest size of 0).
        relative_cumul = cumulative / largest_size if largest_size > 0 else 0.0
        print(f"{size:10.2f} {relative_cumul:10.2f} {str(shape):>20} {str(dtype):>10} {num_tensors:>8}")

In [11]:
list_largest_tensors()

 Size (MB)  Cumul.(x)                Shape       Type  Aliases
------------------------------------------------------------
     53.66       1.00 torch.Size([14067712]) torch.float32        2
     24.56       1.46 torch.Size([50304, 128]) torch.float32        1
      4.00       1.53 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      4.00       1.61 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      4.00       1.68 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      4.00       1.76 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      4.00       1.83 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      4.00       1.90 torch.Size([1, 1, 2048, 2048]) torch.bool        1
      0.25       1.91 torch.Size([512, 128]) torch.float32        1
      0.25       1.91 torch.Size([512, 128]) torch.float32        1
      0.25       1.92 torch.Size([512, 128]) torch.float32        1
      0.25       1.92 torch.Size([512, 128]) torch.float32        1
      0.25       1.93 torch.