In [1]:
!pip install transformers accelerate optimum nvidia-ml-py



In [2]:
from transformers.utils import is_flash_attn_2_available
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import numpy as np
import torch.nn.functional as F
import torch
from datetime import timedelta
import time
from collections import namedtuple
import json

torch.random.manual_seed(0)

<torch._C.Generator at 0x7f94b6e4a1f0>

In [4]:
from sklearn.cluster import KMeans

from pynvml import *

def check_gpu(step):
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"{step}: GPU memory used: {info.used // 1024**2} MB.")
    
def D(obj, label=None, c=True):
    print()
    if label:
        print(label)
        
    if isinstance(obj, tuple):
        print(len(obj))
    elif isinstance(obj, torch.Tensor) or isinstance(obj, np.ndarray):
        print(obj.shape)
        if c: # Contents
            display(obj)
    else:
        if c: # Contents
            display(obj)
            
def DS(obj, label=None):
    D(obj, label, c=False)
    
    
    

class InferenceTensor:
    def __init__(self):
        self.model = AutoModelForCausalLM.from_pretrained(
            "microsoft/Phi-3-mini-4k-instruct",
            torch_dtype=torch.bfloat16,
            device_map='auto',
            trust_remote_code=True,
            use_cache=True,
            # attn_implementation='flash_attention_2',
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/Phi-3-mini-4k-instruct")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.max_new_tokens = 10
        self.batch_size = 8
        
    def candidates_generator(self, top_p: float, max_beams: int, prompt: str):
        print(prompt)
        candidates, candidate_logprobs = self._init_candidates(prompt)
        for level_idx in range(self.max_new_tokens):
            logits, embeddings = self._infer(candidates[:self.max_candidates, ...], candidate_logprobs[:self.max_candidates, ...])
        
            if candidates.shape[0] > max_beams:
                candidates, candidate_parents, candidate_logprobs, logits = self._k_means(logits, embeddings, candidates, candidate_logprobs, max_beams)
                yield self._format_candidates('k_means', f"{level_idx}-k", candidates, candidate_parents, candidate_logprobs)

            candidates, candidate_parents, candidate_logprobs = self._top_p(logits, candidates, candidate_logprobs, top_p)
            yield self._format_candidates('top_p', f"{level_idx}-p", candidates, candidate_parents, candidate_logprobs)

        yield f"event: level\nid: END\ndata: []\n\n"

    def _format_candidates(self, event: str, idx: int, candidates, candidate_parents, candidate_logprobs):
        D(candidate_parents, 'candidate_parents')
        candidate_texts = self.tokenizer.batch_decode(candidates[:, -1])
        candidate_probs = candidate_logprobs.exp()
        candidate_dicts = []
        for i in range(len(candidate_texts)):
            candidate_dicts.append({'content': candidate_texts[i], 'parents': candidate_parents[i], 'prob': candidate_probs[i].item()})
        data = json.dumps(candidate_dicts)
        return f"event: {event}\nid: {idx}\ndata: {data}\n\n"
        
    def _init_candidates(self, text: str):
        prompt = "<|user|>\n{} <|end|>\n<|assistant|>".format(text)
        inputs = self.tokenizer(prompt, return_tensors='pt')
        D(inputs.input_ids, 'input_ids')
        print(self.tokenizer.batch_decode(inputs.input_ids))

        candidates = inputs.input_ids.to(self.device)
        candidate_logprobs = torch.zeros((1), dtype=torch.float32, device=self.device)

        return candidates, candidate_logprobs

    def _k_means(self, logits, embeddings, candidates, candidate_logprobs, max_beams):
        D(candidates, 'candidates')
        D(candidate_logprobs, 'candidate_logprobs')
        # === CPU ===
        embeddings_np = embeddings.float().numpy(force=True)
        D(embeddings_np, 'embeddings_np')
        k_means = KMeans(n_clusters=min(2, embeddings_np.shape[0]), random_state=0, n_init="auto")
        k_mean_space = k_means.fit_transform(embeddings_np)
        D(k_mean_space, 'k_mean_space')
        k_mean_clusters = k_means.predict(embeddings_np)
        D(k_mean_clusters, 'k_mean_clusters')
        k_mean_logprob_mass = np.bincount(k_mean_clusters, weights=candidate_logprobs.cpu())
        D(k_mean_logprob_mass, 'k_mean_logprob_mass')
        closest = np.argmin(k_mean_space, axis=0)
        D(closest, 'closest')
        # === END CPU ===
        
        closest_indices = torch.from_numpy(closest).to(self.device)
        new_candidates = candidates.index_select(0, closest_indices)
        D(new_candidates, 'new_candidates')
        new_candidate_parents = [torch.nonzero(torch.from_numpy(k_mean_clusters).to(self.device) == i).squeeze(-1).tolist() for i in range(new_candidates.shape[0])]
        D(new_candidate_parents, 'new_candidate_parents')
        new_candidate_logprobs = torch.from_numpy(k_mean_logprob_mass).to(self.device)
        D(new_candidate_logprobs, 'new_candidate_logprobs')
        new_candidate_logits = logits.index_select(0, closest_indices)
        
        return new_candidates, new_candidate_parents, new_candidate_logprobs, new_candidate_logits
        
    def _top_p(self, logits, candidates, candidate_logprobs, top_p):
        D(candidates, 'candidates')
        D(candidate_logprobs, 'candidate_logprobs')
        
        last_tok_logits = logits[:, -1, :]
        D(last_tok_logits, 'last_tok_logits')

        sorted_logits, sorted_indices = torch.sort(last_tok_logits, descending=True, dim=-1)
        DS(sorted_logits, 'sorted_logits')
        DS(sorted_indices, 'sorted_indices')
        sorted_probs = F.softmax(sorted_logits, dim=-1)
        D(sorted_probs, 'sorted_probs')
        display(sorted_probs.sum(dim=1))
        cum_probs = torch.cumsum(sorted_probs, dim=-1)
        D(cum_probs, 'cum_probs')

        # Create tensor of bools indicating which indices are cumulatively less than top_p
        keep_indices = cum_probs < top_p

        # Keep the last element that went over top_p
        keep_indices[:, 1:] = keep_indices[:, :-1].clone() # Is this inefficient?
        keep_indices[:, 0] = 1  # Always keep the first element
        D(keep_indices, 'keep_indices')

        new_candidate_parents = keep_indices.nonzero()[:, 0]
        D(new_candidate_parents, 'new_candidate_parents')

        # OPTIM: Potential optimization -- have a fixed tensor of size (max_candidates, max_tokens) and copy this into that (batch-aware).
        # OPTIM: consider which of these operations can be done in-place to prevent new allocations?
        carryover_candidates = candidates.index_select(0, new_candidate_parents)
        D(carryover_candidates, 'carryover_candidates')

        # Similar code could be used to trace entire origin of sequence. For now since server just traces parent of the preceding generation, not needed
        # carryover_candidate_parents = candidate_parents.index_select(0, carryover_candidate_indices)  # Not strictly necessary since 1d
        # D(carryover_candidate_parents, 'carryover_candidate_parents')

        carryover_candidate_logprobs = candidate_logprobs.index_select(0, new_candidate_parents)  # Not strictly necessary since 1d
        D(carryover_candidate_logprobs, 'carryover_candidate_logprobs')

        new_candidate_toks = sorted_indices[keep_indices].unsqueeze(1)
        D(new_candidate_toks, 'new_candidate_toks')
        new_candidate_tok_logprobs = sorted_probs[keep_indices].log()
        D(new_candidate_tok_logprobs, 'new_candidate_tok_logprobs')

        new_candidates = torch.cat([carryover_candidates, new_candidate_toks], dim=1)
        D(new_candidates, 'new_candidates')
        new_candidate_logprobs = carryover_candidate_logprobs.add_(new_candidate_tok_logprobs)
        D(new_candidate_logprobs, 'new_candidate_logprobs')

        return new_candidates, new_candidate_parents.unsqueeze(-1).tolist(), new_candidate_logprobs


    def _infer(self, candidates, candidate_logprobs):
        with torch.inference_mode():
            num_batches = (candidates.shape[0] + self.batch_size - 1) // self.batch_size  # Round up to nearest whole number of batches
            print('\nnum_batches', num_batches)

            new_candidates_list = []
            new_candidate_parents_list = []
            new_candidate_logprobs_list = []

            check_gpu('infer start')
            output_logits_list = []
            output_embeddings_list = []
            for i in range(0, num_batches, 1):
                batch_candidates = candidates[i * self.batch_size:(i + 1) * self.batch_size]
                DS(batch_candidates, 'batch_candidates')
                batch_candidate_logprobs = candidate_logprobs[i * self.batch_size:(i + 1) * self.batch_size]
                DS(batch_candidate_logprobs, 'batch_candidate_logprobs')

                batch_outputs = self.model(input_ids=batch_candidates, output_hidden_states=True)
                DS(batch_outputs.logits, 'batch_logits')
                DS(batch_outputs.hidden_states[-1], 'hidden_states[-1]')

                output_logits_list.append(batch_outputs.logits)
                output_embeddings_list.append(batch_outputs.hidden_states[-1][:,-1,:])
                check_gpu('infer - after batch run')

            output_logits = torch.cat(output_logits_list, dim=0)
            output_embeddings = torch.cat(output_embeddings_list, dim=0)
            
            return output_logits, output_embeddings

it = InferenceTensor()

for x in it.candidates_generator(0.9, 2, 'What is the highest mountain?'):
    print(x)
    print()
    print('====================================')
    print()

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


What is the highest mountain?

input_ids
torch.Size([1, 11])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001]])

['<s><|user|> What is the highest mountain? <|end|><|assistant|>']

num_batches 1
infer start: GPU memory used: 15175 MB.

batch_candidates
torch.Size([1, 11])

batch_candidate_logprobs
torch.Size([1])

batch_logits
torch.Size([1, 11, 32064])

hidden_states[-1]
torch.Size([1, 11, 3072])
infer - after batch run: GPU memory used: 15179 MB.

candidates
torch.Size([1, 11])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001]], device='cuda:0')


candidate_logprobs
torch.Size([1])


tensor([0.], device='cuda:0')


last_tok_logits
torch.Size([1, 32064])


tensor([[ 5.7812, -1.4688, -3.2969,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([1, 32064])

sorted_indices
torch.Size([1, 32064])

sorted_probs
torch.Size([1, 32064])


tensor([[9.2405e-01, 7.5850e-02, 8.8812e-05,  ..., 1.1622e-21, 7.0490e-22,
         3.7730e-22]], device='cuda:0')

tensor([1.0000], device='cuda:0')


cum_probs
torch.Size([1, 32064])


tensor([[0.9240, 0.9999, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([1, 32064])


tensor([[ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([1])


tensor([0], device='cuda:0')


carryover_candidates
torch.Size([1, 11])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([1])


tensor([0.], device='cuda:0')


new_candidate_toks
torch.Size([1, 1])


tensor([[450]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([1])


tensor([-0.0790], device='cuda:0')


new_candidates
torch.Size([1, 12])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450]], device='cuda:0')


new_candidate_logprobs
torch.Size([1])


tensor([-0.0790], device='cuda:0')


candidate_parents


[[0]]

event: top_p
id: 0-p
data: [{"content": "The", "parents": [0], "prob": 0.924048125743866}]





num_batches 1
infer start: GPU memory used: 15179 MB.

batch_candidates
torch.Size([1, 12])

batch_candidate_logprobs
torch.Size([1])

batch_logits
torch.Size([1, 12, 32064])

hidden_states[-1]
torch.Size([1, 12, 3072])
infer - after batch run: GPU memory used: 15179 MB.

candidates
torch.Size([1, 12])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450]], device='cuda:0')


candidate_logprobs
torch.Size([1])


tensor([-0.0790], device='cuda:0')


last_tok_logits
torch.Size([1, 32064])


tensor([[-1.2969,  1.0312, -5.0938,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([1, 32064])

sorted_indices
torch.Size([1, 32064])

sorted_probs
torch.Size([1, 32064])


tensor([[9.9909e-01, 9.1105e-04, 1.2088e-06,  ..., 6.5938e-24, 3.9993e-24,
         1.4713e-24]], device='cuda:0')

tensor([1.0000], device='cuda:0')


cum_probs
torch.Size([1, 32064])


tensor([[0.9991, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([1, 32064])


tensor([[ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([1])


tensor([0], device='cuda:0')


carryover_candidates
torch.Size([1, 12])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([1])


tensor([-0.0790], device='cuda:0')


new_candidate_toks
torch.Size([1, 1])


tensor([[9939]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([1])


tensor([-0.0009], device='cuda:0')


new_candidates
torch.Size([1, 13])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939]], device='cuda:0')


new_candidate_logprobs
torch.Size([1])


tensor([-0.0799], device='cuda:0')


candidate_parents


[[0]]

event: top_p
id: 1-p
data: [{"content": "highest", "parents": [0], "prob": 0.9232035875320435}]





num_batches 1
infer start: GPU memory used: 15179 MB.

batch_candidates
torch.Size([1, 13])

batch_candidate_logprobs
torch.Size([1])

batch_logits
torch.Size([1, 13, 32064])

hidden_states[-1]
torch.Size([1, 13, 3072])
infer - after batch run: GPU memory used: 15181 MB.

candidates
torch.Size([1, 13])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939]], device='cuda:0')


candidate_logprobs
torch.Size([1])


tensor([-0.0799], device='cuda:0')


last_tok_logits
torch.Size([1, 32064])


tensor([[ 2.8906,  3.1875, -7.0938,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([1, 32064])

sorted_indices
torch.Size([1, 32064])

sorted_probs
torch.Size([1, 32064])


tensor([[9.9984e-01, 1.2339e-04, 3.5352e-05,  ..., 5.1391e-24, 4.5352e-24,
         3.5320e-24]], device='cuda:0')

tensor([1.], device='cuda:0')


cum_probs
torch.Size([1, 32064])


tensor([[0.9998, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([1, 32064])


tensor([[ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([1])


tensor([0], device='cuda:0')


carryover_candidates
torch.Size([1, 13])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([1])


tensor([-0.0799], device='cuda:0')


new_candidate_toks
torch.Size([1, 1])


tensor([[14378]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([1])


tensor([-0.0002], device='cuda:0')


new_candidates
torch.Size([1, 14])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378]], device='cuda:0')


new_candidate_logprobs
torch.Size([1])


tensor([-0.0801], device='cuda:0')


candidate_parents


[[0]]

event: top_p
id: 2-p
data: [{"content": "mountain", "parents": [0], "prob": 0.9230522513389587}]





num_batches 1
infer start: GPU memory used: 15181 MB.

batch_candidates
torch.Size([1, 14])

batch_candidate_logprobs
torch.Size([1])

batch_logits
torch.Size([1, 14, 32064])

hidden_states[-1]
torch.Size([1, 14, 3072])
infer - after batch run: GPU memory used: 15181 MB.

candidates
torch.Size([1, 14])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378]], device='cuda:0')


candidate_logprobs
torch.Size([1])


tensor([-0.0801], device='cuda:0')


last_tok_logits
torch.Size([1, 32064])


tensor([[ 4.8125,  1.2734, -5.8750,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([1, 32064])

sorted_indices
torch.Size([1, 32064])

sorted_probs
torch.Size([1, 32064])


tensor([[8.1600e-01, 9.7458e-02, 8.6006e-02,  ..., 5.2119e-21, 4.0591e-21,
         1.6921e-21]], device='cuda:0')

tensor([1.0000], device='cuda:0')


cum_probs
torch.Size([1, 32064])


tensor([[0.8160, 0.9135, 0.9995,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([1, 32064])


tensor([[ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 0], device='cuda:0')


carryover_candidates
torch.Size([2, 14])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-0.0801, -0.0801], device='cuda:0')


new_candidate_toks
torch.Size([2, 1])


tensor([[373],
        [297]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.2033, -2.3283], device='cuda:0')


new_candidates
torch.Size([2, 15])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-0.2834, -2.4084], device='cuda:0')


candidate_parents


[[0], [0]]

event: top_p
id: 3-p
data: [{"content": "on", "parents": [0], "prob": 0.7532129883766174}, {"content": "in", "parents": [0], "prob": 0.08995844423770905}]





num_batches 1
infer start: GPU memory used: 15201 MB.

batch_candidates
torch.Size([2, 15])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 15, 32064])

hidden_states[-1]
torch.Size([2, 15, 3072])
infer - after batch run: GPU memory used: 15219 MB.

candidates
torch.Size([2, 15])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-0.2834, -2.4084], device='cuda:0')


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.6797,  0.8750, -4.7812,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.3438, -5.4688, -4.6875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9996e-01, 3.5356e-05, 1.7603e-06,  ..., 3.5325e-24, 2.4278e-24,
         3.7232e-25],
        [8.1683e-01, 1.8226e-01, 5.1193e-04,  ..., 3.4021e-20, 3.4021e-20,
         7.5910e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.8168, 0.9991, 0.9996,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 15])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([3])


tensor([-0.2834, -2.4084, -2.4084], device='cuda:0')


new_candidate_toks
torch.Size([3, 1])


tensor([[11563],
        [ 4958],
        [  278]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-3.7432e-05, -2.0232e-01, -1.7023e+00], device='cuda:0')


new_candidates
torch.Size([3, 16])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,   278]], device='cuda:0')


new_candidate_logprobs
torch.Size([3])


tensor([-0.2834, -2.6107, -4.1107], device='cuda:0')


candidate_parents


[[0], [1], [1]]

event: top_p
id: 4-p
data: [{"content": "Earth", "parents": [0], "prob": 0.7531847953796387}, {"content": "terms", "parents": [1], "prob": 0.0734809935092926}, {"content": "the", "parents": [1], "prob": 0.016395829617977142}]





num_batches 1
infer start: GPU memory used: 15219 MB.

batch_candidates
torch.Size([3, 16])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 16, 32064])

hidden_states[-1]
torch.Size([3, 16, 3072])
infer - after batch run: GPU memory used: 15235 MB.

candidates
torch.Size([3, 16])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,   278]], device='cuda:0')


candidate_logprobs
torch.Size([3])


tensor([-0.2834, -2.6107, -4.1107], device='cuda:0')


embeddings_np
(3, 3072)


array([[-1.4765625 , -0.0112915 ,  1.4921875 , ..., -0.94921875,
        -0.27734375, -1.515625  ],
       [ 0.640625  , -1.109375  ,  2.71875   , ..., -0.9140625 ,
         2.5625    , -2.1875    ],
       [ 0.09326172, -0.6796875 ,  3.390625  , ..., -0.69921875,
        -3.28125   , -1.0859375 ]], dtype=float32)


k_mean_space
(3, 2)


array([[105.234764,  50.63994 ],
       [  0.      ,  93.1218  ],
       [106.760445,  50.63994 ]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-2.6107285 , -4.39417294])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 16])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563]], device='cuda:0')


new_candidate_parents


[[1], [0, 2]]


new_candidate_logprobs
torch.Size([2])


tensor([-2.6107, -4.3942], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1], [0, 2]]

event: k_means
id: 5-k
data: [{"content": "terms", "parents": [1], "prob": 0.07348099318896636}, {"content": "Earth", "parents": [0, 2], "prob": 0.012349089582051961}]





candidates
torch.Size([2, 16])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-2.6107, -4.3942], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  1.5703,  -2.2031,  -5.8750,  ...,   0.0000,   0.0000,   0.0000],
        [ -0.9766,  -6.3438, -10.3125,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.0587e-10, 1.3177e-10,  ..., 1.3697e-25, 1.2088e-25,
         1.0668e-25],
        [9.8127e-01, 1.5861e-02, 2.1465e-03,  ..., 8.4822e-22, 5.8297e-22,
         1.6702e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9813, 0.9971, 0.9993,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 16])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-2.6107, -4.3942], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  310],
        [29892]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000, -0.0189], device='cuda:0')


new_candidates
torch.Size([2, 17])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563, 29892]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-2.6107, -4.4131], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 5-p
data: [{"content": "of", "parents": [0], "prob": 0.07348099318896636}, {"content": ",", "parents": [1], "prob": 0.012117801617336242}]





num_batches 1
infer start: GPU memory used: 15235 MB.

batch_candidates
torch.Size([2, 17])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 17, 32064])

hidden_states[-1]
torch.Size([2, 17, 3072])
infer - after batch run: GPU memory used: 15235 MB.

candidates
torch.Size([2, 17])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   373, 11563, 29892]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-2.6107, -4.4131], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  0.2129,  -0.4570,  -6.7500,  ...,   0.0000,   0.0000,   0.0000],
        [ -3.5469,  -2.0000, -10.3750,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[3.9407e-01, 1.8615e-01, 1.8615e-01,  ..., 1.0154e-18, 6.1589e-19,
         5.4352e-19],
        [4.0809e-01, 3.1782e-01, 2.1843e-01,  ..., 1.1798e-19, 8.6316e-20,
         7.6173e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.3941, 0.5802, 0.7664,  ..., 1.0000, 1.0000, 1.0000],
        [0.4081, 0.7259, 0.9443,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([10])


tensor([0, 0, 0, 0, 0, 0, 0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([10, 17])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310],
        [    1, 32010,  1724,   33


carryover_candidate_logprobs
torch.Size([10])


tensor([-2.6107, -2.6107, -2.6107, -2.6107, -2.6107, -2.6107, -2.6107, -4.4131,
        -4.4131, -4.4131], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([10, 1])


tensor([[11858],
        [ 3171],
        [19224],
        [ 2038],
        [  967],
        [ 2533],
        [11563],
        [  408],
        [ 2729],
        [  297]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([10])


tensor([-0.9312, -1.6812, -1.6812, -3.0562, -3.3062, -3.3062, -3.5562, -0.8963,
        -1.1463, -1.5213], device='cuda:0')


new_candidates
torch.Size([10, 18])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  3171],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  2038],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,   967],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  2533],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958, 


new_candidate_logprobs
torch.Size([10])


tensor([-3.5420, -4.2920, -4.2920, -5.6670, -5.9170, -5.9170, -6.1670, -5.3094,
        -5.5594, -5.9344], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [0], [0], [0], [0], [0], [1], [1], [1]]

event: top_p
id: 6-p
data: [{"content": "elev", "parents": [0], "prob": 0.02895674790207378}, {"content": "height", "parents": [0], "prob": 0.013678199185093203}, {"content": "peak", "parents": [0], "prob": 0.013678199185093203}, {"content": "above", "parents": [0], "prob": 0.0034583899410235456}, {"content": "its", "parents": [0], "prob": 0.002693396794235407}, {"content": "sum", "parents": [0], "prob": 0.002693396794235407}, {"content": "Earth", "parents": [0], "prob": 0.0020976195324725463}, {"content": "as", "parents": [1], "prob": 0.00494511213828901}, {"content": "based", "parents": [1], "prob": 0.0038512569761225794}, {"content": "in", "parents": [1], "prob": 0.002646927945096369}]





num_batches 2
infer start: GPU memory used: 15235 MB.

batch_candidates
torch.Size([8, 18])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 18, 32064])

hidden_states[-1]
torch.Size([8, 18, 3072])
infer - after batch run: GPU memory used: 15353 MB.

batch_candidates
torch.S

tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  3171],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  2038],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,   967],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310,  2533],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958, 


candidate_logprobs
torch.Size([10])


tensor([-3.5420, -4.2920, -4.2920, -5.6670, -5.9170, -5.9170, -6.1670, -5.3094,
        -5.5594, -5.9344], device='cuda:0', dtype=torch.float64)


embeddings_np
(10, 3072)


array([[ 1.5625    , -2.265625  ,  1.78125   , ...,  1.15625   ,
         1.265625  , -0.3515625 ],
       [-0.71484375,  0.29296875,  3.296875  , ...,  0.05078125,
         1.15625   , -0.35546875],
       [-0.75      , -1.2109375 ,  3.328125  , ...,  0.03637695,
        -1.0546875 , -0.5078125 ],
       ...,
       [-1.1328125 ,  0.54296875,  0.23242188, ..., -1.6953125 ,
         1.109375  , -5.3125    ],
       [ 0.51171875, -1.3359375 ,  0.40625   , ...,  0.41601562,
         0.46289062, -3.9375    ],
       [-2.484375  , -0.44921875,  2.4375    , ...,  0.34375   ,
        -2.703125  , -4.28125   ]], dtype=float32)


k_mean_space
(10, 2)


array([[51.670624, 84.66034 ],
       [87.51908 , 61.940083],
       [85.87011 , 58.84681 ],
       [92.760315, 71.27328 ],
       [89.12618 , 60.97973 ],
       [51.670624, 82.49791 ],
       [90.98064 , 63.01418 ],
       [97.24759 , 67.10196 ],
       [97.51835 , 69.43807 ],
       [94.846634, 62.403366]], dtype=float32)


k_mean_clusters
(10,)


array([0, 1, 1, 1, 1, 0, 1, 1, 1, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -9.45890415, -43.13782734])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 18])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224]],
       device='cuda:0')


new_candidate_parents


[[0, 5], [1, 2, 3, 4, 6, 7, 8, 9]]


new_candidate_logprobs
torch.Size([2])


tensor([ -9.4589, -43.1378], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 5], [1, 2, 3, 4, 6, 7, 8, 9]]

event: k_means
id: 7-k
data: [{"content": "elev", "parents": [0, 5], "prob": 7.799201197092837e-05}, {"content": "peak", "parents": [1, 2, 3, 4, 6, 7, 8, 9], "prob": 1.8428060336253292e-19}]





candidates
torch.Size([2, 18])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -9.4589, -43.1378], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.7148, -1.0703, -1.1406,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.1250, -1.9844, -7.7188,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9999e-01, 7.8892e-06, 1.9947e-06,  ..., 2.6103e-23, 1.5832e-23,
         6.5997e-24],
        [6.9831e-01, 2.0007e-01, 9.4507e-02,  ..., 2.5163e-22, 1.7294e-22,
         1.6086e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.6983, 0.8984, 0.9929,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 18])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([4])


tensor([ -9.4589, -43.1378, -43.1378, -43.1378], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[  362],
        [11858],
        [ 3171],
        [ 2038]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-1.0610e-05, -3.5909e-01, -1.6091e+00, -2.3591e+00], device='cuda:0')


new_candidates
torch.Size([4, 19])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  3171],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([4])


tensor([ -9.4589, -43.4969, -44.7469, -45.4969], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 7-p
data: [{"content": "ation", "parents": [0], "prob": 7.799118450478204e-05}, {"content": "elev", "parents": [1], "prob": 1.286858130179515e-19}, {"content": "height", "parents": [1], "prob": 3.686910491506643e-20}, {"content": "above", "parents": [1], "prob": 1.741572991525982e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([4, 19])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 19, 32064])

hidden_states[-1]
torch.Size([4, 19, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([4, 19])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  3171],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038]],
       device='cuda:0')


candidate_logprobs
torch.Size([4])


tensor([ -9.4589, -43.4969, -44.7469, -45.4969], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-1.2265625 , -1.7265625 ,  1.828125  , ..., -0.35742188,
         0.8828125 ,  0.328125  ],
       [ 2.125     , -2.265625  ,  2.328125  , ...,  0.8203125 ,
         0.359375  , -2.0625    ],
       [-1.0546875 , -0.04760742,  1.234375  , ..., -2.640625  ,
        -0.08251953, -0.78125   ],
       [-2.328125  , -1.921875  ,  2.46875   , ...,  1.109375  ,
         0.11474609, -1.1484375 ]], dtype=float32)


k_mean_space
(4, 2)


array([[ 42.043888, 100.05355 ],
       [ 64.13674 , 105.39813 ],
       [ 43.182697, 100.9499  ],
       [ 88.62732 ,   0.      ]], dtype=float32)


k_mean_clusters
(4,)


array([0, 0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-97.70274085, -45.49691314])


closest
(2,)


array([0, 3])


new_candidates
torch.Size([2, 19])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038]],
       device='cuda:0')


new_candidate_parents


[[0, 1, 2], [3]]


new_candidate_logprobs
torch.Size([2])


tensor([-97.7027, -45.4969], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1, 2], [3]]

event: k_means
id: 8-k
data: [{"content": "ation", "parents": [0, 1, 2], "prob": 3.700315724286182e-43}, {"content": "above", "parents": [3], "prob": 1.741572991525982e-20}]





candidates
torch.Size([2, 19])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7027, -45.4969], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.0850, -0.9180, -5.1875,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.6797, -1.0234, -9.4375,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9033e-01, 3.1520e-03, 2.1663e-03,  ..., 1.5174e-20, 1.3391e-20,
         2.3270e-21],
        [1.0000e+00, 8.3153e-07, 5.0435e-07,  ..., 1.6687e-24, 1.1469e-24,
         8.9319e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9903, 0.9935, 0.9956,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 19])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7027, -45.4969], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[2038],
        [7205]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-9.7195e-03, -2.3842e-06], device='cuda:0')


new_candidates
torch.Size([2, 20])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 8-p
data: [{"content": "above", "parents": [0], "prob": 3.66452455770134e-43}, {"content": "sea", "parents": [1], "prob": 1.7415688392922036e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 20])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 20, 32064])

hidden_states[-1]
torch.Size([2, 20, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 20])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  1.5781,  -3.1719, -11.3125,  ...,   0.0000,   0.0000,   0.0000],
        [  0.8398,  -5.3438, -11.2500,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9999e-01, 1.9947e-06, 1.5535e-06,  ..., 3.3516e-23, 2.9578e-23,
         2.3035e-23],
        [1.0000e+00, 2.6996e-07, 1.8554e-07,  ..., 4.9401e-28, 3.8473e-28,
         5.9001e-29]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 20])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[7205],
        [3233]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-5.9605e-06, -5.9605e-07], device='cuda:0')


new_candidates
torch.Size([2, 21])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 9-p
data: [{"content": "sea", "parents": [0], "prob": 3.664502715432991e-43}, {"content": "level", "parents": [1], "prob": 1.7415678012362957e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 21])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 21, 32064])

hidden_states[-1]
torch.Size([2, 21, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 21])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  1.0781,  -5.1562, -10.2500,  ...,   0.0000,   0.0000,   0.0000],
        [ -0.3145,  -4.1250,  -7.0312,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.3710e-06, 1.2752e-07,  ..., 2.9963e-28, 1.1023e-28,
         4.0551e-29],
        [9.9994e-01, 5.1442e-05, 7.3378e-07,  ..., 5.1397e-24, 4.0028e-24,
         1.4725e-24]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 21])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4969], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[3233],
        [ 338]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.5497e-06, -5.5076e-05], device='cuda:0')


new_candidates
torch.Size([2, 22])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4970], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 10-p
data: [{"content": "level", "parents": [0], "prob": 3.6644970364768605e-43}, {"content": "is", "parents": [1], "prob": 1.7414718849257964e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 22])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 22, 32064])

hidden_states[-1]
torch.Size([2, 22, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 22])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4970], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.2197, -4.9375, -5.1250,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.3750, -2.2500, -5.2812,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9973e-01, 2.6119e-04, 5.4208e-06,  ..., 6.5980e-24, 5.8227e-24,
         2.1421e-24],
        [9.9997e-01, 2.4300e-05, 1.3709e-06,  ..., 2.8062e-22, 2.8062e-22,
         1.5021e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9997, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 22])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7125, -45.4970], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 338],
        [8040]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-2.7440e-04, -2.8015e-05], device='cuda:0')


new_candidates
torch.Size([2, 23])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7127, -45.4970], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 11-p
data: [{"content": "is", "parents": [0], "prob": 3.663491644456064e-43}, {"content": "Mount", "parents": [1], "prob": 1.7414230990127795e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 23])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 23, 32064])

hidden_states[-1]
torch.Size([2, 23, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 23])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7127, -45.4970], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.0938, -1.3828, -5.3750,  ...,  0.0000,  0.0000,  0.0000],
        [ 7.4375, -0.4766, -1.4766,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9996e-01, 3.5356e-05, 3.2886e-06,  ..., 5.9407e-22, 3.6032e-22,
         2.4765e-22],
        [1.0000e+00, 2.2603e-06, 9.4224e-07,  ..., 1.2577e-21, 9.7950e-22,
         6.7320e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 23])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7127, -45.4970], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 8040],
        [18274]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-4.2082e-05, -4.1723e-06], device='cuda:0')


new_candidates
torch.Size([2, 24])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 12-p
data: [{"content": "Mount", "parents": [0], "prob": 3.6633374815117964e-43}, {"content": "Ever", "parents": [1], "prob": 1.7414158332295256e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 24])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 24, 32064])

hidden_states[-1]
torch.Size([2, 24, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 24])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 7.5625,  0.5508, -1.1172,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.2500, -3.5781, -3.4844,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.3710e-06, 1.3710e-06,  ..., 7.6283e-22, 7.6283e-22,
         5.9409e-22],
        [9.9999e-01, 1.0130e-05, 5.0434e-07,  ..., 9.6026e-24, 6.5997e-24,
         4.0029e-24]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 24])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[18274],
        [  342]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-3.8147e-06, -1.0848e-05], device='cuda:0')


new_candidates
torch.Size([2, 25])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 13-p
data: [{"content": "Ever", "parents": [0], "prob": 3.663323506988323e-43}, {"content": "est", "parents": [1], "prob": 1.741396942271113e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 25])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 25, 32064])

hidden_states[-1]
torch.Size([2, 25, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 25])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.2969, -3.5312, -3.7812,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.1250, -3.6719, -3.7656,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9999e-01, 7.8893e-06, 4.4508e-07,  ..., 9.6026e-24, 7.4785e-24,
         6.5998e-24],
        [9.8201e-01, 1.7986e-02, 1.5256e-06,  ..., 4.2262e-23, 2.9046e-23,
         2.5633e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9820, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 25])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.4970], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  342],
        [29889]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-8.5831e-06, -1.8152e-02], device='cuda:0')


new_candidates
torch.Size([2, 26])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.5152], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 14-p
data: [{"content": "est", "parents": [0], "prob": 3.663292064428786e-43}, {"content": ".", "parents": [1], "prob": 1.7100717129371742e-20}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 26])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 26, 32064])

hidden_states[-1]
torch.Size([2, 26, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 26])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-97.7128, -45.5152], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.8125, -4.8438, -4.0312,  ...,  0.0000,  0.0000,  0.0000],
        [ 7.0000, -1.2109, -3.0312,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.8593e-01, 1.4064e-02, 9.2899e-07,  ..., 3.3045e-23, 2.5736e-23,
         2.0043e-23],
        [4.4115e-01, 3.0320e-01, 2.3613e-01,  ..., 7.5089e-23, 7.5089e-23,
         5.1608e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9859, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4411, 0.7443, 0.9805,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 26])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([4])


tensor([-97.7128, -45.5152, -45.5152, -45.5152], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[29889],
        [  739],
        [ 5976],
        [ 8011]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-0.0142, -0.8184, -1.1934, -1.4434], device='cuda:0')


new_candidates
torch.Size([4, 27])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274,   342, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  8011]], device='cuda:0')


new_candidate_logprobs
torch.Size([4])


tensor([-97.7270, -46.3335, -46.7085, -46.9585], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 15-p
data: [{"content": ".", "parents": [0], "prob": 3.6117664531874214e-43}, {"content": "It", "parents": [1], "prob": 7.543971341390068e-21}, {"content": "Loc", "parents": [1], "prob": 5.1848906224437434e-21}, {"content": "Its", "parents": [1], "prob": 4.037997358265539e-21}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([4, 27])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 27, 32064])

hidden_states[-1]
torch.Size([4, 27, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([4, 27])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 11858,   362,  2038,
          7205,  3233,   338,  8040, 18274,   342, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  8011]], device='cuda:0')


candidate_logprobs
torch.Size([4])


tensor([-97.7270, -46.3335, -46.7085, -46.9585], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-3.234375  , -0.28125   , -0.19433594, ...,  0.35351562,
         0.44921875,  3.84375   ],
       [-2.015625  , -1.09375   , -0.296875  , ..., -1.1640625 ,
        -0.21289062,  1.4453125 ],
       [-1.765625  ,  1.2109375 ,  1.359375  , ..., -1.6875    ,
        -0.8046875 ,  1.21875   ],
       [-3.        , -1.0234375 ,  2.6875    , ...,  1.0234375 ,
        -2.4375    ,  2.96875   ]], dtype=float32)


k_mean_space
(4, 2)


array([[103.925575,  53.26101 ],
       [107.37869 ,  52.111237],
       [  0.      ,  92.52292 ],
       [111.134865,  58.718685]], dtype=float32)


k_mean_clusters
(4,)


array([1, 1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -46.70853821, -191.01903832])


closest
(2,)


array([2, 1])


new_candidates
torch.Size([2, 27])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739]], device='cuda:0')


new_candidate_parents


[[2], [0, 1, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([ -46.7085, -191.0190], device='cuda:0', dtype=torch.float64)


candidate_parents


[[2], [0, 1, 3]]

event: k_means
id: 16-k
data: [{"content": "Loc", "parents": [2], "prob": 5.1848906224437434e-21}, {"content": "It", "parents": [0, 1, 3], "prob": 1.1002356685841186e-83}]





candidates
torch.Size([2, 27])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -46.7085, -191.0190], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.7969, -6.4688,  0.0972,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.2812, -4.5312, -7.2188,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.3824e-07, 9.9312e-08,  ..., 1.8538e-26, 2.8428e-27,
         2.2140e-27],
        [4.1624e-01, 2.8607e-01, 2.8607e-01,  ..., 4.6199e-22, 1.4999e-22,
         1.7913e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4162, 0.7023, 0.9884,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 27])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([4])


tensor([ -46.7085, -191.0190, -191.0190, -191.0190], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[  630],
        [  338],
        [15028],
        [22170]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-3.5763e-07, -8.7650e-01, -1.2515e+00, -1.2515e+00], device='cuda:0')


new_candidates
torch.Size([4, 28])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739,   338],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 22170]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([4])


tensor([ -46.7085, -191.8955, -192.2705, -192.2705], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 16-p
data: [{"content": "ated", "parents": [0], "prob": 5.184888768182397e-21}, {"content": "is", "parents": [1], "prob": 4.579578304018173e-84}, {"content": "stands", "parents": [1], "prob": 3.147495257340765e-84}, {"content": "reaches", "parents": [1], "prob": 3.147495257340765e-84}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([4, 28])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 28, 32064])

hidden_states[-1]
torch.Size([4, 28, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([4, 28])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739,   338],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 22170]],
       device='cuda:0')


candidate_logprobs
torch.Size([4])


tensor([ -46.7085, -191.8955, -192.2705, -192.2705], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[ 0.65234375,  1.140625  , -2.25      , ..., -1.3046875 ,
        -1.59375   ,  1.171875  ],
       [-0.47851562,  1.65625   , -0.41015625, ...,  0.234375  ,
         0.62109375,  2.140625  ],
       [-0.20703125,  2.21875   ,  0.54296875, ...,  2.21875   ,
         2.609375  ,  1.6875    ],
       [-0.9140625 ,  2.5       ,  0.96484375, ..., -0.54296875,
         0.6875    ,  0.29492188]], dtype=float32)


k_mean_space
(4, 2)


array([[91.917854, 45.419285],
       [78.163376, 45.419285],
       [41.50877 , 78.10177 ],
       [41.50877 , 88.19667 ]], dtype=float32)


k_mean_clusters
(4,)


array([1, 1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-384.54108166, -238.60407946])


closest
(2,)


array([2, 0])


new_candidates
torch.Size([2, 28])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630]],
       device='cuda:0')


new_candidate_parents


[[2, 3], [0, 1]]


new_candidate_logprobs
torch.Size([2])


tensor([-384.5411, -238.6041], device='cuda:0', dtype=torch.float64)


candidate_parents


[[2, 3], [0, 1]]

event: k_means
id: 17-k
data: [{"content": "stands", "parents": [2, 3], "prob": 9.906726394982609e-168}, {"content": "ated", "parents": [0, 1], "prob": 2.3744604111515618e-104}]





candidates
torch.Size([2, 28])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-384.5411, -238.6041], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.7891, -0.8086, -4.4688,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.0156,  1.1250, -4.2812,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.2208e-01, 4.0513e-02, 3.5753e-02,  ..., 4.0477e-21, 2.1666e-21,
         5.7738e-23],
        [9.9982e-01, 1.3982e-04, 3.1197e-05,  ..., 6.7308e-22, 5.9399e-22,
         7.0942e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9221, 0.9626, 0.9983,  ..., 1.0000, 1.0000, 1.0000],
        [0.9998, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 28])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-384.5411, -238.6041], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[472],
        [297]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0811, -0.0002], device='cuda:0')


new_candidates
torch.Size([2, 29])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-384.6222, -238.6043], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 17-p
data: [{"content": "at", "parents": [0], "prob": 9.134755129618341e-168}, {"content": "in", "parents": [1], "prob": 2.3740305879766003e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 29])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 29, 32064])

hidden_states[-1]
torch.Size([2, 29, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 29])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-384.6222, -238.6043], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.0391, -3.9062, -3.2031,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.9688, -3.2969, -2.2812,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.1096e-01, 8.4733e-02, 2.2580e-03,  ..., 8.9229e-22, 6.9491e-22,
         2.8968e-22],
        [1.0000e+00, 1.3710e-06, 5.0435e-07,  ..., 9.1107e-23, 8.0402e-23,
         6.2617e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9110, 0.9957, 0.9980,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 29])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-384.6222, -238.6043], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[14235],
        [  278]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-9.3255e-02, -2.3842e-06], device='cuda:0')


new_candidates
torch.Size([2, 30])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-384.7155, -238.6043], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 18-p
data: [{"content": "approximately", "parents": [0], "prob": 8.321406479414545e-168}, {"content": "the", "parents": [1], "prob": 2.374024927846335e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 30])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 30, 32064])

hidden_states[-1]
torch.Size([2, 30, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 30])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-384.7155, -238.6043], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.1777, -2.3906, -7.0625,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.6484,  1.4766, -1.5234,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 8.7642e-08, 5.3158e-08,  ..., 2.1426e-24, 6.1388e-25,
         1.3697e-25],
        [9.3227e-01, 6.7533e-02, 1.3037e-04,  ..., 6.4017e-20, 3.0240e-20,
         2.3551e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9323, 0.9998, 0.9999,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 30])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-384.7155, -238.6043], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29871],
        [  379]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07, -7.0134e-02], device='cuda:0')


new_candidates
torch.Size([2, 31])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-384.7155, -238.6744], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 19-p
data: [{"content": "", "parents": [0], "prob": 8.32140548742565e-168}, {"content": "H", "parents": [1], "prob": 2.2132296465864344e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 31])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 31, 32064])

hidden_states[-1]
torch.Size([2, 31, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 31])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-384.7155, -238.6744], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.6250,  0.4570, -3.7812,  ...,  0.0000,  0.0000,  0.0000],
        [ 6.1562,  6.4375, -3.7500,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[5.6218e-01, 4.3782e-01, 6.2640e-09,  ..., 8.0119e-22, 3.3399e-22,
         1.3923e-22],
        [1.0000e+00, 1.5535e-06, 2.3824e-07,  ..., 3.6034e-22, 1.9287e-22,
         1.7021e-22]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.5622, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 31])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([3])


tensor([-384.7155, -384.7155, -238.6744], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29947],
        [29906],
        [ 3039]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-5.7594e-01, -8.2594e-01, -2.3842e-06], device='cuda:0')


new_candidates
torch.Size([3, 32])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039]], device='cuda:0')


new_candidate_logprobs
torch.Size([3])


tensor([-385.2914, -385.5414, -238.6744], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [1]]

event: top_p
id: 20-p
data: [{"content": "8", "parents": [0], "prob": 4.678098912212983e-168}, {"content": "2", "parents": [0], "prob": 3.6433068789589398e-168}, {"content": "imal", "parents": [1], "prob": 2.2132243698355072e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([3, 32])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 32, 32064])

hidden_states[-1]
torch.Size([3, 32, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([3, 32])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039]], device='cuda:0')


candidate_logprobs
torch.Size([3])


tensor([-385.2914, -385.5414, -238.6744], device='cuda:0', dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.4765625 , -3.03125   ,  1.9921875 , ...,  2.28125   ,
         2.765625  ,  0.94921875],
       [-0.48828125, -0.6953125 ,  2.078125  , ...,  0.46289062,
         1.21875   , -0.1015625 ],
       [-1.1875    ,  0.3125    ,  0.9140625 , ..., -0.06445312,
        -2.96875   ,  2.640625  ]], dtype=float32)


k_mean_space
(3, 2)


array([[ 45.772224, 113.93932 ],
       [ 45.772224, 113.46289 ],
       [104.08123 ,   0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-770.83280768, -238.67439923])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 32])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039]], device='cuda:0')


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6744], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 21-k
data: [{"content": "8", "parents": [0, 1], "prob": 0.0}, {"content": "imal", "parents": [2], "prob": 2.2132243698355072e-104}]





candidates
torch.Size([2, 32])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6744], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.0825, -3.0156, -7.3438,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.7500, -2.7969,  3.8125,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 5.7150e-07, 4.3635e-09,  ..., 1.0668e-25, 9.4141e-26,
         3.9244e-26],
        [9.9593e-01, 4.0701e-03, 4.6721e-08,  ..., 6.1138e-25, 3.7082e-25,
         2.2491e-25]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9959, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 32])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6744], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29892],
        [  388]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-5.9605e-07, -4.0784e-03], device='cuda:0')


new_candidates
torch.Size([2, 33])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6785], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 21-p
data: [{"content": ",", "parents": [0], "prob": 0.0}, {"content": "ay", "parents": [1], "prob": 2.2042163180480427e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 33])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 33, 32064])

hidden_states[-1]
torch.Size([2, 33, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 33])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6785], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.0000,  1.9453, -3.4844,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.7305,  0.2275,  0.2246,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 7.3382e-07, 6.4759e-07,  ..., 1.0324e-22, 2.6103e-23,
         7.7276e-27],
        [9.9970e-01, 2.9596e-04, 3.2232e-08,  ..., 2.8989e-25, 1.7583e-25,
         1.8532e-26]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9997, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 33])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6785], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29947],
        [  294]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.3113e-06, -2.9598e-04], device='cuda:0')


new_candidates
torch.Size([2, 34])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6788], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 22-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "as", "parents": [1], "prob": 2.2035640087412235e-104}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 34])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 34, 32064])

hidden_states[-1]
torch.Size([2, 34, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 34])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-770.8328, -238.6788], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 5.0938,  0.6602, -0.2188,  ...,  0.0000,  0.0000,  0.0000],
        [-1.7109, -5.2188, -3.1250,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.3635e-09, 1.5737e-11,  ..., 9.2293e-28, 2.6442e-28,
         5.9001e-29],
        [8.7028e-01, 1.1778e-01, 5.8639e-03,  ..., 1.5412e-18, 1.0593e-18,
         5.2738e-20]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.8703, 0.9881, 0.9939,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 34])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([3])


tensor([-770.8328, -238.6788, -238.6788], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29946],
        [  373],
        [29892]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([ 0.0000, -0.1389, -2.1389], device='cuda:0')


new_candidates
torch.Size([3, 35])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892]], device='cuda:0')


new_candidate_logprobs
torch.Size([3])


tensor([-770.8328, -238.8177, -240.8177], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 23-p
data: [{"content": "4", "parents": [0], "prob": 0.0}, {"content": "on", "parents": [1], "prob": 1.91770869423493e-104}, {"content": ",", "parents": [1], "prob": 2.5953367637106958e-105}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([3, 35])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 35, 32064])

hidden_states[-1]
torch.Size([3, 35, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([3, 35])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892]], device='cuda:0')


candidate_logprobs
torch.Size([3])


tensor([-770.8328, -238.8177, -240.8177], device='cuda:0', dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-1.640625  , -2.5625    ,  1.1484375 , ..., -0.625     ,
        -0.44921875, -0.4921875 ],
       [-2.734375  ,  0.75      ,  0.77734375, ...,  2.453125  ,
         1.2109375 ,  0.578125  ],
       [-0.12988281, -0.94921875, -3.34375   , ..., -2.328125  ,
        -1.34375   , -0.484375  ]], dtype=float32)


k_mean_space
(3, 2)


array([[ 55.74222, 112.98439],
       [ 55.74222, 101.86088],
       [ 91.99652,   0.     ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-1009.65052818,  -240.81771848])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 35])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892]], device='cuda:0')


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-1009.6505,  -240.8177], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 24-k
data: [{"content": "4", "parents": [0, 1], "prob": 0.0}, {"content": ",", "parents": [2], "prob": 2.5953367637106958e-105}]





candidates
torch.Size([2, 35])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-1009.6505,  -240.8177], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.8242,  2.5938, -1.6719,  ...,  0.0000,  0.0000,  0.0000],
        [-0.7070, -7.5312, -4.1875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9945e-01, 5.5278e-04, 4.8447e-11,  ..., 1.2734e-26, 5.3082e-27,
         2.0582e-28],
        [5.6233e-01, 3.0100e-01, 4.6159e-02,  ..., 4.0699e-21, 3.5917e-21,
         1.6966e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9994, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.5623, 0.8633, 0.9095,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 35])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  72


carryover_candidate_logprobs
torch.Size([4])


tensor([-1009.6505,  -240.8177,  -240.8177,  -240.8177], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[29947],
        [  373],
        [ 1546],
        [  372]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-5.5299e-04, -5.7566e-01, -1.2007e+00, -3.0757e+00], device='cuda:0')


new_candidates
torch.Size([4, 36])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,  1546],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   3


new_candidate_logprobs
torch.Size([4])


tensor([-1009.6511,  -241.3934,  -242.0184,  -243.8934], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 24-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "on", "parents": [1], "prob": 1.4594428338532931e-105}, {"content": "between", "parents": [1], "prob": 7.81183456090117e-106}, {"content": "it", "parents": [1], "prob": 1.1979834872742963e-106}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([4, 36])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 36, 32064])

hidden_states[-1]
torch.Size([4, 36, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([4, 36])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,  1546],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   3


candidate_logprobs
torch.Size([4])


tensor([-1009.6511,  -241.3934,  -242.0184,  -243.8934], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-2.15625   ,  0.51953125, -0.04003906, ..., -2.296875  ,
         1.703125  ,  1.28125   ],
       [-3.140625  ,  0.83203125,  1.1171875 , ...,  2.5625    ,
         1.1953125 ,  0.53125   ],
       [-0.18066406,  1.4375    , -0.00866699, ...,  3.25      ,
        -0.13769531, -1.28125   ],
       [-0.578125  , -0.6953125 , -1.46875   , ..., -2.546875  ,
        -0.9921875 ,  1.0625    ]], dtype=float32)


k_mean_space
(4, 2)


array([[103.526955,  54.952694],
       [ 48.43154 ,  95.42342 ],
       [ 48.43154 ,  95.96844 ],
       [ 94.58388 ,  54.952694]], dtype=float32)


k_mean_clusters
(4,)


array([1, 0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -483.41176004, -1253.5444613 ])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 36])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947]], device='cuda:0')


new_candidate_parents


[[1, 2], [0, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5445], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0, 3]]

event: k_means
id: 25-k
data: [{"content": "on", "parents": [1, 2], "prob": 1.1400925969154697e-210}, {"content": "8", "parents": [0, 3], "prob": 0.0}]





candidates
torch.Size([2, 36])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5445], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.6328, -5.0312,  1.7969,  ...,  0.0000,  0.0000,  0.0000],
        [-0.3477, -3.6406, -1.3516,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9999e-01, 3.7266e-06, 1.3709e-06,  ..., 1.0881e-23, 4.0030e-24,
         2.7512e-24],
        [9.8594e-01, 1.4064e-02, 6.3849e-07,  ..., 9.2817e-26, 3.8692e-26,
         3.0133e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9859, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 36])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5445], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  278],
        [29889]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-7.7486e-06, -1.4164e-02], device='cuda:0')


new_candidates
torch.Size([2, 37])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5586], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 25-p
data: [{"content": "the", "parents": [0], "prob": 1.1400837627896256e-210}, {"content": ".", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 37])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 37, 32064])

hidden_states[-1]
torch.Size([2, 37, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 37])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5586], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-4.3750, -6.5938, -4.6250,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.7852, -0.8398, -2.1719,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9983e-01, 6.6045e-05, 3.1198e-05,  ..., 3.8733e-21, 3.4182e-21,
         4.6260e-22],
        [1.0000e+00, 3.0590e-07, 1.1033e-09,  ..., 9.9224e-27, 1.5217e-27,
         1.3429e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9998, 0.9999, 0.9999,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 37])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4118, -1253.5586], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 5139],
        [29947]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.7215e-04, -3.5763e-07], device='cuda:0')


new_candidates
torch.Size([2, 38])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4119, -1253.5586], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 26-p
data: [{"content": "border", "parents": [0], "prob": 1.139887510810903e-210}, {"content": "8", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 38])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 38, 32064])

hidden_states[-1]
torch.Size([2, 38, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 38])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4119, -1253.5586], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.5469, -4.8125, -3.0625,  ...,  0.0000,  0.0000,  0.0000],
        [-1.6641, -2.4375, -3.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9328e-01, 6.6927e-03, 1.0062e-05,  ..., 5.5988e-21, 2.6447e-21,
         6.6868e-22],
        [1.0000e+00, 5.0435e-07, 4.4508e-07,  ..., 5.5978e-28, 4.9401e-28,
         1.1023e-28]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9933, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 38])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4119, -1253.5586], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 1546],
        [29953]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-6.7388e-03, -9.5367e-07], device='cuda:0')


new_candidates
torch.Size([2, 39])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4187, -1253.5586], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 27-p
data: [{"content": "between", "parents": [0], "prob": 1.1322318754323784e-210}, {"content": "6", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 39])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 39, 32064])

hidden_states[-1]
torch.Size([2, 39, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 39])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4187, -1253.5586], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.7812, -3.5156, -4.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-1.2344, -4.4375, -0.1245,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9752e-01, 1.4997e-03, 7.0841e-04,  ..., 2.8554e-20, 2.5199e-20,
         2.2238e-20],
        [1.0000e+00, 4.4508e-07, 2.3824e-07,  ..., 7.7276e-27, 5.3111e-27,
         3.2213e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9975, 0.9990, 0.9997,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 39])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4187, -1253.5586], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[22806],
        [27881]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-2.4880e-03, -9.5367e-07], device='cuda:0')


new_candidates
torch.Size([2, 40])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5586], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 28-p
data: [{"content": "Nep", "parents": [0], "prob": 1.1294183725569486e-210}, {"content": "meters", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 40])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 40, 32064])

hidden_states[-1]
torch.Size([2, 40, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 40])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5586], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.1719, -5.8750, -1.7031,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.4766,  1.7656, -3.6562,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.4508e-07, 1.4450e-07,  ..., 3.2859e-25, 1.9930e-25,
         2.3335e-28],
        [9.9995e-01, 3.5356e-05, 1.1478e-05,  ..., 1.7587e-25, 8.3076e-26,
         3.4631e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 40])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5586], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[284],
        [313]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-8.3447e-07, -4.6970e-05], device='cuda:0')


new_candidates
torch.Size([2, 41])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5587], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 29-p
data: [{"content": "al", "parents": [0], "prob": 1.129417430096824e-210}, {"content": "(", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 41])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 41, 32064])

hidden_states[-1]
torch.Size([2, 41, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 41])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5587], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.0510, -4.4688, -4.8438,  ...,  0.0000,  0.0000,  0.0000],
        [-0.7891,  1.4922, -5.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.2099e-06, 7.7344e-08,  ..., 1.1698e-22, 8.0402e-23,
         3.7979e-23],
        [9.9908e-01, 9.1105e-04, 2.5589e-06,  ..., 1.2442e-22, 4.8722e-23,
         2.6079e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9991, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 41])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5587], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  322],
        [29906]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.4305e-06, -9.1786e-04], device='cuda:0')


new_candidates
torch.Size([2, 42])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313, 29906]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5596], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 30-p
data: [{"content": "and", "parents": [0], "prob": 1.1294158144522307e-210}, {"content": "2", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 42])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 42, 32064])

hidden_states[-1]
torch.Size([2, 42, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 42])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313, 29906]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5596], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.5547, -6.2812, -2.4375,  ...,  0.0000,  0.0000,  0.0000],
        [-2.2969, -4.0312, -6.5625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.1305e-01, 8.4927e-02, 1.9973e-03,  ..., 1.8933e-21, 1.1484e-21,
         6.1467e-22],
        [1.0000e+00, 5.9053e-10, 5.9053e-10,  ..., 1.5521e-25, 2.6972e-26,
         1.4437e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9131, 0.9980, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 42])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313, 29906]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -483.4212, -1253.5596], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  278],
        [29929]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0910,  0.0000], device='cuda:0')


new_candidates
torch.Size([2, 43])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313, 29906, 29929]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -483.5121, -1253.5596], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 31-p
data: [{"content": "the", "parents": [0], "prob": 1.031217547852845e-210}, {"content": "9", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 43])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 43, 32064])

hidden_states[-1]
torch.Size([2, 43, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 43])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 29953, 27881,
           313, 29906, 29929]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -483.5121, -1253.5596], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.1797, -2.7656, -5.0938,  ...,  0.0000,  0.0000,  0.0000],
        [-1.6953, -2.2500, -7.9062,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[7.3544e-01, 2.1071e-01, 2.5165e-02,  ..., 3.4709e-20, 3.0631e-20,
         7.7446e-21],
        [1.0000e+00, 8.1520e-09, 1.6919e-10,  ..., 5.4175e-25, 3.2859e-25,
         2.5590e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.7354, 0.9461, 0.9713,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 43])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 29889, 29947, 2995


carryover_candidate_logprobs
torch.Size([3])


tensor([ -483.5121,  -483.5121, -1253.5596], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[19429],
        [28273],
        [29892]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.3073, -1.5573,  0.0000], device='cuda:0')


new_candidates
torch.Size([3, 44])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 2988


new_candidate_logprobs
torch.Size([3])


tensor([ -483.8194,  -485.0694, -1253.5596], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [1]]

event: top_p
id: 32-p
data: [{"content": "Tib", "parents": [0], "prob": 7.583982245160216e-211}, {"content": "autonom", "parents": [0], "prob": 2.172847292540916e-211}, {"content": ",", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([3, 44])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 44, 32064])

hidden_states[-1]
torch.Size([3, 44, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([3, 44])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,   739, 15028,   472, 14235,
         29871, 29947, 29892, 29947, 29946, 29947, 2988


candidate_logprobs
torch.Size([3])


tensor([ -483.8194,  -485.0694, -1253.5596], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.8203125 ,  1.3671875 , -1.7578125 , ..., -0.13769531,
         2.796875  , -0.88671875],
       [-0.96484375, -0.45507812,  0.1484375 , ..., -0.06787109,
        -2.125     , -0.62109375],
       [-0.49609375, -0.25390625,  2.625     , ..., -0.20019531,
         0.6328125 ,  2.203125  ]], dtype=float32)


k_mean_space
(3, 2)


array([[113.32689 ,  55.787334],
       [  0.      ,  97.936615],
       [112.092064,  55.787334]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -485.0694162 , -1737.37900898])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 44])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429]], device='cuda:0')


new_candidate_parents


[[1], [0, 2]]


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1], [0, 2]]

event: k_means
id: 33-k
data: [{"content": "autonom", "parents": [1], "prob": 2.172847292540916e-211}, {"content": "Tib", "parents": [0, 2], "prob": 0.0}]





candidates
torch.Size([2, 44])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.1562,  2.7812, -0.3477,  ...,  0.0000,  0.0000,  0.0000],
        [-2.4062, -7.2188, -2.2500,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.2099e-06, 6.0236e-08,  ..., 1.2330e-23, 8.4743e-24,
         6.5998e-24],
        [1.0000e+00, 1.4450e-07, 1.4450e-07,  ..., 5.1399e-24, 3.5326e-24,
         4.2191e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 44])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[681],
        [300]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.3113e-06, -4.7684e-07], device='cuda:0')


new_candidates
torch.Size([2, 45])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 33-p
data: [{"content": "ous", "parents": [0], "prob": 2.1728444432814055e-211}, {"content": "et", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 45])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 45, 32064])

hidden_states[-1]
torch.Size([2, 45, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 45])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.9414,  1.9297,  2.7031,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.5312, -4.4062, -5.0625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9857e-01, 1.1692e-03, 9.5974e-05,  ..., 2.4730e-22, 1.6997e-22,
         1.5000e-22],
        [9.9098e-01, 5.8926e-03, 1.6883e-03,  ..., 1.1593e-22, 4.8327e-23,
         5.7718e-24]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9986, 0.9997, 0.9998,  ..., 1.0000, 1.0000, 1.0000],
        [0.9910, 0.9969, 0.9986,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 45])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0694, -1737.3790], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[5120],
        [5202]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0014, -0.0091], device='cuda:0')


new_candidates
torch.Size([2, 46])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 34-p
data: [{"content": "region", "parents": [0], "prob": 2.1697293003503764e-211}, {"content": "Aut", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 46])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 46, 32064])

hidden_states[-1]
torch.Size([2, 46, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 46])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-2.6406, -5.5000, -2.9531,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.7656, -0.1924,  1.1797,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.9023e-06, 4.4508e-07,  ..., 9.6026e-24, 7.4785e-24,
         3.7233e-25],
        [1.0000e+00, 8.3153e-07, 1.1254e-07,  ..., 4.6268e-22, 6.2617e-23,
         2.9578e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 46])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 310],
        [4917]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-3.5763e-06, -1.1921e-06], device='cuda:0')


new_candidates
torch.Size([2, 47])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 35-p
data: [{"content": "of", "parents": [0], "prob": 2.1697215407937854e-211}, {"content": "onom", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 47])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 47, 32064])

hidden_states[-1]
torch.Size([2, 47, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 47])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.2500,  4.1250, -1.3047,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.7031, -0.9727,  0.7344,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9842e-01, 1.1690e-03, 3.3493e-04,  ..., 1.1680e-22, 6.2518e-23,
         4.8689e-23],
        [1.0000e+00, 3.0590e-07, 1.0467e-08,  ..., 5.4175e-25, 2.2583e-25,
         8.3079e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9984, 0.9996, 0.9999,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 47])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0709, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[19429],
        [  681]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.5855e-03, -3.5763e-07], device='cuda:0')


new_candidates
torch.Size([2, 48])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 36-p
data: [{"content": "Tib", "parents": [0], "prob": 2.166284069409273e-211}, {"content": "ous", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 48])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 48, 32064])

hidden_states[-1]
torch.Size([2, 48, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 48])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-2.0781, -6.5625, -3.9219,  ...,  0.0000,  0.0000,  0.0000],
        [ 5.4375, -1.2969, -0.1445,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 6.4759e-07, 1.2752e-07,  ..., 1.7021e-22, 1.0324e-22,
         1.5832e-23],
        [1.0000e+00, 3.2887e-06, 1.0677e-06,  ..., 8.7565e-27, 4.6870e-27,
         1.0458e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 48])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  300],
        [11069]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-9.5367e-07, -4.8876e-06], device='cuda:0')


new_candidates
torch.Size([2, 49])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 37-p
data: [{"content": "et", "parents": [0], "prob": 2.1662820034797935e-211}, {"content": "Region", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 49])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 49, 32064])

hidden_states[-1]
torch.Size([2, 49, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 49])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ -1.7734,  -9.0625, -10.0625,  ...,   0.0000,   0.0000,   0.0000],
        [ -3.4688,  -6.0938,  -3.3594,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.8780e-01, 1.0973e-02, 1.0207e-03,  ..., 6.1854e-23, 9.4856e-24,
         6.5193e-24],
        [9.9998e-01, 1.6701e-05, 2.3823e-07,  ..., 1.4726e-24, 8.9317e-25,
         6.4701e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9878, 0.9988, 0.9998,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 49])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0724, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[297],
        [310]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.2273e-02, -1.7166e-05], device='cuda:0')


new_candidates
torch.Size([2, 50])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0847, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 38-p
data: [{"content": "in", "parents": [0], "prob": 2.13985866223214e-211}, {"content": "of", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 50])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 50, 32064])

hidden_states[-1]
torch.Size([2, 50, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 50])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0847, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.0938, -4.5625, -4.5938,  ...,  0.0000,  0.0000,  0.0000],
        [ 5.5000, -4.1250, -4.0312,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9990e-01, 5.1440e-05, 1.8924e-05,  ..., 5.2424e-22, 4.6264e-22,
         7.0948e-23],
        [1.0000e+00, 1.2099e-06, 2.6996e-07,  ..., 2.5590e-25, 1.5521e-25,
         1.5521e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 50])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0847, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[7551],
        [7551]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-9.8353e-05, -1.6689e-06], device='cuda:0')


new_candidates
torch.Size([2, 51])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0848, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 39-p
data: [{"content": "China", "parents": [0], "prob": 2.1396482121272626e-211}, {"content": "China", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 51])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 51, 32064])

hidden_states[-1]
torch.Size([2, 51, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 51])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0848, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  1.8750,  -0.6367, -10.6875,  ...,   0.0000,   0.0000,   0.0000],
        [  1.1328,  -1.7422, -12.1250,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.5535e-06, 5.7150e-07,  ..., 3.2858e-25, 8.3079e-26,
         1.2741e-26],
        [9.9999e-01, 4.2228e-06, 4.2228e-06,  ..., 1.7588e-25, 1.3697e-25,
         7.7275e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 51])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([ -485.0848, -1737.3881], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29892],
        [29892]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-2.1458e-06, -8.3447e-06], device='cuda:0')


new_candidates
torch.Size([2, 52])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([ -485.0848, -1737.3881], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 40-p
data: [{"content": ",", "parents": [0], "prob": 2.1396436209403448e-211}, {"content": ",", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([2, 52])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 52, 32064])

hidden_states[-1]
torch.Size([2, 52, 3072])
infer - after batch run: GPU memory used: 15405 MB.

candidates
torch.Size([2, 52])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([ -485.0848, -1737.3881], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 7.0938,  1.2109, -0.2148,  ...,  0.0000,  0.0000,  0.0000],
        [ 6.8750,  0.9727, -0.2119,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[7.5146e-01, 1.4797e-01, 8.9750e-02,  ..., 2.9695e-19, 8.5077e-20,
         7.9922e-20],
        [6.9396e-01, 1.7546e-01, 1.2059e-01,  ..., 3.7482e-19, 1.2953e-19,
         1.0739e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.7515, 0.8994, 0.9892,  ..., 1.0000, 1.0000, 1.0000],
        [0.6940, 0.8694, 0.9900,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([6])


tensor([0, 0, 0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([6, 52])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
    


carryover_candidate_logprobs
torch.Size([6])


tensor([ -485.0848,  -485.0848,  -485.0848, -1737.3881, -1737.3881, -1737.3881],
       device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([6, 1])


tensor([[8040],
        [ 372],
        [ 967],
        [8040],
        [ 372],
        [ 967]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([6])


tensor([-0.2857, -1.9107, -2.4107, -0.3653, -1.7403, -2.1153], device='cuda:0')


new_candidates
torch.Size([6, 53])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,   372],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  203


new_candidate_logprobs
torch.Size([6])


tensor([ -485.3705,  -486.9955,  -487.4955, -1737.7534, -1739.1284, -1739.5034],
       device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [0], [1], [1], [1]]

event: top_p
id: 41-p
data: [{"content": "Mount", "parents": [0], "prob": 1.6078656872789198e-211}, {"content": "it", "parents": [0], "prob": 3.166075165497955e-212}, {"content": "its", "parents": [0], "prob": 1.9203214299090924e-212}, {"content": "Mount", "parents": [1], "prob": 0.0}, {"content": "it", "parents": [1], "prob": 0.0}, {"content": "its", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15405 MB.

batch_candidates
torch.Size([6, 53])

batch_candidate_logprobs
torch.Size([6])

batch_logits
torch.Size([6, 53, 32064])

hidden_states[-1]
torch.Size([6, 53, 3072])
infer - after batch run: GPU memory used: 15665 MB.

candidates
torch.Size([6, 53])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,   372],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  203


candidate_logprobs
torch.Size([6])


tensor([ -485.3705,  -486.9955,  -487.4955, -1737.7534, -1739.1284, -1739.5034],
       device='cuda:0', dtype=torch.float64)


embeddings_np
(6, 3072)


array([[-0.56640625, -1.1015625 ,  2.03125   , ...,  2.359375  ,
         0.14941406, -1.59375   ],
       [-1.6171875 , -1.4375    ,  0.82421875, ..., -0.6875    ,
        -0.796875  ,  0.8125    ],
       [-1.859375  , -1.203125  ,  2.4375    , ...,  2.09375   ,
        -2.96875   ,  2.625     ],
       [-0.5546875 , -1.15625   ,  2.0625    , ...,  2.21875   ,
         0.22363281, -1.625     ],
       [-1.4921875 , -1.4453125 ,  0.72265625, ..., -0.78515625,
        -0.69921875,  0.8828125 ],
       [-1.90625   , -1.078125  ,  2.53125   , ...,  2.078125  ,
        -2.859375  ,  2.65625   ]], dtype=float32)


k_mean_space
(6, 2)


array([[  1.7378712,  89.05364  ],
       [102.1956   ,  48.21225  ],
       [100.23554  ,  48.495472 ],
       [  1.7378712,  88.87986  ],
       [102.09195  ,  48.078583 ],
       [100.091675 ,  47.918167 ]], dtype=float32)


k_mean_clusters
(6,)


array([0, 1, 1, 0, 1, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-2223.1239922 , -4453.12298475])


closest
(2,)


array([0, 5])


new_candidates
torch.Size([2, 53])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967]], device='cuda:0')


new_candidate_parents


[[0, 3], [1, 2, 4, 5]]


new_candidate_logprobs
torch.Size([2])


tensor([-2223.1240, -4453.1230], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 3], [1, 2, 4, 5]]

event: k_means
id: 42-k
data: [{"content": "Mount", "parents": [0, 3], "prob": 0.0}, {"content": "its", "parents": [1, 2, 4, 5], "prob": 0.0}]





candidates
torch.Size([2, 53])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-2223.1240, -4453.1230], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 8.6250,  2.4688,  0.3105,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0859, -0.9648, -5.0625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 6.3488e-09, 5.9053e-10,  ..., 1.6359e-26, 8.7565e-27,
         7.1878e-28],
        [5.8040e-01, 3.5203e-01, 3.7104e-02,  ..., 2.2485e-21, 1.0621e-21,
         1.0621e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.5804, 0.9324, 0.9695,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 53])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  203


carryover_candidate_logprobs
torch.Size([3])


tensor([-2223.1240, -4453.1230, -4453.1230], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[18274],
        [11858],
        [19224]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([ 0.0000, -0.5440, -1.0440], device='cuda:0')


new_candidates
torch.Size([3, 54])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   31


new_candidate_logprobs
torch.Size([3])


tensor([-2223.1240, -4453.6670, -4454.1670], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 42-p
data: [{"content": "Ever", "parents": [0], "prob": 0.0}, {"content": "elev", "parents": [1], "prob": 0.0}, {"content": "peak", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([3, 54])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 54, 32064])

hidden_states[-1]
torch.Size([3, 54, 3072])
infer - after batch run: GPU memory used: 15705 MB.

candidates
torch.Size([3, 54])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 11858],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   31


candidate_logprobs
torch.Size([3])


tensor([-2223.1240, -4453.6670, -4454.1670], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.6875    ,  0.35546875,  1.9453125 , ..., -0.21777344,
        -0.1640625 ,  0.38867188],
       [ 0.95703125, -0.78515625,  1.671875  , ...,  2.9375    ,
        -1.65625   ,  0.22460938],
       [-0.98828125, -0.28320312, -0.67578125, ...,  0.11621094,
        -0.11035156,  1.265625  ]], dtype=float32)


k_mean_space
(3, 2)


array([[ 56.351128, 113.58847 ],
       [ 56.351128, 105.75722 ],
       [ 94.17014 ,   0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-6676.79102049, -4454.16702829])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 54])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224]], device='cuda:0')


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-6676.7910, -4454.1670], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 43-k
data: [{"content": "Ever", "parents": [0, 1], "prob": 0.0}, {"content": "peak", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 54])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-6676.7910, -4454.1670], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 3.7969, -0.5625, -4.1250,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.4062, -2.0625, -7.8750,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.6374e-07, 3.6535e-08,  ..., 2.3803e-26, 1.4437e-26,
         9.9224e-27],
        [4.2655e-01, 2.9316e-01, 1.7781e-01,  ..., 3.6871e-22, 2.2363e-22,
         1.1970e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4265, 0.7197, 0.8975,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([5])


tensor([0, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([5, 54])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   31


carryover_candidate_logprobs
torch.Size([5])


tensor([-6676.7910, -4454.1670, -4454.1670, -4454.1670, -4454.1670],
       device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([5, 1])


tensor([[  342],
        [  364],
        [22170],
        [  338],
        [15028]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([5])


tensor([-1.1921e-07, -8.5203e-01, -1.2270e+00, -1.7270e+00, -2.3520e+00],
       device='cuda:0')


new_candidates
torch.Size([5, 55])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224,   364],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   29


new_candidate_logprobs
torch.Size([5])


tensor([-6676.7910, -4455.0191, -4455.3941, -4455.8941, -4456.5191],
       device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1], [1]]

event: top_p
id: 43-p
data: [{"content": "est", "parents": [0], "prob": 0.0}, {"content": "r", "parents": [1], "prob": 0.0}, {"content": "reaches", "parents": [1], "prob": 0.0}, {"content": "is", "parents": [1], "prob": 0.0}, {"content": "stands", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([5, 55])

batch_candidate_logprobs
torch.Size([5])

batch_logits
torch.Size([5, 55, 32064])

hidden_states[-1]
torch.Size([5, 55, 3072])
infer - after batch run: GPU memory used: 15705 MB.

candidates
torch.Size([5, 55])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224,   364],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   29


candidate_logprobs
torch.Size([5])


tensor([-6676.7910, -4455.0191, -4455.3941, -4455.8941, -4456.5191],
       device='cuda:0', dtype=torch.float64)


embeddings_np
(5, 3072)


array([[-0.9140625 , -0.78125   ,  0.19335938, ..., -0.7734375 ,
         0.36914062,  1.6015625 ],
       [-0.734375  ,  1.3515625 ,  0.6640625 , ..., -1.6171875 ,
        -0.75390625,  0.625     ],
       [-1.0234375 ,  2.96875   ,  1.375     , ...,  0.12597656,
         2.        ,  0.09228516],
       [ 0.02600098,  1.203125  ,  1.484375  , ...,  0.07275391,
         1.4609375 ,  1.7890625 ],
       [-0.49023438,  1.828125  ,  0.58984375, ...,  2.421875  ,
         3.265625  ,  0.14257812]], dtype=float32)


k_mean_space
(5, 2)


array([[83.57424 , 47.336105],
       [91.81897 , 47.336105],
       [44.852764, 88.09498 ],
       [43.691433, 84.89473 ],
       [41.466118, 84.102646]], dtype=float32)


k_mean_clusters
(5,)


array([1, 1, 0, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-13367.8071657 , -11131.81007584])


closest
(2,)


array([4, 0])


new_candidates
torch.Size([2, 55])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342]], device='cuda:0')


new_candidate_parents


[[2, 3, 4], [0, 1]]


new_candidate_logprobs
torch.Size([2])


tensor([-13367.8072, -11131.8101], device='cuda:0', dtype=torch.float64)


candidate_parents


[[2, 3, 4], [0, 1]]

event: k_means
id: 44-k
data: [{"content": "stands", "parents": [2, 3, 4], "prob": 0.0}, {"content": "est", "parents": [0, 1], "prob": 0.0}]





candidates
torch.Size([2, 55])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-13367.8072, -11131.8101], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.2188, -0.8828, -4.4688,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.5938, -2.1406, -3.9844,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.7624e-01, 1.5780e-02, 7.4537e-03,  ..., 8.8943e-23, 3.2720e-23,
         1.2037e-23],
        [8.3651e-01, 1.1321e-01, 4.1647e-02,  ..., 9.7859e-23, 4.6225e-23,
         1.3244e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9762, 0.9920, 0.9995,  ..., 1.0000, 1.0000, 1.0000],
        [0.8365, 0.9497, 0.9914,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 55])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   29


carryover_candidate_logprobs
torch.Size([3])


tensor([-13367.8072, -11131.8101, -11131.8101], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[  472],
        [22170],
        [15028]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.0240, -0.1785, -2.1785], device='cuda:0')


new_candidates
torch.Size([3, 56])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 22170],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  993


new_candidate_logprobs
torch.Size([3])


tensor([-13367.8312, -11131.9886, -11133.9886], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 44-p
data: [{"content": "at", "parents": [0], "prob": 0.0}, {"content": "reaches", "parents": [1], "prob": 0.0}, {"content": "stands", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([3, 56])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 56, 32064])

hidden_states[-1]
torch.Size([3, 56, 3072])
infer - after batch run: GPU memory used: 15705 MB.

candidates
torch.Size([3, 56])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 22170],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  993


candidate_logprobs
torch.Size([3])


tensor([-13367.8312, -11131.9886, -11133.9886], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-2.71875   ,  2.6875    ,  2.4375    , ...,  1.2421875 ,
         1.8984375 ,  0.75390625],
       [-1.3515625 ,  2.546875  ,  1.2578125 , ...,  0.453125  ,
         0.4375    , -0.09716797],
       [-0.9921875 ,  1.96875   ,  1.1796875 , ...,  1.921875  ,
         3.328125  ,  0.02307129]], dtype=float32)


k_mean_space
(3, 2)


array([[33.090748, 78.055626],
       [33.090748, 80.2617  ],
       [71.91879 ,  0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-24499.81980366, -11133.98859438])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 56])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028]], device='cuda:0')


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-24499.8198, -11133.9886], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 45-k
data: [{"content": "at", "parents": [0, 1], "prob": 0.0}, {"content": "stands", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 56])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-24499.8198, -11133.9886], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.3145, -3.5312, -4.2188,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.6094,  0.7227, -3.9375,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.7748e-01, 1.5800e-02, 5.8123e-03,  ..., 1.6638e-22, 1.1435e-22,
         4.7668e-23],
        [9.4880e-01, 3.2466e-02, 1.7378e-02,  ..., 4.3900e-22, 3.0172e-22,
         2.1856e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9775, 0.9933, 0.9991,  ..., 1.0000, 1.0000, 1.0000],
        [0.9488, 0.9813, 0.9986,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 56])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-24499.8198, -11133.9886], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[14235],
        [  472]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0228, -0.0526], device='cuda:0')


new_candidates
torch.Size([2, 57])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472]], device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-24499.8426, -11134.0411], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 45-p
data: [{"content": "approximately", "parents": [0], "prob": 0.0}, {"content": "at", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([2, 57])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 57, 32064])

hidden_states[-1]
torch.Size([2, 57, 3072])
infer - after batch run: GPU memory used: 15705 MB.

candidates
torch.Size([2, 57])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-24499.8426, -11134.0411], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.0232, -1.3047, -5.9688,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.6523, -2.8594, -3.2500,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.1399e-08, 4.1399e-08,  ..., 1.0121e-24, 7.8824e-25,
         2.5590e-25],
        [6.4318e-01, 3.4427e-01, 9.1745e-03,  ..., 4.9064e-22, 2.0453e-22,
         1.5929e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.6432, 0.9875, 0.9966,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 57])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         3200


carryover_candidate_logprobs
torch.Size([3])


tensor([-24499.8426, -11134.0411, -11134.0411], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29871],
        [14235],
        [  385]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-1.1921e-07, -4.4133e-01, -1.0663e+00], device='cuda:0')


new_candidates
torch.Size([3, 58])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,


new_candidate_logprobs
torch.Size([3])


tensor([-24499.8426, -11134.4825, -11135.1075], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 46-p
data: [{"content": "", "parents": [0], "prob": 0.0}, {"content": "approximately", "parents": [1], "prob": 0.0}, {"content": "an", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([3, 58])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 58, 32064])

hidden_states[-1]
torch.Size([3, 58, 3072])
infer - after batch run: GPU memory used: 15705 MB.

candidates
torch.Size([3, 58])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472, 14235],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,


candidate_logprobs
torch.Size([3])


tensor([-24499.8426, -11134.4825, -11135.1075], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.40820312,  1.359375  ,  2.375     , ...,  3.140625  ,
         2.375     ,  0.9921875 ],
       [-1.1796875 ,  0.70703125,  1.3515625 , ...,  1.171875  ,
         1.5625    ,  1.4453125 ],
       [-1.0546875 , -1.3515625 , -0.859375  , ...,  0.734375  ,
        -0.8515625 ,  2.453125  ]], dtype=float32)


k_mean_space
(3, 2)


array([[ 46.140568, 101.07742 ],
       [ 46.140568,  89.95451 ],
       [ 83.81693 ,   0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-35634.32505468, -11135.10747658])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 58])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385]],
       device='cuda:0')


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-35634.3251, -11135.1075], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 47-k
data: [{"content": "", "parents": [0, 1], "prob": 0.0}, {"content": "an", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 58])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-35634.3251, -11135.1075], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.2148,  1.1406, -3.0938,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.4531,  0.6602, -3.5938,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[8.1757e-01, 1.8243e-01, 2.4763e-08,  ..., 1.6953e-21, 1.4961e-21,
         1.1652e-21],
        [4.4464e-01, 3.4628e-01, 9.9212e-02,  ..., 2.9933e-22, 2.6416e-22,
         1.1012e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.8176, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4446, 0.7909, 0.8901,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([6])


tensor([0, 0, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([6, 58])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,


carryover_candidate_logprobs
torch.Size([6])


tensor([-35634.3251, -35634.3251, -11135.1075, -11135.1075, -11135.1075,
        -11135.1075], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([6, 1])


tensor([[29947],
        [29906],
        [21210],
        [11858],
        [24293],
        [15899]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([6])


tensor([-0.2014, -1.7014, -0.8105, -1.0605, -2.3105, -3.4355], device='cuda:0')


new_candidates
torch.Size([6, 59])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973,


new_candidate_logprobs
torch.Size([6])


tensor([-35634.5265, -35636.0265, -11135.9180, -11136.1680, -11137.4180,
        -11138.5430], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [1], [1], [1], [1]]

event: top_p
id: 47-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "2", "parents": [0], "prob": 0.0}, {"content": "impress", "parents": [1], "prob": 0.0}, {"content": "elev", "parents": [1], "prob": 0.0}, {"content": "aston", "parents": [1], "prob": 0.0}, {"content": "estimated", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15705 MB.

batch_candidates
torch.Size([6, 59])

batch_candidate_logprobs
torch.Size([6])

batch_logits
torch.Size([6, 59, 32064])

hidden_states[-1]
torch.Size([6, 59, 3072])
infer - after batch run: GPU memory used: 15749 MB.

candidates
torch.Size([6, 59])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973,


candidate_logprobs
torch.Size([6])


tensor([-35634.5265, -35636.0265, -11135.9180, -11136.1680, -11137.4180,
        -11138.5430], device='cuda:0', dtype=torch.float64)


embeddings_np
(6, 3072)


array([[-0.45898438, -3.3125    ,  2.125     , ...,  2.21875   ,
         3.359375  ,  0.7890625 ],
       [-0.58984375, -0.91015625,  2.375     , ...,  0.37695312,
         1.2890625 , -0.3984375 ],
       [ 0.90625   , -0.12792969, -2.8125    , ...,  2.609375  ,
         0.88671875, -0.18457031],
       [ 2.046875  , -2.1875    ,  0.12890625, ...,  2.84375   ,
        -1.546875  ,  0.671875  ],
       [-0.953125  , -2.328125  , -1.71875   , ...,  1.328125  ,
         2.625     , -0.92578125],
       [-1.8671875 , -1.6640625 , -1.8046875 , ...,  2.359375  ,
         0.21875   ,  2.578125  ]], dtype=float32)


k_mean_space
(6, 2)


array([[ 62.434204, 105.98312 ],
       [ 64.43854 , 106.72823 ],
       [ 88.965164,  44.967594],
       [ 69.777596,  98.808556],
       [ 91.16164 ,  44.967594],
       [ 65.920044,  96.12698 ]], dtype=float32)


k_mean_clusters
(6,)


array([0, 0, 1, 0, 1, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-93545.26389084, -22273.33595476])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 59])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210]],
       device='cuda:0')


new_candidate_parents


[[0, 1, 3, 5], [2, 4]]


new_candidate_logprobs
torch.Size([2])


tensor([-93545.2639, -22273.3360], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1, 3, 5], [2, 4]]

event: k_means
id: 48-k
data: [{"content": "8", "parents": [0, 1, 3, 5], "prob": 0.0}, {"content": "impress", "parents": [2, 4], "prob": 0.0}]





candidates
torch.Size([2, 59])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-93545.2639, -22273.3360], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.6836, -2.5312, -6.5625,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.7969, -1.9766,  1.3516,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.3824e-07, 3.3983e-09,  ..., 8.3079e-26, 5.0390e-26,
         1.2741e-26],
        [1.0000e+00, 1.6374e-07, 2.9401e-11,  ..., 8.1448e-28, 3.3953e-28,
         8.5846e-29]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 59])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210]],
       device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-93545.2639, -22273.3360], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29892],
        [  573]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-2.3842e-07, -1.1921e-07], device='cuda:0')


new_candidates
torch.Size([2, 60])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573]],
       device='cuda:0')


new_candidate_logprobs
torch.Size([2])


tensor([-93545.2639, -22273.3360], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 48-p
data: [{"content": ",", "parents": [0], "prob": 0.0}, {"content": "ive", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 60])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 60, 32064])

hidden_states[-1]
torch.Size([2, 60, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 60])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573]],
       device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-93545.2639, -22273.3360], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.7969,  2.1562, -2.3281,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.5625,  2.6562,  2.1562,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.2603e-06, 5.7150e-07,  ..., 1.5021e-22, 3.7979e-23,
         4.4469e-26],
        [4.9114e-01, 3.8250e-01, 8.5347e-02,  ..., 1.9027e-21, 1.1540e-21,
         7.9315e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4911, 0.8736, 0.9590,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 60])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573],
        [    1, 32010,  1724,   338,   278,  9939,


carryover_candidate_logprobs
torch.Size([4])


tensor([-93545.2639, -22273.3360, -22273.3360, -22273.3360], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[29947],
        [ 3171],
        [29871],
        [11858]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-2.9802e-06, -7.1103e-01, -9.6103e-01, -2.4610e+00], device='cuda:0')


new_candidates
torch.Size([4, 61])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171],
        [    1, 32


new_candidate_logprobs
torch.Size([4])


tensor([-93545.2639, -22274.0470, -22274.2970, -22275.7970], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 49-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "height", "parents": [1], "prob": 0.0}, {"content": "", "parents": [1], "prob": 0.0}, {"content": "elev", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([4, 61])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 61, 32064])

hidden_states[-1]
torch.Size([4, 61, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([4, 61])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171],
        [    1, 32


candidate_logprobs
torch.Size([4])


tensor([-93545.2639, -22274.0470, -22274.2970, -22275.7970], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[ 0.19238281, -1.8671875 ,  0.296875  , ...,  1.5078125 ,
         0.26757812,  0.3828125 ],
       [ 1.140625  , -0.6484375 ,  0.21777344, ...,  0.63671875,
        -0.87109375,  2.203125  ],
       [ 0.12402344,  1.5703125 ,  1.875     , ...,  2.28125   ,
         2.765625  ,  0.77734375],
       [ 1.59375   , -3.375     ,  1.6484375 , ...,  2.515625  ,
        -1.453125  ,  0.54296875]], dtype=float32)


k_mean_space
(4, 2)


array([[52.27941 , 99.49687 ],
       [93.34996 , 47.75584 ],
       [52.27941 , 96.035355],
       [97.48419 , 47.75584 ]], dtype=float32)


k_mean_clusters
(4,)


array([0, 1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-115819.56087894,  -44549.84396978])


closest
(2,)


array([0, 1])


new_candidates
torch.Size([2, 61])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171]], device='cuda:0')


new_candidate_parents


[[0, 2], [1, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 2], [1, 3]]

event: k_means
id: 50-k
data: [{"content": "8", "parents": [0, 2], "prob": 0.0}, {"content": "height", "parents": [1, 3], "prob": 0.0}]





candidates
torch.Size([2, 61])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171]], device='cuda:0')


candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.9375,  1.2812,  0.5430,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.3594, -1.1953, -4.0312,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.0587e-10, 2.5946e-11,  ..., 7.1878e-28, 7.1878e-28,
         5.2068e-29],
        [1.0000e+00, 8.7642e-08, 1.9556e-08,  ..., 1.3697e-25, 8.3079e-26,
         8.3079e-26]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 61])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29946],
        [  310]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000e+00, -1.1921e-07], device='cuda:0')


new_candidates
torch.Size([2, 62])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310]], dev


new_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 50-p
data: [{"content": "4", "parents": [0], "prob": 0.0}, {"content": "of", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 62])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 62, 32064])

hidden_states[-1]
torch.Size([2, 62, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 62])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310]], dev


candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.0113,  3.4375, -1.3203,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.2656, -1.7578, -1.2891,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9994e-01, 5.8291e-05, 2.9399e-11,  ..., 2.8427e-27, 5.5975e-28,
         1.2490e-28],
        [9.4500e-01, 5.3313e-02, 1.4208e-03,  ..., 9.2563e-22, 9.2563e-22,
         3.0051e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9450, 0.9983, 0.9997,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 62])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310]], dev


carryover_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.8440], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29947],
        [14235]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-5.8295e-05, -5.6568e-02], device='cuda:0')


new_candidates
torch.Size([2, 63])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310


new_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.9005], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 51-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "approximately", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 63])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 63, 32064])

hidden_states[-1]
torch.Size([2, 63, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 63])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310


candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.9005], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.0752, -2.5781, -0.4199,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.1250, -3.0156, -5.6562,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.7069e-01, 2.9312e-02, 1.9363e-06,  ..., 3.6142e-25, 2.4840e-25,
         1.0355e-25],
        [1.0000e+00, 4.1399e-08, 1.0467e-08,  ..., 4.7809e-25, 7.3317e-26,
         3.4633e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9707, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 63])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310


carryover_candidate_logprobs
torch.Size([2])


tensor([-115819.5609,  -44549.9005], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29889],
        [29871]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0298,  0.0000], device='cuda:0')


new_candidates
torch.Size([2, 64])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171


new_candidate_logprobs
torch.Size([2])


tensor([-115819.5907,  -44549.9005], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 52-p
data: [{"content": ".", "parents": [0], "prob": 0.0}, {"content": "", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 64])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 64, 32064])

hidden_states[-1]
torch.Size([2, 64, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 64])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171


candidate_logprobs
torch.Size([2])


tensor([-115819.5907,  -44549.9005], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.6719,  1.1484, -0.5039,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.5664, -0.2412, -2.3594,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.4450e-07, 1.1033e-09,  ..., 5.3111e-27, 2.8428e-27,
         2.2140e-27],
        [6.2246e-01, 3.7754e-01, 1.2958e-08,  ..., 1.2907e-21, 1.1391e-21,
         1.0052e-21]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.6225, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 64])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171


carryover_candidate_logprobs
torch.Size([3])


tensor([-115819.5907,  -44549.9005,  -44549.9005], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29947],
        [29947],
        [29906]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-1.1921e-07, -4.7408e-01, -9.7408e-01], device='cuda:0')


new_candidates
torch.Size([3, 65])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
       


new_candidate_logprobs
torch.Size([3])


tensor([-115819.5907,  -44550.3746,  -44550.8746], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 53-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "8", "parents": [1], "prob": 0.0}, {"content": "2", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([3, 65])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 65, 32064])

hidden_states[-1]
torch.Size([3, 65, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([3, 65])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
       


candidate_logprobs
torch.Size([3])


tensor([-115819.5907,  -44550.3746,  -44550.8746], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.16894531, -1.203125  ,  1.1640625 , ..., -2.734375  ,
         0.796875  , -0.421875  ],
       [-0.33203125, -3.609375  ,  1.859375  , ...,  1.9921875 ,
         3.046875  ,  0.796875  ],
       [-0.69140625, -0.75      ,  2.171875  , ...,  0.33789062,
         0.81640625, -0.17285156]], dtype=float32)


k_mean_space
(3, 2)


array([[1.04655556e+02, 5.12968712e+01],
       [9.53674316e-07, 8.36702271e+01],
       [9.11667099e+01, 5.12968712e+01]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -44550.37461438, -160370.46530485])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 65])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
       


new_candidate_parents


[[1], [0, 2]]


new_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1], [0, 2]]

event: k_means
id: 54-k
data: [{"content": "8", "parents": [1], "prob": 0.0}, {"content": "8", "parents": [0, 2], "prob": 0.0}]





candidates
torch.Size([2, 65])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
       


candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 1.1562, -2.9531, -6.0625,  ...,  0.0000,  0.0000,  0.0000],
        [-1.1172, -1.7891, -1.4531,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 8.7642e-08, 7.5826e-10,  ..., 3.4633e-26, 3.0563e-26,
         1.1244e-26],
        [1.0000e+00, 6.4759e-07, 6.0236e-08,  ..., 1.5217e-27, 1.1851e-27,
         9.2293e-28]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 65])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
       


carryover_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29892],
        [29953]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07, -7.1526e-07], device='cuda:0')


new_candidates
torch.Size([2, 66])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,



new_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 54-p
data: [{"content": ",", "parents": [0], "prob": 0.0}, {"content": "6", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 66])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 66, 32064])

hidden_states[-1]
torch.Size([2, 66, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 66])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,



candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 3.1719,  2.0469, -1.8828,  ...,  0.0000,  0.0000,  0.0000],
        [-0.9492, -4.2812,  1.4688,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.5613e-06, 8.3153e-07,  ..., 8.0402e-23, 2.9578e-23,
         6.4702e-26],
        [1.0000e+00, 7.3382e-07, 1.8554e-07,  ..., 2.1006e-26, 1.8538e-26,
         5.3111e-27]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 66])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,



carryover_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29947],
        [27881]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-3.4571e-06, -1.1921e-06], device='cuda:0')


new_candidates
torch.Size([2, 67])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 


new_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 55-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "meters", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 67])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 67, 32064])

hidden_states[-1]
torch.Size([2, 67, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 67])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 


candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.5000,  0.8555,  0.1426,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2734, -0.8125, -4.1875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 2.0612e-09, 2.2897e-11,  ..., 9.7276e-29, 4.0551e-29,
         1.9155e-29],
        [9.9979e-01, 1.3981e-04, 7.4836e-05,  ..., 6.1375e-25, 4.7799e-25,
         4.7799e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9998, 0.9999, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 67])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 


carryover_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4653], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29946],
        [  313]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000, -0.0002], device='cuda:0')


new_candidates
torch.Size([2, 68])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 


new_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4655], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 56-p
data: [{"content": "4", "parents": [0], "prob": 0.0}, {"content": "(", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 68])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 68, 32064])

hidden_states[-1]
torch.Size([2, 68, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 68])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 


candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4655], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.7227,  2.3750, -2.1250,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.1387,  1.1953, -4.8438,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9984e-01, 1.5844e-04, 3.3310e-11,  ..., 2.5084e-27, 9.2278e-28,
         2.6438e-28],
        [9.9916e-01, 8.0406e-04, 1.8910e-05,  ..., 1.0315e-22, 8.0335e-23,
         2.0312e-23]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9998, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9992, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 68])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 


carryover_candidate_logprobs
torch.Size([2])


tensor([ -44550.3746, -160370.4655], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29947],
        [29906]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0002, -0.0008], device='cuda:0')


new_candidates
torch.Size([2, 69])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 


new_candidate_logprobs
torch.Size([2])


tensor([ -44550.3748, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 57-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": "2", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 69])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 69, 32064])

hidden_states[-1]
torch.Size([2, 69, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 69])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 


candidate_logprobs
torch.Size([2])


tensor([ -44550.3748, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.2188, -3.9062, -1.6094,  ...,  0.0000,  0.0000,  0.0000],
        [-1.4453, -3.9375, -5.7500,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[8.1757e-01, 1.8242e-01, 8.2820e-06,  ..., 3.0441e-25, 2.6864e-25,
         2.0922e-25],
        [1.0000e+00, 3.8507e-09, 1.4166e-09,  ..., 5.4175e-25, 1.2088e-25,
         1.0668e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.8176, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 69])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472, 


carryover_candidate_logprobs
torch.Size([3])


tensor([ -44550.3748,  -44550.3748, -160370.4664], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29889],
        [27881],
        [29929]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.2014, -1.7014,  0.0000], device='cuda:0')


new_candidates
torch.Size([3, 70])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028, 


new_candidate_logprobs
torch.Size([3])


tensor([ -44550.5762,  -44552.0762, -160370.4664], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [1]]

event: top_p
id: 58-p
data: [{"content": ".", "parents": [0], "prob": 0.0}, {"content": "meters", "parents": [0], "prob": 0.0}, {"content": "9", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([3, 70])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 70, 32064])

hidden_states[-1]
torch.Size([3, 70, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([3, 70])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028, 


candidate_logprobs
torch.Size([3])


tensor([ -44550.5762,  -44552.0762, -160370.4664], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.52734375, -0.99609375,  1.53125   , ..., -0.03112793,
         1.875     , -0.390625  ],
       [-0.859375  , -1.5859375 ,  1.109375  , ..., -0.42382812,
        -0.30078125,  0.63671875],
       [-0.25976562, -1.1484375 ,  0.41601562, ..., -0.3515625 ,
         1.0859375 ,  2.25      ]], dtype=float32)


k_mean_space
(3, 2)


array([[ 52.528854, 106.014946],
       [ 52.528854, 106.43049 ],
       [ 92.32566 ,   0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -89102.65240082, -160370.46636016])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 70])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 59-k
data: [{"content": ".", "parents": [0, 1], "prob": 0.0}, {"content": "9", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 70])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 


candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.0156,  1.1953, -0.3555,  ...,  0.0000,  0.0000,  0.0000],
        [-1.3359, -1.5078, -7.1250,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 6.8256e-08, 7.5826e-10,  ..., 2.3803e-26, 5.3111e-27,
         3.2213e-27],
        [1.0000e+00, 4.6912e-08, 9.7362e-10,  ..., 1.2996e-24, 1.0121e-24,
         8.9319e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 70])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29947],
        [29892]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07,  0.0000e+00], device='cuda:0')


new_candidates
torch.Size([2, 71])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 59-p
data: [{"content": "8", "parents": [0], "prob": 0.0}, {"content": ",", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 71])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 71, 32064])

hidden_states[-1]
torch.Size([2, 71, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 71])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224


candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.4609, -2.5156, -1.9844,  ...,  0.0000,  0.0000,  0.0000],
        [-4.3438,  1.4219, -3.2188,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 8.7642e-08, 1.3440e-08,  ..., 2.9963e-28, 2.9963e-28,
         6.6857e-29],
        [1.0000e+00, 1.8190e-09, 9.7362e-10,  ..., 1.8909e-24, 1.1469e-24,
         1.0121e-24]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 71])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29953],
        [29900]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07,  0.0000e+00], device='cuda:0')


new_candidates
torch.Size([2, 72])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 60-p
data: [{"content": "6", "parents": [0], "prob": 0.0}, {"content": "0", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 72])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 72, 32064])

hidden_states[-1]
torch.Size([2, 72, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 72])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967


candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-2.3281, -5.4062,  0.5586,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.8906,  0.1562,  3.3281,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 5.7150e-07, 3.9279e-07,  ..., 6.4702e-26, 6.0182e-27,
         4.6870e-27],
        [1.0000e+00, 9.9312e-08, 1.4166e-09,  ..., 9.4141e-26, 2.6972e-26,
         8.7565e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 72])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[27881],
        [29941]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-06, -1.1921e-07], device='cuda:0')


new_candidates
torch.Size([2, 73])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 61-p
data: [{"content": "meters", "parents": [0], "prob": 0.0}, {"content": "3", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 73])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 73, 32064])

hidden_states[-1]
torch.Size([2, 73, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 73])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892


candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.3535, -1.1953, -4.2812,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2930, -4.3438, -6.5625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9899e-01, 8.0392e-04, 2.0326e-04,  ..., 1.2983e-24, 6.9491e-25,
         1.0657e-25],
        [1.0000e+00, 6.8256e-08, 2.2897e-11,  ..., 7.0467e-30, 6.2186e-30,
         2.5923e-30]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9990, 0.9998, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 73])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6524, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[  313],
        [29896]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.0086e-03, -1.1921e-07], device='cuda:0')


new_candidates
torch.Size([2, 74])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6534, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 62-p
data: [{"content": "(", "parents": [0], "prob": 0.0}, {"content": "1", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 74])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 74, 32064])

hidden_states[-1]
torch.Size([2, 74, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 74])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551


candidate_logprobs
torch.Size([2])


tensor([ -89102.6534, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.9844,  0.0850, -4.8438,  ...,  0.0000,  0.0000,  0.0000],
        [-2.2031, -4.9375, -8.1875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9873e-01, 1.1694e-03, 5.8221e-05,  ..., 1.7000e-22, 9.0992e-23,
         4.2982e-23],
        [1.0000e+00, 2.9023e-06, 1.9556e-08,  ..., 6.9561e-25, 6.9561e-25,
         4.2191e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9987, 0.9999, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 74])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6534, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29906],
        [29889]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.2670e-03, -2.8610e-06], device='cuda:0')


new_candidates
torch.Size([2, 75])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
       


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 63-p
data: [{"content": "2", "parents": [0], "prob": 0.0}, {"content": ".", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 75])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 75, 32064])

hidden_states[-1]
torch.Size([2, 75, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 75])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
       


candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.6406, -3.5469, -5.3438,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.5938,  3.7656,  0.2520,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.9445e-09, 2.9990e-09,  ..., 2.8998e-25, 6.4702e-26,
         5.0390e-26],
        [1.0000e+00, 1.1033e-09, 5.9053e-10,  ..., 1.8174e-28, 1.6038e-28,
         1.5723e-30]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]], device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 75])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
       


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29929],
        [29955]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([0., 0.], device='cuda:0')


new_candidates
torch.Size([2, 76])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,



new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 64-p
data: [{"content": "9", "parents": [0], "prob": 0.0}, {"content": "7", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 76])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 76, 32064])

hidden_states[-1]
torch.Size([2, 76, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 76])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,



candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.5117, -1.2031, -7.1875,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.3262, -4.0938, -1.3125,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 6.0236e-08, 9.7362e-10,  ..., 1.2996e-24, 1.1469e-24,
         1.0121e-24],
        [9.9950e-01, 3.7994e-04, 2.4289e-05,  ..., 3.6016e-22, 6.2586e-23,
         3.7960e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9995, 0.9999, 0.9999,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 76])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,



carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4664], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29892],
        [ 6900]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07, -5.0009e-04], device='cuda:0')


new_candidates
torch.Size([2, 77])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069, 


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4669], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 65-p
data: [{"content": ",", "parents": [0], "prob": 0.0}, {"content": "feet", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 77])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 77, 32064])

hidden_states[-1]
torch.Size([2, 77, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 77])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069, 


candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4669], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-3.5312,  1.4922, -3.1875,  ...,  0.0000,  0.0000,  0.0000],
        [-2.5625, -7.5938, -9.6875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.4166e-09, 7.5826e-10,  ..., 1.0121e-24, 8.9319e-25,
         6.9562e-25],
        [9.0462e-01, 9.5346e-02, 3.6244e-05,  ..., 2.1110e-28, 4.1567e-29,
         1.6117e-30]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9046, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 77])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069, 


carryover_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.4669], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29900],
        [29897]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000, -0.1002], device='cuda:0')


new_candidates
torch.Size([2, 78])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 


new_candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.5671], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 66-p
data: [{"content": "0", "parents": [0], "prob": 0.0}, {"content": ")", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 78])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 78, 32064])

hidden_states[-1]
torch.Size([2, 78, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 78])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 


candidate_logprobs
torch.Size([2])


tensor([ -89102.6547, -160370.5671], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.8750,  0.5898,  3.5469,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.7930,  3.5469, -4.6875,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 3.6535e-08, 6.6916e-10,  ..., 2.1006e-26, 9.9224e-27,
         3.6503e-27],
        [4.2977e-01, 3.3471e-01, 2.3004e-01,  ..., 9.3929e-23, 8.2892e-23,
         5.6971e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4298, 0.7645, 0.9945,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 78])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 


carryover_candidate_logprobs
torch.Size([4])


tensor([ -89102.6547, -160370.5671, -160370.5671, -160370.5671],
       device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[29941],
        [ 5034],
        [ 2038],
        [  408]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([ 0.0000, -0.8445, -1.0945, -1.4695], device='cuda:0')


new_candidates
torch.Size([4, 79])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917, 


new_candidate_logprobs
torch.Size([4])


tensor([ -89102.6547, -160371.4116, -160371.6616, -160372.0366],
       device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 67-p
data: [{"content": "3", "parents": [0], "prob": 0.0}, {"content": "according", "parents": [1], "prob": 0.0}, {"content": "above", "parents": [1], "prob": 0.0}, {"content": "as", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([4, 79])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 79, 32064])

hidden_states[-1]
torch.Size([4, 79, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([4, 79])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917, 


candidate_logprobs
torch.Size([4])


tensor([ -89102.6547, -160371.4116, -160371.6616, -160372.0366],
       device='cuda:0', dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-0.51171875, -0.84375   ,  1.9453125 , ...,  0.2578125 ,
         3.046875  ,  0.00418091],
       [-2.75      , -0.8203125 ,  2.375     , ...,  0.859375  ,
         1.9453125 , -2.09375   ],
       [-1.2109375 , -3.28125   ,  1.6015625 , ...,  1.7421875 ,
        -0.1484375 , -1.6484375 ],
       [ 0.81640625,  0.5234375 ,  1.3671875 , ..., -1.6171875 ,
        -0.18847656, -2.15625   ]], dtype=float32)


k_mean_space
(4, 2)


array([[ 55.4786  , 101.0504  ],
       [ 96.584274,  50.044727],
       [ 55.4786  ,  94.532745],
       [ 93.12519 ,  50.044727]], dtype=float32)


k_mean_clusters
(4,)


array([0, 1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-249474.3162859 , -320743.44821589])


closest
(2,)


array([0, 1])


new_candidates
torch.Size([2, 79])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917, 


new_candidate_parents


[[0, 2], [1, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 2], [1, 3]]

event: k_means
id: 68-k
data: [{"content": "3", "parents": [0, 2], "prob": 0.0}, {"content": "according", "parents": [1, 3], "prob": 0.0}]





candidates
torch.Size([2, 79])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917, 


candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.2871, -3.9688, -6.3750,  ...,  0.0000,  0.0000,  0.0000],
        [-4.3438, -5.1875, -1.4688,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.9556e-08, 1.3888e-11,  ..., 1.0253e-29, 6.2186e-30,
         5.4879e-30],
        [1.0000e+00, 8.1520e-09, 2.1941e-09,  ..., 5.8243e-24, 3.5326e-24,
         2.1426e-24]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]], device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 79])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917, 


carryover_candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29896],
        [  304]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([0., 0.], device='cuda:0')


new_candidates
torch.Size([2, 80])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202, 


new_candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 68-p
data: [{"content": "1", "parents": [0], "prob": 0.0}, {"content": "to", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 80])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 80, 32064])

hidden_states[-1]
torch.Size([2, 80, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 80])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202, 


candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-2.7031, -5.5938, -8.1875,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.3047, -9.6250, -2.2031,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.2099e-06, 6.3488e-09,  ..., 4.7809e-25, 4.7809e-25,
         3.7234e-25],
        [9.9800e-01, 1.5004e-03, 4.2988e-04,  ..., 7.6131e-22, 7.6131e-22,
         3.5962e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9980, 0.9995, 0.9999,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 80])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202, 


carryover_candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4482], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29889],
        [  263]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-06, -2.0053e-03], device='cuda:0')


new_candidates
torch.Size([2, 81])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429


new_candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4502], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 69-p
data: [{"content": ".", "parents": [0], "prob": 0.0}, {"content": "a", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 81])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 81, 32064])

hidden_states[-1]
torch.Size([2, 81, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 81])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429


candidate_logprobs
torch.Size([2])


tensor([-249474.3163, -320743.4502], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  3.7969,   4.0938,   0.3906,  ...,   0.0000,   0.0000,   0.0000],
        [ -3.4531, -11.5625,  -7.5000,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 6.6916e-10, 5.9053e-10,  ..., 1.9538e-27, 8.1448e-28,
         3.1581e-29],
        [8.6863e-01, 1.1756e-01, 6.6321e-03,  ..., 2.6732e-19, 2.0819e-19,
         1.1144e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.8686, 0.9862, 0.9928,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 81])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429


carryover_candidate_logprobs
torch.Size([3])


tensor([-249474.3163, -320743.4502, -320743.4502], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29955],
        [29871],
        [26554]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([ 0.0000, -0.1408, -2.1408], device='cuda:0')


new_candidates
torch.Size([3, 82])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278


new_candidate_logprobs
torch.Size([3])


tensor([-249474.3163, -320743.5911, -320745.5911], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 70-p
data: [{"content": "7", "parents": [0], "prob": 0.0}, {"content": "", "parents": [1], "prob": 0.0}, {"content": "revision", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([3, 82])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 82, 32064])

hidden_states[-1]
torch.Size([3, 82, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([3, 82])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278


candidate_logprobs
torch.Size([3])


tensor([-249474.3163, -320743.5911, -320745.5911], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[ 0.98046875,  0.49804688, -0.87890625, ..., -1.7421875 ,
         2.578125  , -1.484375  ],
       [-1.5859375 , -0.02624512,  1.90625   , ...,  2.        ,
         0.03112793, -1.4453125 ],
       [-0.38085938,  1.5       ,  3.25      , ..., -0.63671875,
         1.96875   ,  0.20507812]], dtype=float32)


k_mean_space
(3, 2)


array([[113.433784,  56.21971 ],
       [  0.      ,  92.58368 ],
       [102.94448 ,  56.21971 ]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-320743.59105623, -570219.90734329])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 82])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278


new_candidate_parents


[[1], [0, 2]]


new_candidate_logprobs
torch.Size([2])


tensor([-320743.5911, -570219.9073], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1], [0, 2]]

event: k_means
id: 71-k
data: [{"content": "", "parents": [1], "prob": 0.0}, {"content": "7", "parents": [0, 2], "prob": 0.0}]





candidates
torch.Size([2, 82])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278


candidate_logprobs
torch.Size([2])


tensor([-320743.5911, -570219.9073], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.0391,  1.6094, -6.7812,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.1289, -4.6562, -1.0312,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 4.6912e-08, 2.3589e-08,  ..., 4.0832e-22, 3.6034e-22,
         1.9287e-22],
        [9.9948e-01, 3.3529e-04, 5.1418e-05,  ..., 7.6244e-22, 1.1692e-22,
         7.0918e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9995, 0.9998, 0.9999,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 82])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278


carryover_candidate_logprobs
torch.Size([2])


tensor([-320743.5911, -570219.9073], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29906],
        [ 6900]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000, -0.0005], device='cuda:0')


new_candidates
torch.Size([2, 83])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322


new_candidate_logprobs
torch.Size([2])


tensor([-320743.5911, -570219.9079], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 71-p
data: [{"content": "2", "parents": [0], "prob": 0.0}, {"content": "feet", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([2, 83])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 83, 32064])

hidden_states[-1]
torch.Size([2, 83, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([2, 83])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322


candidate_logprobs
torch.Size([2])


tensor([-320743.5911, -570219.9079], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-4.7188, -6.8125, -3.3750,  ...,  0.0000,  0.0000,  0.0000],
        [-2.4219, -8.0625, -8.5625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.0816e-11, 8.3404e-13,  ..., 2.2583e-25, 1.8722e-25,
         9.9224e-27],
        [8.7930e-01, 1.1900e-01, 1.6974e-03,  ..., 7.5484e-29, 4.0404e-29,
         2.0116e-30]], device='cuda:0')

tensor([1., 1.], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.8793, 0.9983, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 83])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322


carryover_candidate_logprobs
torch.Size([3])


tensor([-320743.5911, -570219.9079, -570219.9079], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29900],
        [  511],
        [29897]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([ 0.0000, -0.1286, -2.1286], device='cuda:0')


new_candidates
torch.Size([3, 84])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871, 29906, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284


new_candidate_logprobs
torch.Size([3])


tensor([-320743.5911, -570220.0365, -570222.0365], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 72-p
data: [{"content": "0", "parents": [0], "prob": 0.0}, {"content": "),", "parents": [1], "prob": 0.0}, {"content": ")", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([3, 84])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 84, 32064])

hidden_states[-1]
torch.Size([3, 84, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([3, 84])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 19429,   300,  5202,  4917,   681, 11069,   310,
          7551, 29892,   967, 19224, 15028,   472, 14235, 29871, 29947, 29892,
         29947, 29946, 29947, 29889, 29947, 29953, 27881,   313, 29906, 29929,
         29892, 29900, 29941, 29896, 29889, 29955,  6900, 29897,  5034,   304,
           263, 29871, 29906, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284


candidate_logprobs
torch.Size([3])


tensor([-320743.5911, -570220.0365, -570222.0365], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-1.5234375 , -0.99609375,  0.03637695, ...,  1.625     ,
         1.4609375 ,  2.109375  ],
       [-1.28125   ,  1.7265625 ,  0.01965332, ..., -0.97265625,
        -2.09375   ,  0.88671875],
       [-1.21875   ,  1.15625   ,  2.25      , ..., -0.859375  ,
        -2.0625    , -0.10009766]], dtype=float32)


k_mean_space
(3, 2)


array([[107.121155,   0.      ],
       [ 37.03315 , 113.02563 ],
       [ 37.03315 , 113.65738 ]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-1140442.07298035,  -320743.59105623])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 84])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284


new_candidate_parents


[[1, 2], [0]]


new_candidate_logprobs
torch.Size([2])


tensor([-1140442.0730,  -320743.5911], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0]]

event: k_means
id: 73-k
data: [{"content": "),", "parents": [1, 2], "prob": 0.0}, {"content": "0", "parents": [0], "prob": 0.0}]





candidates
torch.Size([2, 84])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284


candidate_logprobs
torch.Size([2])


tensor([-1140442.0730,  -320743.5911], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-3.2344, -5.0938, -9.3125,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.3906, -4.6250,  0.1543,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[6.9146e-01, 1.3616e-01, 1.2016e-01,  ..., 1.1167e-21, 9.8545e-22,
         6.7729e-22],
        [1.0000e+00, 7.5826e-10, 1.1628e-10,  ..., 4.4469e-26, 3.4633e-26,
         3.0563e-26]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.6915, 0.8276, 0.9478,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 84])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284


carryover_candidate_logprobs
torch.Size([4])


tensor([-1140442.0730, -1140442.0730, -1140442.0730,  -320743.5911],
       device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[  408],
        [ 5034],
        [ 2466],
        [29906]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-0.3689, -1.9939, -2.1189,  0.0000], device='cuda:0')


new_candidates
torch.Size([4, 85])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
       


new_candidate_logprobs
torch.Size([4])


tensor([-1140442.4419, -1140444.0669, -1140444.1919,  -320743.5911],
       device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [0], [1]]

event: top_p
id: 73-p
data: [{"content": "as", "parents": [0], "prob": 0.0}, {"content": "according", "parents": [0], "prob": 0.0}, {"content": "though", "parents": [0], "prob": 0.0}, {"content": "2", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([4, 85])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 85, 32064])

hidden_states[-1]
torch.Size([4, 85, 3072])
infer - after batch run: GPU memory used: 15793 MB.

candidates
torch.Size([4, 85])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
       


candidate_logprobs
torch.Size([4])


tensor([-1140442.4419, -1140444.0669, -1140444.1919,  -320743.5911],
       device='cuda:0', dtype=torch.float64)


embeddings_np
(4, 3072)


array([[ 2.        ,  0.78125   ,  1.203125  , ..., -1.46875   ,
        -0.83203125, -1.0078125 ],
       [-2.40625   , -0.73828125,  2.5625    , ...,  1.46875   ,
         1.90625   , -2.390625  ],
       [ 2.4375    , -1.15625   ,  0.19042969, ..., -0.40039062,
        -0.13476562, -0.58203125],
       [-0.8359375 , -0.16308594,  2.9375    , ...,  0.38867188,
         0.5234375 ,  1.1796875 ]], dtype=float32)


k_mean_space
(4, 2)


array([[ 54.738003, 110.45944 ],
       [ 60.54124 , 109.51611 ],
       [ 57.205933, 112.39335 ],
       [ 94.68073 ,   0.      ]], dtype=float32)


k_mean_clusters
(4,)


array([0, 0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-3421330.70078279,  -320743.59105623])


closest
(2,)


array([0, 3])


new_candidates
torch.Size([2, 85])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
       


new_candidate_parents


[[0, 1, 2], [3]]


new_candidate_logprobs
torch.Size([2])


tensor([-3421330.7008,  -320743.5911], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1, 2], [3]]

event: k_means
id: 74-k
data: [{"content": "as", "parents": [0, 1, 2], "prob": 0.0}, {"content": "2", "parents": [3], "prob": 0.0}]





candidates
torch.Size([2, 85])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
       


candidate_logprobs
torch.Size([2])


tensor([-3421330.7008,  -320743.5911], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-4.8750, -8.2500, -4.0938,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.6719, -0.9180,  2.9531,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[7.8180e-01, 6.4174e-02, 3.8924e-02,  ..., 1.6211e-21, 1.2626e-21,
         9.8328e-22],
        [1.0000e+00, 1.7258e-08, 9.2374e-09,  ..., 1.7940e-23, 1.0881e-23,
         5.8243e-24]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.7818, 0.8460, 0.8849,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([5])


tensor([0, 0, 0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([5, 85])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
       


carryover_candidate_logprobs
torch.Size([5])


tensor([-3421330.7008, -3421330.7008, -3421330.7008, -3421330.7008,
         -320743.5911], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([5, 1])


tensor([[  639],
        [10325],
        [  337],
        [16725],
        [29900]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([5])


tensor([-0.2462, -2.7462, -3.2462, -3.3712,  0.0000], device='cuda:0')


new_candidates
torch.Size([5, 86])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408,   639],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,



new_candidate_logprobs
torch.Size([5])


tensor([-3421330.9469, -3421333.4469, -3421333.9469, -3421334.0719,
         -320743.5911], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [0], [0], [1]]

event: top_p
id: 74-p
data: [{"content": "per", "parents": [0], "prob": 0.0}, {"content": "recently", "parents": [0], "prob": 0.0}, {"content": "re", "parents": [0], "prob": 0.0}, {"content": "confirmed", "parents": [0], "prob": 0.0}, {"content": "0", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15793 MB.

batch_candidates
torch.Size([5, 86])

batch_candidate_logprobs
torch.Size([5])

batch_logits
torch.Size([5, 86, 32064])

hidden_states[-1]
torch.Size([5, 86, 3072])
infer - after batch run: GPU memory used: 15847 MB.

candidates
torch.Size([5, 86])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408,   639],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,



candidate_logprobs
torch.Size([5])


tensor([-3421330.9469, -3421333.4469, -3421333.9469, -3421334.0719,
         -320743.5911], device='cuda:0', dtype=torch.float64)


embeddings_np
(5, 3072)


array([[ 0.11523438,  1.546875  ,  1.390625  , ...,  2.125     ,
        -0.56640625, -0.6171875 ],
       [ 1.7578125 ,  1.8046875 ,  1.875     , ..., -2.109375  ,
         0.4140625 ,  1.765625  ],
       [ 1.125     , -1.3515625 ,  2.484375  , ...,  0.43359375,
        -0.53125   , -0.46875   ],
       [-0.5       ,  1.734375  ,  2.515625  , ...,  0.32617188,
        -0.40429688, -0.6484375 ],
       [ 0.14746094,  1.25      ,  1.6015625 , ...,  2.03125   ,
        -0.30273438,  1.4765625 ]], dtype=float32)


k_mean_space
(5, 2)


array([[82.73067 , 46.120174],
       [43.513916, 72.145256],
       [43.513916, 88.956055],
       [89.496994, 57.86546 ],
       [84.75287 , 50.64435 ]], dtype=float32)


k_mean_clusters
(5,)


array([1, 0, 0, 1, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-6842667.39387134, -7163408.60992754])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 86])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,



new_candidate_parents


[[1, 2], [0, 3, 4]]


new_candidate_logprobs
torch.Size([2])


tensor([-6842667.3939, -7163408.6099], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0, 3, 4]]

event: k_means
id: 75-k
data: [{"content": "recently", "parents": [1, 2], "prob": 0.0}, {"content": "per", "parents": [0, 3, 4], "prob": 0.0}]





candidates
torch.Size([2, 86])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,



candidate_logprobs
torch.Size([2])


tensor([-6842667.3939, -7163408.6099], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ -3.5469,  -8.6250,  -6.3125,  ...,   0.0000,   0.0000,   0.0000],
        [ -1.9922, -10.2500,  -4.1562,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[5.5451e-01, 3.8111e-01, 1.8974e-02,  ..., 1.5873e-20, 1.2362e-20,
         1.0147e-21],
        [9.2199e-01, 7.5682e-02, 7.4196e-04,  ..., 3.8400e-20, 2.9906e-20,
         2.3291e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.5545, 0.9356, 0.9546,  ..., 1.0000, 1.0000, 1.0000],
        [0.9220, 0.9977, 0.9984,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 86])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,



carryover_candidate_logprobs
torch.Size([3])


tensor([-6842667.3939, -6842667.3939, -7163408.6099], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[  337],
        [17005],
        [  278]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.5897, -0.9647, -0.0812], device='cuda:0')


new_candidates
torch.Size([3, 87])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 


new_candidate_logprobs
torch.Size([3])


tensor([-6842667.9835, -6842668.3585, -7163408.6911], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [1]]

event: top_p
id: 75-p
data: [{"content": "re", "parents": [0], "prob": 0.0}, {"content": "measured", "parents": [0], "prob": 0.0}, {"content": "the", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([3, 87])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 87, 32064])

hidden_states[-1]
torch.Size([3, 87, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([3, 87])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 


candidate_logprobs
torch.Size([3])


tensor([-6842667.9835, -6842668.3585, -7163408.6911], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[ 1.8671875 , -0.5859375 ,  2.828125  , ...,  0.11425781,
        -0.22070312, -1.2109375 ],
       [-0.3203125 ,  1.25      ,  1.8359375 , ...,  1.9453125 ,
        -0.7109375 ,  1.6015625 ],
       [ 0.47851562,  0.59375   ,  1.328125  , ...,  0.78515625,
        -0.13867188,  1.5625    ]], dtype=float32)


k_mean_space
(3, 2)


array([[104.07754 ,  51.661457],
       [  0.      ,  84.13519 ],
       [ 93.07599 ,  51.661457]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([ -6842668.35853753, -14006076.67468239])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 87])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 


new_candidate_parents


[[1], [0, 2]]


new_candidate_logprobs
torch.Size([2])


tensor([ -6842668.3585, -14006076.6747], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1], [0, 2]]

event: k_means
id: 76-k
data: [{"content": "measured", "parents": [1], "prob": 0.0}, {"content": "re", "parents": [0, 2], "prob": 0.0}]





candidates
torch.Size([2, 87])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 


candidate_logprobs
torch.Size([2])


tensor([ -6842668.3585, -14006076.6747], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ -1.6719, -12.3750,  -4.9062,  ...,   0.0000,   0.0000,   0.0000],
        [  2.9375,  -7.0000,  -3.5156,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[5.9238e-01, 4.0713e-01, 2.5516e-04,  ..., 1.3919e-21, 1.3919e-21,
         1.0840e-21],
        [8.4319e-01, 4.7570e-02, 2.8852e-02,  ..., 9.1750e-24, 8.0969e-24,
         3.8247e-24]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.5924, 0.9995, 0.9998,  ..., 1.0000, 1.0000, 1.0000],
        [0.8432, 0.8908, 0.9196,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([5])


tensor([0, 0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([5, 87])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 


carryover_candidate_logprobs
torch.Size([5])


tensor([ -6842668.3585,  -6842668.3585, -14006076.6747, -14006076.6747,
        -14006076.6747], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([5, 1])


tensor([[  491],
        [  297],
        [29899],
        [15807],
        [11292]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([5])


tensor([-0.5236, -0.8986, -0.1706, -3.0456, -3.5456], device='cuda:0')


new_candidates
torch.Size([5, 88])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139, 


new_candidate_logprobs
torch.Size([5])


tensor([ -6842668.8822,  -6842669.2572, -14006076.8452, -14006079.7202,
        -14006080.2202], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [1], [1], [1]]

event: top_p
id: 76-p
data: [{"content": "by", "parents": [0], "prob": 0.0}, {"content": "in", "parents": [0], "prob": 0.0}, {"content": "-", "parents": [1], "prob": 0.0}, {"content": "calcul", "parents": [1], "prob": 0.0}, {"content": "vised", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([5, 88])

batch_candidate_logprobs
torch.Size([5])

batch_logits
torch.Size([5, 88, 32064])

hidden_states[-1]
torch.Size([5, 88, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([5, 88])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139, 


candidate_logprobs
torch.Size([5])


tensor([ -6842668.8822,  -6842669.2572, -14006076.8452, -14006079.7202,
        -14006080.2202], device='cuda:0', dtype=torch.float64)


embeddings_np
(5, 3072)


array([[-1.1328125 , -1.3203125 , -0.59765625, ...,  1.765625  ,
        -1.8984375 ,  0.07275391],
       [ 0.60546875,  0.6484375 , -0.07714844, ...,  0.45898438,
        -0.10742188,  0.7890625 ],
       [ 1.8359375 ,  0.97265625,  1.8359375 , ...,  0.01147461,
         1.2265625 , -0.76953125],
       [-0.19335938, -0.1328125 ,  1.5078125 , ..., -1.625     ,
        -0.2734375 ,  0.44140625],
       [ 0.7265625 ,  1.203125  ,  3.71875   , ..., -1.0390625 ,
         0.8515625 , -0.5078125 ]], dtype=float32)


k_mean_space
(5, 2)


array([[102.00618 ,  57.696507],
       [104.15336 ,  58.52702 ],
       [  0.      ,  81.364685],
       [ 94.91752 ,  59.829624],
       [100.81866 ,  60.101532]], dtype=float32)


k_mean_clusters
(5,)


array([1, 1, 0, 1, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-14006076.84524215, -41697498.07978937])


closest
(2,)


array([2, 0])


new_candidates
torch.Size([2, 88])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139, 


new_candidate_parents


[[2], [0, 1, 3, 4]]


new_candidate_logprobs
torch.Size([2])


tensor([-14006076.8452, -41697498.0798], device='cuda:0', dtype=torch.float64)


candidate_parents


[[2], [0, 1, 3, 4]]

event: k_means
id: 77-k
data: [{"content": "-", "parents": [2], "prob": 0.0}, {"content": "by", "parents": [0, 1, 3, 4], "prob": 0.0}]





candidates
torch.Size([2, 88])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139, 


candidate_logprobs
torch.Size([2])


tensor([-14006076.8452, -41697498.0798], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.4844, -3.5625, -3.8125,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.5781, -9.4375, -4.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.8749e-01, 5.8718e-03, 3.5615e-03,  ..., 6.5173e-24, 5.0757e-24,
         4.4792e-24],
        [8.8564e-01, 1.0577e-01, 5.2662e-03,  ..., 9.5131e-19, 8.9368e-19,
         1.8732e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9875, 0.9934, 0.9969,  ..., 1.0000, 1.0000, 1.0000],
        [0.8856, 0.9914, 0.9967,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 88])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139, 


carryover_candidate_logprobs
torch.Size([3])


tensor([-14006076.8452, -41697498.0798, -41697498.0798], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[1004],
        [7551],
        [ 278]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.0126, -0.1214, -2.2464], device='cuda:0')


new_candidates
torch.Size([3, 89])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278, 


new_candidate_logprobs
torch.Size([3])


tensor([-14006076.8578, -41697498.2012, -41697500.3262], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 77-p
data: [{"content": "me", "parents": [0], "prob": 0.0}, {"content": "China", "parents": [1], "prob": 0.0}, {"content": "the", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([3, 89])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 89, 32064])

hidden_states[-1]
torch.Size([3, 89, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([3, 89])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278, 


candidate_logprobs
torch.Size([3])


tensor([-14006076.8578, -41697498.2012, -41697500.3262], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.25390625,  1.4140625 ,  0.65234375, ..., -0.953125  ,
         2.59375   ,  1.9453125 ],
       [ 0.13378906, -1.9453125 , -0.18554688, ...,  0.53125   ,
        -3.765625  ,  2.140625  ],
       [-0.7734375 , -0.70703125, -0.13574219, ...,  0.8359375 ,
        -1.296875  , -0.08789062]], dtype=float32)


k_mean_space
(3, 2)


array([[ 55.777264, 113.16891 ],
       [ 55.777264, 100.86515 ],
       [ 91.538994,   0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-55703575.05906281, -41697500.3262336 ])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 89])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278, 


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-55703575.0591, -41697500.3262], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 78-k
data: [{"content": "me", "parents": [0, 1], "prob": 0.0}, {"content": "the", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 89])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278, 


candidate_logprobs
torch.Size([2])


tensor([-55703575.0591, -41697500.3262], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 3.6719, -2.7031, -0.2334,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.3203, -7.5625, -4.7188,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 3.2242e-08, 1.7258e-08,  ..., 2.1006e-26, 2.1006e-26,
         1.7243e-27],
        [9.4279e-01, 2.2172e-02, 8.1568e-03,  ..., 8.4793e-18, 2.4294e-18,
         8.9371e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9428, 0.9650, 0.9731,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 89])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278, 


carryover_candidate_logprobs
torch.Size([2])


tensor([-55703575.0591, -41697500.3262], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 294],
        [7551]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([ 0.0000, -0.0589], device='cuda:0')


new_candidates
torch.Size([2, 90])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373, 


new_candidate_logprobs
torch.Size([2])


tensor([-55703575.0591, -41697500.3851], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 78-p
data: [{"content": "as", "parents": [0], "prob": 0.0}, {"content": "China", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([2, 90])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 90, 32064])

hidden_states[-1]
torch.Size([2, 90, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([2, 90])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373, 


candidate_logprobs
torch.Size([2])


tensor([-55703575.0591, -41697500.3851], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.7188, -8.7500, -0.6680,  ...,  0.0000,  0.0000,  0.0000],
        [ 5.7188, -2.5625, -0.5234,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.3710e-06, 1.2099e-06,  ..., 2.1006e-26, 1.6359e-26,
         2.8428e-27],
        [8.4192e-01, 1.0055e-01, 1.2009e-02,  ..., 7.9809e-19, 5.4852e-19,
         5.7813e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.8419, 0.9425, 0.9545,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 90])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373, 


carryover_candidate_logprobs
torch.Size([3])


tensor([-55703575.0591, -41697500.3851, -41697500.3851], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[ 2955],
        [29899],
        [  322]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-2.6226e-06, -1.7207e-01, -2.2971e+00], device='cuda:0')


new_candidates
torch.Size([3, 91])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294


new_candidate_logprobs
torch.Size([3])


tensor([-55703575.0591, -41697500.5572, -41697502.6822], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 79-p
data: [{"content": "ured", "parents": [0], "prob": 0.0}, {"content": "-", "parents": [1], "prob": 0.0}, {"content": "and", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([3, 91])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 91, 32064])

hidden_states[-1]
torch.Size([3, 91, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([3, 91])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294


candidate_logprobs
torch.Size([3])


tensor([-55703575.0591, -41697500.5572, -41697502.6822], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[ 0.53515625,  2.        ,  3.0625    , ...,  0.40625   ,
        -0.48632812,  2.1875    ],
       [ 1.09375   ,  0.53125   ,  0.37304688, ...,  0.5       ,
        -0.48632812, -2.375     ],
       [-0.89453125,  1.8671875 , -2.6875    , ...,  1.375     ,
        -2.375     , -0.03564453]], dtype=float32)


k_mean_space
(3, 2)


array([[100.559586,   0.      ],
       [ 38.0713  , 108.69292 ],
       [ 38.0713  , 106.34453 ]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-83395003.23942307, -55703575.05906543])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 91])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294


new_candidate_parents


[[1, 2], [0]]


new_candidate_logprobs
torch.Size([2])


tensor([-83395003.2394, -55703575.0591], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0]]

event: k_means
id: 80-k
data: [{"content": "-", "parents": [1, 2], "prob": 0.0}, {"content": "ured", "parents": [0], "prob": 0.0}]





candidates
torch.Size([2, 91])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294


candidate_logprobs
torch.Size([2])


tensor([-83395003.2394, -55703575.0591], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.9141e+00,  5.1562e+00,  3.7031e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.7280e-03, -1.2812e+01, -3.9531e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9971e-01, 3.5347e-05, 3.5347e-05,  ..., 6.7300e-22, 5.2414e-22,
         4.0820e-22],
        [8.9133e-01, 1.0645e-01, 1.0436e-03,  ..., 2.8344e-22, 2.5014e-22,
         9.2020e-23]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9997, 0.9997, 0.9998,  ..., 1.0000, 1.0000, 1.0000],
        [0.8913, 0.9978, 0.9988,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 91])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294


carryover_candidate_logprobs
torch.Size([3])


tensor([-83395003.2394, -55703575.0591, -55703575.0591], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[29940],
        [  297],
        [  491]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-2.8978e-04, -1.1504e-01, -2.2400e+00], device='cuda:0')


new_candidates
torch.Size([3, 92])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388


new_candidate_logprobs
torch.Size([3])


tensor([-83395003.2397, -55703575.1741, -55703577.2991], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 80-p
data: [{"content": "N", "parents": [0], "prob": 0.0}, {"content": "in", "parents": [1], "prob": 0.0}, {"content": "by", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([3, 92])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 92, 32064])

hidden_states[-1]
torch.Size([3, 92, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([3, 92])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388


candidate_logprobs
torch.Size([3])


tensor([-83395003.2397, -55703575.1741, -55703577.2991], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-1.671875  ,  0.99609375,  1.0390625 , ..., -0.65234375,
        -0.40234375,  2.390625  ],
       [ 0.05957031,  0.2578125 , -0.01586914, ..., -0.08837891,
         0.03466797,  0.3984375 ],
       [-1.7578125 , -1.609375  , -0.53125   , ...,  1.6640625 ,
        -1.90625   , -0.109375  ]], dtype=float32)


k_mean_space
(3, 2)


array([[101.67525,   0.     ],
       [ 44.18272, 110.81887],
       [ 44.18272, 110.90138]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-1.11407152e+08, -8.33950032e+07])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 92])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388


new_candidate_parents


[[1, 2], [0]]


new_candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0]]

event: k_means
id: 81-k
data: [{"content": "in", "parents": [1, 2], "prob": 0.0}, {"content": "N", "parents": [0], "prob": 0.0}]





candidates
torch.Size([2, 92])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388


candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.8750, -5.5625, -2.3594,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.8125, -2.8594, -5.7188,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9908e-01, 9.1104e-04, 8.9315e-06,  ..., 8.6361e-22, 2.8037e-22,
         1.1688e-22],
        [9.9992e-01, 5.1441e-05, 6.9617e-06,  ..., 3.8737e-21, 2.3495e-21,
         1.2576e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9991, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 92])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388


carryover_candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29871],
        [ 1022]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-9.2239e-04, -8.2615e-05], device='cuda:0')


new_candidates
torch.Size([2, 93])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039


new_candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 81-p
data: [{"content": "", "parents": [0], "prob": 0.0}, {"content": "ep", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([2, 93])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 93, 32064])

hidden_states[-1]
torch.Size([2, 93, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([2, 93])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039


candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.3535,  1.5547, -6.2812,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.7656, -5.3438,  2.0625,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 7.7344e-08, 5.1600e-11,  ..., 4.3036e-23, 2.9578e-23,
         2.3036e-23],
        [1.0000e+00, 2.6996e-07, 1.2752e-07,  ..., 8.9319e-25, 6.9562e-25,
         1.2088e-25]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 93])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039


carryover_candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[29906],
        [  284]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-1.1921e-07, -4.7684e-07], device='cuda:0')


new_candidates
torch.Size([2, 94])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379


new_candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 82-p
data: [{"content": "2", "parents": [0], "prob": 0.0}, {"content": "al", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([2, 94])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 94, 32064])

hidden_states[-1]
torch.Size([2, 94, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([2, 94])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379


candidate_logprobs
torch.Size([2])


tensor([-1.1141e+08, -8.3395e+07], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.1953,  3.1094, -0.3281,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.1914, -5.5938,  0.4336,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 1.4450e-07, 1.1254e-07,  ..., 6.7320e-22, 2.8063e-22,
         1.9287e-22],
        [7.7976e-01, 7.2529e-02, 4.9848e-02,  ..., 7.9467e-18, 4.2536e-18,
         2.2768e-18]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.7798, 0.8523, 0.9021,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 94])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871, 29906],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379


carryover_candidate_logprobs
torch.Size([4])


tensor([-1.1141e+08, -8.3395e+07, -8.3395e+07, -8.3395e+07], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[29900],
        [ 5139],
        [20830],
        [14002]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-5.9605e-07, -2.4877e-01, -2.6238e+00, -2.9988e+00], device='cuda:0')


new_candidates
torch.Size([4, 95])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871, 29906, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
       


new_candidate_logprobs
torch.Size([4])


tensor([-1.1141e+08, -8.3395e+07, -8.3395e+07, -8.3395e+07], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 83-p
data: [{"content": "0", "parents": [0], "prob": 0.0}, {"content": "border", "parents": [1], "prob": 0.0}, {"content": "Border", "parents": [1], "prob": 0.0}, {"content": "joint", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([4, 95])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 95, 32064])

hidden_states[-1]
torch.Size([4, 95, 3072])
infer - after batch run: GPU memory used: 15901 MB.

candidates
torch.Size([4, 95])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325,   337, 29899,  1004,   294,
          2955,   297, 29871, 29906, 29900],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
       


candidate_logprobs
torch.Size([4])


tensor([-1.1141e+08, -8.3395e+07, -8.3395e+07, -8.3395e+07], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-1.15625   , -0.98046875,  1.15625   , ...,  2.09375   ,
         1.7421875 ,  3.109375  ],
       [ 0.95703125,  2.015625  , -0.33398438, ...,  0.359375  ,
         0.05932617,  0.22460938],
       [ 1.546875  ,  1.8515625 ,  0.02563477, ...,  0.75390625,
         0.859375  ,  0.33984375],
       [-0.47070312,  2.15625   ,  0.84765625, ..., -0.33203125,
        -2.09375   ,  0.26953125]], dtype=float32)


k_mean_space
(4, 2)


array([[107.21287 ,  54.168865],
       [ 25.328566,  84.84668 ],
       [ 25.328566,  90.05351 ],
       [ 91.64854 ,  54.168865]], dtype=float32)


k_mean_clusters
(4,)


array([1, 0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-1.66790009e+08, -1.94802159e+08])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 95])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
       


new_candidate_parents


[[1, 2], [0, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([-1.6679e+08, -1.9480e+08], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0, 3]]

event: k_means
id: 84-k
data: [{"content": "border", "parents": [1, 2], "prob": 0.0}, {"content": "0", "parents": [0, 3], "prob": 0.0}]





candidates
torch.Size([2, 95])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
       


candidate_logprobs
torch.Size([2])


tensor([-1.6679e+08, -1.9480e+08], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-0.1406, -6.3438,  3.4531,  ...,  0.0000,  0.0000,  0.0000],
        [ 2.6562, -3.5312,  2.3125,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[2.8204e-01, 2.1966e-01, 1.5097e-01,  ..., 1.2222e-16, 9.5186e-17,
         2.4067e-17],
        [1.0000e+00, 1.4931e-10, 2.5946e-11,  ..., 2.5088e-27, 1.9538e-27,
         1.0458e-27]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.2820, 0.5017, 0.6527,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([13])


tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([13, 95])


tensor([[    1, 32010,  1724,  ...,  1022,   284,  5139],
        [    1, 32010,  1724,  ...,  1022,   284,  5139],
        [    1, 32010,  1724,  ...,  1022,   284,  5139],
        ...,
        [    1, 32010,  1724,  ...,  1022,   284,  5139],
        [    1, 32010,  1724,  ...,  1022,   284,  5139],
        [    1, 32010,  1724,  ..., 29871, 29906, 29900]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([13])


tensor([-1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.9480e+08], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([13, 1])


tensor([[20039],
        [17327],
        [10365],
        [18994],
        [20398],
        [ 6890],
        [ 1298],
        [ 5139],
        [ 3577],
        [21142],
        [21118],
        [ 9434],
        [29906]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([13])


tensor([-1.2657, -1.5157, -1.8907, -2.1407, -2.7657, -3.6407, -4.2657, -4.7657,
        -5.0782, -5.0782, -5.2657, -5.3282,  0.0000], device='cuda:0')


new_candidates
torch.Size([13, 96])


tensor([[    1, 32010,  1724,  ...,   284,  5139, 20039],
        [    1, 32010,  1724,  ...,   284,  5139, 17327],
        [    1, 32010,  1724,  ...,   284,  5139, 10365],
        ...,
        [    1, 32010,  1724,  ...,   284,  5139, 21118],
        [    1, 32010,  1724,  ...,   284,  5139,  9434],
        [    1, 32010,  1724,  ..., 29906, 29900, 29906]], device='cuda:0')


new_candidate_logprobs
torch.Size([13])


tensor([-1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.9480e+08], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [0], [1]]

event: top_p
id: 84-p
data: [{"content": "measurement", "parents": [0], "prob": 0.0}, {"content": "agreement", "parents": [0], "prob": 0.0}, {"content": "adjust", "parents": [0], "prob": 0.0}, {"content": "survey", "parents": [0], "prob": 0.0}, {"content": "measurements", "parents": [0], "prob": 0.0}, {"content": "framework", "parents": [0], "prob": 0.0}, {"content": "point", "parents": [0], "prob": 0.0}, {"content": "border", "parents": [0], "prob": 0.0}, {"content": "package", "parents": [0], "prob": 0.0}, {"content": "authorities", "parents": [0], "prob": 0.0}, {"content": "committee", "parents": [0], "prob": 0.0}, {"content": "trib", "parents": [0], "prob": 0.0}, {"content": "2", "parents": [1], "prob": 0.0}]





num_batches 2
infer start: GPU memory used: 15901 MB.

batch_candidates
torch.Size([8, 96])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 96, 32064])

hidden_states[-1]
torch.Size([8, 96, 3072])
infer - after batch run: GPU memory used: 16283 MB.


tensor([[    1, 32010,  1724,  ...,   284,  5139, 20039],
        [    1, 32010,  1724,  ...,   284,  5139, 17327],
        [    1, 32010,  1724,  ...,   284,  5139, 10365],
        ...,
        [    1, 32010,  1724,  ...,   284,  5139, 21118],
        [    1, 32010,  1724,  ...,   284,  5139,  9434],
        [    1, 32010,  1724,  ..., 29906, 29900, 29906]], device='cuda:0')


candidate_logprobs
torch.Size([13])


tensor([-1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08, -1.6679e+08,
        -1.6679e+08, -1.6679e+08, -1.9480e+08], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(13, 3072)


array([[ 1.6875    ,  0.78125   ,  1.234375  , ...,  0.80859375,
         0.29492188,  1.96875   ],
       [ 1.28125   ,  1.703125  ,  0.37304688, ...,  2.0625    ,
        -1.2265625 ,  2.1875    ],
       [ 1.265625  ,  0.5       ,  0.74609375, ...,  1.0078125 ,
         0.4765625 , -0.14160156],
       ...,
       [-0.3046875 ,  2.125     , -0.64453125, ...,  2.328125  ,
         0.28320312,  3.75      ],
       [ 3.765625  ,  1.15625   ,  2.171875  , ..., -1.015625  ,
         1.1796875 ,  1.6875    ],
       [-0.94140625, -0.21582031,  2.578125  , ...,  0.80859375,
         0.88671875,  2.1875    ]], dtype=float32)


k_mean_space
(13, 2)


array([[75.85983 , 53.2701  ],
       [79.18972 , 52.08574 ],
       [72.96491 , 66.398766],
       [77.769196, 58.548855],
       [77.01824 , 52.82819 ],
       [60.47782 , 74.38021 ],
       [76.18287 , 53.91624 ],
       [60.51139 , 72.10352 ],
       [74.32712 , 57.46262 ],
       [80.55487 , 52.86458 ],
       [82.08054 , 51.48785 ],
       [67.90187 , 91.6062  ],
       [71.40156 , 97.38265 ]], dtype=float32)


k_mean_clusters
(13,)


array([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-6.95172201e+08, -1.50111011e+09])


closest
(2,)


array([ 5, 10])


new_candidates
torch.Size([2, 96])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,



new_candidate_parents


[[5, 7, 11, 12], [0, 1, 2, 3, 4, 6, 8, 9, 10]]


new_candidate_logprobs
torch.Size([2])


tensor([-6.9517e+08, -1.5011e+09], device='cuda:0', dtype=torch.float64)


candidate_parents


[[5, 7, 11, 12], [0, 1, 2, 3, 4, 6, 8, 9, 10]]

event: k_means
id: 85-k
data: [{"content": "framework", "parents": [5, 7, 11, 12], "prob": 0.0}, {"content": "committee", "parents": [0, 1, 2, 3, 4, 6, 8, 9, 10], "prob": 0.0}]





candidates
torch.Size([2, 96])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,



candidate_logprobs
torch.Size([2])


tensor([-6.9517e+08, -1.5011e+09], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[  6.0000,  -2.4219,   3.8750,  ...,   0.0000,   0.0000,   0.0000],
        [ -1.9688, -12.5000,  -5.2500,  ...,   0.0000,   0.0000,   0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.4659e-01, 4.7128e-02, 2.6588e-03,  ..., 8.3463e-20, 3.9425e-20,
         1.8623e-20],
        [5.5480e-01, 4.3208e-01, 1.1514e-02,  ..., 1.0915e-20, 9.6324e-21,
         2.1493e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9466, 0.9937, 0.9964,  ..., 1.0000, 1.0000, 1.0000],
        [0.5548, 0.9869, 0.9984,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 96])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,



carryover_candidate_logprobs
torch.Size([3])


tensor([-6.9517e+08, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[17327],
        [  297],
        [29889]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.0549, -0.5892, -0.8392], device='cuda:0')


new_candidates
torch.Size([3, 97])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297, 


new_candidate_logprobs
torch.Size([3])


tensor([-6.9517e+08, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 85-p
data: [{"content": "agreement", "parents": [0], "prob": 0.0}, {"content": "in", "parents": [1], "prob": 0.0}, {"content": ".", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 16907 MB.

batch_candidates
torch.Size([3, 97])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 97, 32064])

hidden_states[-1]
torch.Size([3, 97, 3072])
infer - after batch run: GPU memory used: 16907 MB.

candidates
torch.Size([3, 97])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297, 


candidate_logprobs
torch.Size([3])


tensor([-6.9517e+08, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[ 0.68359375,  2.140625  ,  0.49804688, ...,  2.359375  ,
        -0.9140625 ,  1.546875  ],
       [ 0.11621094,  0.734375  , -0.20898438, ..., -0.02783203,
         1.8359375 ,  1.2421875 ],
       [-0.8203125 ,  0.49023438,  0.8125    , ...,  3.609375  ,
         0.6328125 ,  0.2890625 ]], dtype=float32)


k_mean_space
(3, 2)


array([[4.9816750e+01, 1.0133368e+02],
       [4.9816750e+01, 1.0309782e+02],
       [8.9258774e+01, 1.3486991e-06]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-2.19628231e+09, -1.50111011e+09])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 97])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297, 


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-2.1963e+09, -1.5011e+09], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 86-k
data: [{"content": "agreement", "parents": [0, 1], "prob": 0.0}, {"content": ".", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 97])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297, 


candidate_logprobs
torch.Size([2])


tensor([-2.1963e+09, -1.5011e+09], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[-1.5391, -8.3125, -0.9336,  ...,  0.0000,  0.0000,  0.0000],
        [ 8.3125, -5.5312,  6.8438,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[6.0550e-01, 2.2275e-01, 1.0522e-01,  ..., 1.1191e-20, 1.8268e-21,
         5.9308e-22],
        [2.9692e-01, 1.8009e-01, 1.5893e-01,  ..., 2.9962e-19, 2.4839e-19,
         4.3164e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.6055, 0.8282, 0.9335,  ..., 1.0000, 1.0000, 1.0000],
        [0.2969, 0.4770, 0.6359,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([9])


tensor([0, 0, 0, 1, 1, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([9, 97])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297, 


carryover_candidate_logprobs
torch.Size([9])


tensor([-2.1963e+09, -2.1963e+09, -2.1963e+09, -1.5011e+09, -1.5011e+09,
        -1.5011e+09, -1.5011e+09, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([9, 1])


tensor([[  297],
        [29889],
        [  310],
        [  910],
        [32007],
        [  739],
        [   13],
        [ 2233],
        [ 2398]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([9])


tensor([-0.5017, -1.5017, -2.2517, -1.2143, -1.7143, -1.8393, -1.9643, -2.0893,
        -2.8393], device='cuda:0')


new_candidates
torch.Size([9, 98])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630, 


new_candidate_logprobs
torch.Size([9])


tensor([-2.1963e+09, -2.1963e+09, -2.1963e+09, -1.5011e+09, -1.5011e+09,
        -1.5011e+09, -1.5011e+09, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [0], [1], [1], [1], [1], [1], [1]]

event: top_p
id: 86-p
data: [{"content": "in", "parents": [0], "prob": 0.0}, {"content": ".", "parents": [0], "prob": 0.0}, {"content": "of", "parents": [0], "prob": 0.0}, {"content": "This", "parents": [1], "prob": 0.0}, {"content": "<|end|>", "parents": [1], "prob": 0.0}, {"content": "It", "parents": [1], "prob": 0.0}, {"content": "\n", "parents": [1], "prob": 0.0}, {"content": "Cl", "parents": [1], "prob": 0.0}, {"content": "However", "parents": [1], "prob": 0.0}]





num_batches 2
infer start: GPU memory used: 16907 MB.

batch_candidates
torch.Size([8, 98])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 98, 32064])

hidden_states[-1]
torch.Size([8, 98, 3072])
infer - after batch run: GPU memory used: 16907 MB.

batch_candidates
torch.Size([1, 98])

batch_candidate_logprobs
torch.Size([1])

batch_logits
torch.Size([1, 98, 32064])

hidden_states[-1]
torch.Size([1, 98, 3072])
infer - after batch run: GPU memory used: 16907 MB.

candidates
torch.Size([9, 98])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327,   297],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630, 


candidate_logprobs
torch.Size([9])


tensor([-2.1963e+09, -2.1963e+09, -2.1963e+09, -1.5011e+09, -1.5011e+09,
        -1.5011e+09, -1.5011e+09, -1.5011e+09, -1.5011e+09], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(9, 3072)


array([[ 1.0703125 ,  0.85546875, -0.6328125 , ..., -0.07080078,
         1.75      ,  0.72265625],
       [-0.578125  ,  0.28320312,  0.671875  , ...,  3.609375  ,
         0.71484375,  0.31054688],
       [ 1.6640625 ,  1.8203125 , -1.6484375 , ..., -0.17480469,
         1.609375  ,  0.31054688],
       ...,
       [ 1.2578125 , -1.171875  , -1.765625  , ...,  2.609375  ,
         1.546875  ,  0.9921875 ],
       [-1.546875  , -1.25      , -1.859375  , ...,  0.00367737,
         1.640625  , -2.4375    ],
       [ 0.85546875, -0.78515625,  0.98828125, ...,  1.6953125 ,
        -0.26171875, -0.54296875]], dtype=float32)


k_mean_space
(9, 2)


array([[103.21889 ,  68.366005],
       [ 89.98854 ,  57.678463],
       [103.21846 ,  67.78547 ],
       [102.06598 ,  65.70598 ],
       [ 52.755882,  91.17285 ],
       [102.17885 ,  68.59135 ],
       [ 52.755882,  89.90262 ],
       [ 98.43852 ,  69.09728 ],
       [ 90.27609 ,  61.74015 ]], dtype=float32)


k_mean_clusters
(9,)


array([1, 1, 1, 1, 0, 1, 0, 1, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-3.00222023e+09, -1.25932874e+10])


closest
(2,)


array([4, 1])


new_candidates
torch.Size([2, 98])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630, 


new_candidate_parents


[[4, 6], [0, 1, 2, 3, 5, 7, 8]]


new_candidate_logprobs
torch.Size([2])


tensor([-3.0022e+09, -1.2593e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[4, 6], [0, 1, 2, 3, 5, 7, 8]]

event: k_means
id: 87-k
data: [{"content": "<|end|>", "parents": [4, 6], "prob": 0.0}, {"content": ".", "parents": [0, 1, 2, 3, 5, 7, 8], "prob": 0.0}]





candidates
torch.Size([2, 98])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630, 


candidate_logprobs
torch.Size([2])


tensor([-3.0022e+09, -1.2593e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.0427, -1.7031,  5.2188,  ...,  0.0000,  0.0000,  0.0000],
        [ 7.9062, -5.9062,  6.6562,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[7.7590e-01, 2.2230e-01, 9.0849e-04,  ..., 2.8519e-20, 1.1888e-20,
         6.3634e-21],
        [3.1575e-01, 2.1701e-01, 1.3162e-01,  ..., 2.8118e-19, 2.1898e-19,
         4.5901e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.7759, 0.9982, 0.9991,  ..., 1.0000, 1.0000, 1.0000],
        [0.3158, 0.5328, 0.6644,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([8])


tensor([0, 0, 1, 1, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([8, 98])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630, 


carryover_candidate_logprobs
torch.Size([8])


tensor([-3.0022e+09, -3.0022e+09, -1.2593e+10, -1.2593e+10, -1.2593e+10,
        -1.2593e+10, -1.2593e+10, -1.2593e+10], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([8, 1])


tensor([[32001],
        [32000],
        [  910],
        [32007],
        [  739],
        [   13],
        [ 2398],
        [ 2233]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([8])


tensor([-0.2537, -1.5037, -1.1528, -1.5278, -2.0278, -2.1528, -2.2778, -2.5278],
       device='cuda:0')


new_candidates
torch.Size([8, 99])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32001],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976, 


new_candidate_logprobs
torch.Size([8])


tensor([-3.0022e+09, -3.0022e+09, -1.2593e+10, -1.2593e+10, -1.2593e+10,
        -1.2593e+10, -1.2593e+10, -1.2593e+10], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [1], [1], [1], [1], [1], [1]]

event: top_p
id: 87-p
data: [{"content": "<|assistant|>", "parents": [0], "prob": 0.0}, {"content": "<|endoftext|>", "parents": [0], "prob": 0.0}, {"content": "This", "parents": [1], "prob": 0.0}, {"content": "<|end|>", "parents": [1], "prob": 0.0}, {"content": "It", "parents": [1], "prob": 0.0}, {"content": "\n", "parents": [1], "prob": 0.0}, {"content": "However", "parents": [1], "prob": 0.0}, {"content": "Cl", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([8, 99])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 99, 32064])

hidden_states[-1]
torch.Size([8, 99, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([8, 99])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32001],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976, 


candidate_logprobs
torch.Size([8])


tensor([-3.0022e+09, -3.0022e+09, -1.2593e+10, -1.2593e+10, -1.2593e+10,
        -1.2593e+10, -1.2593e+10, -1.2593e+10], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(8, 3072)


array([[-2.359375  ,  0.20800781, -1.09375   , ...,  0.98828125,
        -1.453125  ,  1.9453125 ],
       [-0.546875  ,  0.17675781,  0.4453125 , ..., -1.546875  ,
         1.078125  , -1.4375    ],
       [-1.734375  , -0.6015625 , -0.23730469, ...,  0.40039062,
        -0.375     ,  1.0859375 ],
       ...,
       [ 1.1171875 , -1.0546875 , -1.6640625 , ...,  2.703125  ,
         1.203125  ,  1.0703125 ],
       [ 0.8671875 , -0.7578125 ,  1.        , ...,  1.7109375 ,
        -0.48632812, -0.7109375 ],
       [-1.4921875 , -1.1640625 , -1.859375  , ...,  0.08300781,
         1.703125  , -2.28125   ]], dtype=float32)


k_mean_space
(8, 2)


array([[ 61.827183,  89.12596 ],
       [ 56.549843,  79.40007 ],
       [ 62.501106, 102.01083 ],
       [ 87.21983 ,  52.96106 ],
       [ 65.23254 , 101.828835],
       [ 88.85133 ,  52.96106 ],
       [ 57.34347 ,  89.4451  ],
       [ 67.333954,  98.270386]], dtype=float32)


k_mean_clusters
(8,)


array([0, 0, 0, 1, 0, 1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-5.63775901e+10, -2.51865748e+10])


closest
(2,)


array([1, 3])


new_candidates
torch.Size([2, 99])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976, 


new_candidate_parents


[[0, 1, 2, 4, 6, 7], [3, 5]]


new_candidate_logprobs
torch.Size([2])


tensor([-5.6378e+10, -2.5187e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1, 2, 4, 6, 7], [3, 5]]

event: k_means
id: 88-k
data: [{"content": "<|endoftext|>", "parents": [0, 1, 2, 4, 6, 7], "prob": 0.0}, {"content": "<|end|>", "parents": [3, 5], "prob": 0.0}]





candidates
torch.Size([2, 99])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976, 


candidate_logprobs
torch.Size([2])


tensor([-5.6378e+10, -2.5187e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 0.9141, -5.3750,  1.1641,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.2812, -1.6641,  4.9688,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9144e-01, 2.7847e-03, 5.1512e-04,  ..., 7.2253e-14, 4.9659e-14,
         2.3457e-14],
        [9.1365e-01, 8.4983e-02, 6.4885e-04,  ..., 2.0368e-20, 9.6213e-21,
         7.4931e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9914, 0.9942, 0.9947,  ..., 1.0000, 1.0000, 1.0000],
        [0.9137, 0.9986, 0.9993,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 99])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976, 


carryover_candidate_logprobs
torch.Size([2])


tensor([-5.6378e+10, -2.5187e+10], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[32000],
        [32001]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0086, -0.0903], device='cuda:0')


new_candidates
torch.Size([2, 100])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889, 


new_candidate_logprobs
torch.Size([2])


tensor([-5.6378e+10, -2.5187e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 88-p
data: [{"content": "<|endoftext|>", "parents": [0], "prob": 0.0}, {"content": "<|assistant|>", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([2, 100])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 100, 32064])

hidden_states[-1]
torch.Size([2, 100, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([2, 100])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889, 


candidate_logprobs
torch.Size([2])


tensor([-5.6378e+10, -2.5187e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 2.7188, -1.6484,  2.0469,  ...,  0.0000,  0.0000,  0.0000],
        [ 3.1719,  3.5781,  1.2500,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[8.8611e-01, 9.8438e-03, 8.1608e-03,  ..., 3.5258e-12, 3.0158e-12,
         1.6655e-12],
        [3.2582e-01, 1.9762e-01, 1.3582e-01,  ..., 2.5352e-20, 1.9744e-20,
         6.8235e-21]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.8861, 0.8960, 0.9041,  ..., 1.0000, 1.0000, 1.0000],
        [0.3258, 0.5234, 0.6593,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([10])


tensor([0, 0, 0, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([10, 100])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139, 21118, 29889, 32007, 32000, 32000],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889, 


carryover_candidate_logprobs
torch.Size([10])


tensor([-5.6378e+10, -5.6378e+10, -5.6378e+10, -2.5187e+10, -2.5187e+10,
        -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10],
       device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([10, 1])


tensor([[32000],
        [29871],
        [  278],
        [ 2398],
        [ 1094],
        [ 5806],
        [  739],
        [  319],
        [  512],
        [ 4451]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([10])


tensor([-0.1209, -4.6209, -4.8084, -1.1214, -1.6214, -1.9964, -2.1214, -2.3714,
        -3.6214, -4.1214], device='cuda:0')


new_candidates
torch.Size([10, 101])


tensor([[    1, 32010,  1724,  ..., 32000, 32000, 32000],
        [    1, 32010,  1724,  ..., 32000, 32000, 29871],
        [    1, 32010,  1724,  ..., 32000, 32000,   278],
        ...,
        [    1, 32010,  1724,  ..., 32007, 32001,   319],
        [    1, 32010,  1724,  ..., 32007, 32001,   512],
        [    1, 32010,  1724,  ..., 32007, 32001,  4451]], device='cuda:0')


new_candidate_logprobs
torch.Size([10])


tensor([-5.6378e+10, -5.6378e+10, -5.6378e+10, -2.5187e+10, -2.5187e+10,
        -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10],
       device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [0], [0], [1], [1], [1], [1], [1], [1], [1]]

event: top_p
id: 89-p
data: [{"content": "<|endoftext|>", "parents": [0], "prob": 0.0}, {"content": "", "parents": [0], "prob": 0.0}, {"content": "the", "parents": [0], "prob": 0.0}, {"content": "However", "parents": [1], "prob": 0.0}, {"content": "As", "parents": [1], "prob": 0.0}, {"content": "While", "parents": [1], "prob": 0.0}, {"content": "It", "parents": [1], "prob": 0.0}, {"content": "A", "parents": [1], "prob": 0.0}, {"content": "In", "parents": [1], "prob": 0.0}, {"content": "Out", "parents": [1], "prob": 0.0}]





num_batches 2
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([8, 101])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 101, 32064])

hidden_states[-1]
torch.Size([8, 101, 3072])
infer - after batch run: GPU memory used: 17015 MB.

batch_candidates
torch.Size([2, 101])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 101, 32064])

hidden_states[-1]
torch.Size([2, 101, 3072])
infer - after batch run:

tensor([[    1, 32010,  1724,  ..., 32000, 32000, 32000],
        [    1, 32010,  1724,  ..., 32000, 32000, 29871],
        [    1, 32010,  1724,  ..., 32000, 32000,   278],
        ...,
        [    1, 32010,  1724,  ..., 32007, 32001,   319],
        [    1, 32010,  1724,  ..., 32007, 32001,   512],
        [    1, 32010,  1724,  ..., 32007, 32001,  4451]], device='cuda:0')


candidate_logprobs
torch.Size([10])


tensor([-5.6378e+10, -5.6378e+10, -5.6378e+10, -2.5187e+10, -2.5187e+10,
        -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10, -2.5187e+10],
       device='cuda:0', dtype=torch.float64)


embeddings_np
(10, 3072)


array([[-1.6953125 ,  0.79296875, -0.18066406, ..., -1.9296875 ,
         2.734375  , -0.1171875 ],
       [ 0.00735474, -0.26367188, -0.09033203, ...,  1.109375  ,
         1.703125  , -0.09472656],
       [-0.74609375,  1.046875  ,  0.48242188, ..., -0.08007812,
         0.99609375,  0.09228516],
       ...,
       [-1.3125    ,  0.6640625 ,  0.8203125 , ...,  1.875     ,
        -0.578125  , -1.3515625 ],
       [-1.4453125 , -0.83203125,  1.3515625 , ...,  0.94921875,
        -0.69140625, -1.0625    ],
       [-3.609375  , -1.515625  , -0.6484375 , ...,  1.3046875 ,
         0.14257812, -0.8125    ]], dtype=float32)


k_mean_space
(10, 2)


array([[56.58597 , 82.66566 ],
       [69.83423 , 91.9198  ],
       [40.77184 , 70.94457 ],
       [54.435883, 73.02433 ],
       [63.609917, 92.94405 ],
       [40.221134, 73.196915],
       [60.181572,  0.      ],
       [54.99208 , 81.24544 ],
       [49.78813 , 78.94521 ],
       [69.865814, 94.185326]], dtype=float32)


k_mean_clusters
(10,)


array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-3.20252219e+11, -2.51865748e+10])


closest
(2,)


array([5, 6])


new_candidates
torch.Size([2, 101])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274


new_candidate_parents


[[0, 1, 2, 3, 4, 5, 7, 8, 9], [6]]


new_candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1, 2, 3, 4, 5, 7, 8, 9], [6]]

event: k_means
id: 90-k
data: [{"content": "While", "parents": [0, 1, 2, 3, 4, 5, 7, 8, 9], "prob": 0.0}, {"content": "It", "parents": [6], "prob": 0.0}]





candidates
torch.Size([2, 101])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274


candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 6.6875,  4.5625,  3.9219,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.5391, -0.9453,  2.3125,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.5032e-01, 1.0557e-02, 6.4032e-03,  ..., 1.8274e-16, 1.5631e-16,
         1.8094e-17],
        [9.7659e-01, 2.0268e-02, 2.4207e-03,  ..., 3.8591e-19, 2.4916e-19,
         5.2227e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9503, 0.9609, 0.9673,  ..., 1.0000, 1.0000, 1.0000],
        [0.9766, 0.9969, 0.9993,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 101])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274


carryover_candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[ 8040],
        [29915]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0510, -0.0237], device='cuda:0')


new_candidates
torch.Size([2, 102])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040


new_candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 90-p
data: [{"content": "Mount", "parents": [0], "prob": 0.0}, {"content": "'", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([2, 102])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 102, 32064])

hidden_states[-1]
torch.Size([2, 102, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([2, 102])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040


candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[6.1250, 4.5938, 4.0312,  ..., 0.0000, 0.0000, 0.0000],
        [0.7500, 4.4375, 4.7188,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9993e-01, 2.4299e-05, 2.4299e-05,  ..., 6.3867e-21, 3.6390e-21,
         2.8341e-21],
        [1.0000e+00, 1.3440e-08, 1.0467e-08,  ..., 7.1662e-22, 3.8358e-22,
         1.9287e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 102])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040


carryover_candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[18274],
        [29879]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-6.9263e-05,  0.0000e+00], device='cuda:0')


new_candidates
torch.Size([2, 103])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338


new_candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 91-p
data: [{"content": "Ever", "parents": [0], "prob": 0.0}, {"content": "s", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([2, 103])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 103, 32064])

hidden_states[-1]
torch.Size([2, 103, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([2, 103])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338


candidate_logprobs
torch.Size([2])


tensor([-3.2025e+11, -2.5187e+10], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 4.5938, -0.5508, -1.1250,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.6016,  2.1406,  1.9922,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[1.0000e+00, 7.3382e-07, 1.8554e-07,  ..., 1.5521e-25, 1.2088e-25,
         9.4141e-26],
        [7.3154e-01, 1.2712e-01, 9.9003e-02,  ..., 7.4553e-18, 5.4544e-18,
         3.5216e-18]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.7315, 0.8587, 0.9577,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 103])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338


carryover_candidate_logprobs
torch.Size([4])


tensor([-3.2025e+11, -2.5187e+10, -2.5187e+10, -2.5187e+10], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[  342],
        [ 4100],
        [18853],
        [ 7088]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-1.1921e-06, -3.1260e-01, -2.0626e+00, -2.3126e+00], device='cuda:0')


new_candidates
torch.Size([4, 104])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233


new_candidate_logprobs
torch.Size([4])


tensor([-3.2025e+11, -2.5187e+10, -2.5187e+10, -2.5187e+10], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 92-p
data: [{"content": "est", "parents": [0], "prob": 0.0}, {"content": "important", "parents": [1], "prob": 0.0}, {"content": "essential", "parents": [1], "prob": 0.0}, {"content": "worth", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([4, 104])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 104, 32064])

hidden_states[-1]
torch.Size([4, 104, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([4, 104])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274,   342],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233


candidate_logprobs
torch.Size([4])


tensor([-3.2025e+11, -2.5187e+10, -2.5187e+10, -2.5187e+10], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-2.03125   , -0.20214844, -0.34179688, ..., -0.23828125,
        -0.91796875,  0.19726562],
       [ 0.8828125 , -0.71875   ,  0.87890625, ...,  2.203125  ,
        -0.34960938, -2.0625    ],
       [ 1.21875   , -0.25195312,  0.24023438, ...,  2.640625  ,
        -0.16796875, -2.        ],
       [ 1.25      , -0.90234375, -0.08251953, ..., -0.60546875,
        -1.109375  , -1.578125  ]], dtype=float32)


k_mean_space
(4, 2)


array([[78.519325, 39.062798],
       [15.056187, 58.569244],
       [15.056187, 59.140648],
       [57.975124, 39.062798]], dtype=float32)


k_mean_clusters
(4,)


array([1, 0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-5.03731497e+10, -3.45438794e+11])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 104])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233


new_candidate_parents


[[1, 2], [0, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([-5.0373e+10, -3.4544e+11], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0, 3]]

event: k_means
id: 93-k
data: [{"content": "important", "parents": [1, 2], "prob": 0.0}, {"content": "est", "parents": [0, 3], "prob": 0.0}]





candidates
torch.Size([2, 104])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233


candidate_logprobs
torch.Size([2])


tensor([-5.0373e+10, -3.4544e+11], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[5.7188, 6.7188, 6.4062,  ..., 0.0000, 0.0000, 0.0000],
        [6.9062, 5.8750, 9.0625,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9945e-01, 2.0336e-04, 1.7946e-04,  ..., 2.8609e-20, 2.5248e-20,
         1.2695e-20],
        [6.4458e-01, 3.0448e-01, 2.8321e-02,  ..., 2.1116e-19, 1.9837e-19,
         1.7506e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9994, 0.9996, 0.9998,  ..., 1.0000, 1.0000, 1.0000],
        [0.6446, 0.9491, 0.9774,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 104])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233


carryover_candidate_logprobs
torch.Size([3])


tensor([-5.0373e+10, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[ 304],
        [ 338],
        [8640]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-5.5394e-04, -4.3915e-01, -1.1891e+00], device='cuda:0')


new_candidates
torch.Size([3, 105])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
       


new_candidate_logprobs
torch.Size([3])


tensor([-5.0373e+10, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 93-p
data: [{"content": "to", "parents": [0], "prob": 0.0}, {"content": "is", "parents": [1], "prob": 0.0}, {"content": "holds", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([3, 105])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 105, 32064])

hidden_states[-1]
torch.Size([3, 105, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([3, 105])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
       


candidate_logprobs
torch.Size([3])


tensor([-5.0373e+10, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[ 0.796875  ,  1.0703125 ,  0.94921875, ...,  1.6640625 ,
        -1.875     , -0.75390625],
       [-0.609375  ,  1.8515625 , -0.34179688, ..., -0.39453125,
         0.390625  , -0.484375  ],
       [ 0.11669922,  2.9375    ,  1.1171875 , ...,  0.265625  ,
        -0.63671875, -0.27734375]], dtype=float32)


k_mean_space
(3, 2)


array([[35.923813, 76.97321 ],
       [35.923813, 63.988903],
       [60.985302,  0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-3.95811944e+11, -3.45438794e+11])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 105])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
       


new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-3.9581e+11, -3.4544e+11], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 94-k
data: [{"content": "to", "parents": [0, 1], "prob": 0.0}, {"content": "holds", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 105])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
       


candidate_logprobs
torch.Size([2])


tensor([-3.9581e+11, -3.4544e+11], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[6.4062, 3.5156, 3.6875,  ..., 0.0000, 0.0000, 0.0000],
        [5.3125, 3.5938, 9.8125,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[8.6690e-01, 1.0354e-01, 5.1548e-03,  ..., 1.0887e-18, 6.0122e-19,
         3.8818e-19],
        [9.9982e-01, 9.6094e-05, 4.0058e-05,  ..., 2.4761e-22, 1.9284e-22,
         1.2451e-22]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.8669, 0.9704, 0.9756,  ..., 1.0000, 1.0000, 1.0000],
        [0.9998, 0.9999, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True,  True, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 0, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 105])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
       


carryover_candidate_logprobs
torch.Size([3])


tensor([-3.9581e+11, -3.9581e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[ 4443],
        [15544],
        [  278]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-1.4283e-01, -2.2678e+00, -1.7913e-04], device='cuda:0')


new_candidates
torch.Size([3, 106])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,



new_candidate_logprobs
torch.Size([3])


tensor([-3.9581e+11, -3.9581e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [0], [1]]

event: top_p
id: 94-p
data: [{"content": "note", "parents": [0], "prob": 0.0}, {"content": "clarify", "parents": [0], "prob": 0.0}, {"content": "the", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([3, 106])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 106, 32064])

hidden_states[-1]
torch.Size([3, 106, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([3, 106])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,



candidate_logprobs
torch.Size([3])


tensor([-3.9581e+11, -3.9581e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-0.41015625, -0.4453125 ,  1.265625  , ...,  1.421875  ,
         0.24023438, -1.109375  ],
       [ 0.65234375, -0.2890625 ,  1.        , ...,  0.49804688,
        -1.4140625 , -0.28125   ],
       [-1.671875  ,  0.29492188,  0.53515625, ..., -0.11474609,
        -0.20996094,  0.61328125]], dtype=float32)


k_mean_space
(3, 2)


array([[23.462244, 74.95013 ],
       [23.462244, 77.73858 ],
       [72.66311 ,  0.      ]], dtype=float32)


k_mean_clusters
(3,)


array([0, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-7.91623888e+11, -3.45438794e+11])


closest
(2,)


array([0, 2])


new_candidates
torch.Size([2, 106])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,



new_candidate_parents


[[0, 1], [2]]


new_candidate_logprobs
torch.Size([2])


tensor([-7.9162e+11, -3.4544e+11], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 1], [2]]

event: k_means
id: 95-k
data: [{"content": "note", "parents": [0, 1], "prob": 0.0}, {"content": "the", "parents": [2], "prob": 0.0}]





candidates
torch.Size([2, 106])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,



candidate_logprobs
torch.Size([2])


tensor([-7.9162e+11, -3.4544e+11], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[5.8125, 6.6250, 4.4375,  ..., 0.0000, 0.0000, 0.0000],
        [4.7188, 2.6719, 3.7812,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.4447e-01, 4.7023e-02, 3.0060e-03,  ..., 1.1142e-18, 1.0799e-18,
         5.1013e-19],
        [5.1623e-01, 4.5558e-01, 2.0017e-02,  ..., 4.8936e-19, 3.8111e-19,
         1.4020e-19]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9445, 0.9915, 0.9945,  ..., 1.0000, 1.0000, 1.0000],
        [0.5162, 0.9718, 0.9918,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([3])


tensor([0, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([3, 106])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,



carryover_candidate_logprobs
torch.Size([3])


tensor([-7.9162e+11, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([3, 1])


tensor([[ 393],
        [3611],
        [2407]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([3])


tensor([-0.0571, -0.6612, -0.7862], device='cuda:0')


new_candidates
torch.Size([3, 107])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038, 


new_candidate_logprobs
torch.Size([3])


tensor([-7.9162e+11, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1]]

event: top_p
id: 95-p
data: [{"content": "that", "parents": [0], "prob": 0.0}, {"content": "title", "parents": [1], "prob": 0.0}, {"content": "record", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([3, 107])

batch_candidate_logprobs
torch.Size([3])

batch_logits
torch.Size([3, 107, 32064])

hidden_states[-1]
torch.Size([3, 107, 3072])
infer - after batch run: GPU memory used: 17015 MB.

candidates
torch.Size([3, 107])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038, 


candidate_logprobs
torch.Size([3])


tensor([-7.9162e+11, -3.4544e+11, -3.4544e+11], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(3, 3072)


array([[-1.125     ,  0.01806641,  0.69140625, ..., -0.13378906,
        -0.92578125, -1.03125   ],
       [-1.375     ,  1.390625  ,  0.953125  , ...,  0.89453125,
         0.984375  ,  0.47460938],
       [-1.65625   ,  1.1171875 ,  1.359375  , ...,  0.24316406,
         0.9921875 ,  0.421875  ]], dtype=float32)


k_mean_space
(3, 2)


array([[69.70976 ,  0.      ],
       [18.049467, 71.78563 ],
       [18.049467, 72.230835]], dtype=float32)


k_mean_clusters
(3,)


array([1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-6.90877588e+11, -7.91623888e+11])


closest
(2,)


array([1, 0])


new_candidates
torch.Size([2, 107])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274,   342,  8640,   278,  3611],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038, 


new_candidate_parents


[[1, 2], [0]]


new_candidate_logprobs
torch.Size([2])


tensor([-6.9088e+11, -7.9162e+11], device='cuda:0', dtype=torch.float64)


candidate_parents


[[1, 2], [0]]

event: k_means
id: 96-k
data: [{"content": "title", "parents": [1, 2], "prob": 0.0}, {"content": "that", "parents": [0], "prob": 0.0}]





candidates
torch.Size([2, 107])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
          5806,  8040, 18274,   342,  8640,   278,  3611],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038, 


candidate_logprobs
torch.Size([2])


tensor([-6.9088e+11, -7.9162e+11], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[ 8.1875,  4.3750, 10.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 8.3125,  3.3125,  4.3125,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.5038e-01, 4.7317e-02, 1.8347e-03,  ..., 2.1187e-20, 6.8784e-21,
         5.7024e-21],
        [2.6796e-01, 1.6253e-01, 1.4343e-01,  ..., 8.8659e-19, 6.9048e-19,
         3.0337e-20]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9504, 0.9977, 0.9995,  ..., 1.0000, 1.0000, 1.0000],
        [0.2680, 0.4305, 0.5739,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([13])


tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([13, 107])


tensor([[    1, 32010,  1724,  ...,  8640,   278,  3611],
        [    1, 32010,  1724,  ...,   304,  4443,   393],
        [    1, 32010,  1724,  ...,   304,  4443,   393],
        ...,
        [    1, 32010,  1724,  ...,   304,  4443,   393],
        [    1, 32010,  1724,  ...,   304,  4443,   393],
        [    1, 32010,  1724,  ...,   304,  4443,   393]], device='cuda:0')


carryover_candidate_logprobs
torch.Size([13])


tensor([-6.9088e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([13, 1])


tensor([[  363],
        [  746],
        [  727],
        [  565],
        [ 1550],
        [  278],
        [19223],
        [  376],
        [  297],
        [ 3171],
        [ 1422],
        [ 8679],
        [ 3683]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([13])


tensor([-0.0509, -1.3169, -1.8169, -1.9419, -2.3169, -2.4419, -3.1919, -3.5669,
        -3.6919, -4.0669, -4.3169, -4.4419, -4.5669], device='cuda:0')


new_candidates
torch.Size([13, 108])


tensor([[    1, 32010,  1724,  ...,   278,  3611,   363],
        [    1, 32010,  1724,  ...,  4443,   393,   746],
        [    1, 32010,  1724,  ...,  4443,   393,   727],
        ...,
        [    1, 32010,  1724,  ...,  4443,   393,  1422],
        [    1, 32010,  1724,  ...,  4443,   393,  8679],
        [    1, 32010,  1724,  ...,  4443,   393,  3683]], device='cuda:0')


new_candidate_logprobs
torch.Size([13])


tensor([-6.9088e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]

event: top_p
id: 96-p
data: [{"content": "for", "parents": [0], "prob": 0.0}, {"content": "when", "parents": [1], "prob": 0.0}, {"content": "there", "parents": [1], "prob": 0.0}, {"content": "if", "parents": [1], "prob": 0.0}, {"content": "while", "parents": [1], "prob": 0.0}, {"content": "the", "parents": [1], "prob": 0.0}, {"content": "mountains", "parents": [1], "prob": 0.0}, {"content": "\"", "parents": [1], "prob": 0.0}, {"content": "in", "parents": [1], "prob": 0.0}, {"content": "height", "parents": [1], "prob": 0.0}, {"content": "different", "parents": [1], "prob": 0.0}, {"content": "depending", "parents": [1], "prob": 0.0}, {"content": "determ", "parents": [1], "prob": 0.0}]





num_batches 2
infer start: GPU memory used: 17015 MB.

batch_candidates
torch.Size([8, 108])

batch_candidate_logprobs
torch.Size([8])

batch_logits
torch.Size([8, 108, 32064])

hidden_states[-1]
torch.Size([8, 108, 3072])
infer - after batch run: GPU memory used: 17015 MB.

batch_candidates
torch.Size

tensor([[    1, 32010,  1724,  ...,   278,  3611,   363],
        [    1, 32010,  1724,  ...,  4443,   393,   746],
        [    1, 32010,  1724,  ...,  4443,   393,   727],
        ...,
        [    1, 32010,  1724,  ...,  4443,   393,  1422],
        [    1, 32010,  1724,  ...,  4443,   393,  8679],
        [    1, 32010,  1724,  ...,  4443,   393,  3683]], device='cuda:0')


candidate_logprobs
torch.Size([13])


tensor([-6.9088e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11, -7.9162e+11,
        -7.9162e+11, -7.9162e+11, -7.9162e+11], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(13, 3072)


array([[-1.1953125 , -0.703125  ,  1.7109375 , ...,  0.01428223,
        -0.5703125 ,  0.7421875 ],
       [-0.25      ,  0.74609375,  0.890625  , ..., -0.08789062,
        -0.6875    , -0.98046875],
       [-0.36328125,  0.0234375 ,  2.390625  , ..., -0.03857422,
        -1.65625   , -0.03710938],
       ...,
       [-0.31054688,  0.07958984,  2.0625    , ..., -0.23828125,
        -0.89453125, -1.015625  ],
       [-1.09375   , -0.1953125 ,  0.85546875, ...,  0.07470703,
         0.71875   , -1.03125   ],
       [-0.25390625, -0.9921875 ,  0.8828125 , ..., -0.6015625 ,
         0.89453125, -1.625     ]], dtype=float32)


k_mean_space
(13, 2)


array([[47.24588 , 54.191563],
       [54.447166, 37.708908],
       [46.706974, 59.032967],
       [62.21308 , 43.422924],
       [51.94825 , 38.06299 ],
       [62.92316 , 48.124672],
       [62.599422, 50.709393],
       [45.34918 , 61.25884 ],
       [62.031445, 44.595474],
       [62.21703 , 50.126686],
       [58.063267, 45.38551 ],
       [46.565506, 62.822563],
       [43.827595, 64.21852 ]], dtype=float32)


k_mean_clusters
(13,)


array([0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0], dtype=int32)


k_mean_logprob_mass
(2,)


array([-3.85737314e+12, -6.33299110e+12])


closest
(2,)


array([12,  1])


new_candidates
torch.Size([2, 108])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224, 


new_candidate_parents


[[0, 2, 7, 11, 12], [1, 3, 4, 5, 6, 8, 9, 10]]


new_candidate_logprobs
torch.Size([2])


tensor([-3.8574e+12, -6.3330e+12], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 2, 7, 11, 12], [1, 3, 4, 5, 6, 8, 9, 10]]

event: k_means
id: 97-k
data: [{"content": "determ", "parents": [0, 2, 7, 11, 12], "prob": 0.0}, {"content": "when", "parents": [1, 3, 4, 5, 6, 8, 9, 10], "prob": 0.0}]





candidates
torch.Size([2, 108])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224, 


candidate_logprobs
torch.Size([2])


tensor([-3.8574e+12, -6.3330e+12], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[4.1875, 2.1094, 4.1562,  ..., 0.0000, 0.0000, 0.0000],
        [5.9062, 0.3242, 3.4375,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.9967e-01, 2.0340e-04, 8.4790e-05,  ..., 5.5075e-17, 6.5778e-18,
         4.8124e-18],
        [4.2514e-01, 4.2514e-01, 5.0776e-02,  ..., 1.4066e-18, 1.4066e-18,
         1.0955e-18]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9997, 0.9999, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
        [0.4251, 0.8503, 0.9011,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 108])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224, 


carryover_candidate_logprobs
torch.Size([4])


tensor([-3.8574e+12, -6.3330e+12, -6.3330e+12, -6.3330e+12], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[ 2827],
        [ 5353],
        [13858],
        [16811]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-3.2848e-04, -8.5533e-01, -8.5533e-01, -2.9803e+00], device='cuda:0')


new_candidates
torch.Size([4, 109])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 


new_candidate_logprobs
torch.Size([4])


tensor([-3.8574e+12, -6.3330e+12, -6.3330e+12, -6.3330e+12], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 97-p
data: [{"content": "ining", "parents": [0], "prob": 0.0}, {"content": "discuss", "parents": [1], "prob": 0.0}, {"content": "considering", "parents": [1], "prob": 0.0}, {"content": "referring", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17187 MB.

batch_candidates
torch.Size([4, 109])

batch_candidate_logprobs
torch.Size([4])

batch_logits
torch.Size([4, 109, 32064])

hidden_states[-1]
torch.Size([4, 109, 3072])
infer - after batch run: GPU memory used: 17187 MB.

candidates
torch.Size([4, 109])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 


candidate_logprobs
torch.Size([4])


tensor([-3.8574e+12, -6.3330e+12, -6.3330e+12, -6.3330e+12], device='cuda:0',
       dtype=torch.float64)


embeddings_np
(4, 3072)


array([[-1.265625  , -0.24023438,  1.0625    , ...,  0.58984375,
        -1.6328125 , -0.82421875],
       [-0.24414062, -0.23339844,  2.0625    , ...,  0.453125  ,
        -0.18945312, -0.6328125 ],
       [-0.45898438, -1.703125  ,  1.4609375 , ..., -0.42382812,
        -1.859375  , -0.6796875 ],
       [-0.23632812, -0.23339844,  2.078125  , ...,  0.359375  ,
        -0.30273438,  0.38085938]], dtype=float32)


k_mean_space
(4, 2)


array([[26.784224, 54.185104],
       [60.243473, 26.482695],
       [26.784224, 68.19055 ],
       [62.646454, 26.482695]], dtype=float32)


k_mean_clusters
(4,)


array([0, 1, 0, 1], dtype=int32)


k_mean_logprob_mass
(2,)


array([-1.01903642e+13, -1.26659822e+13])


closest
(2,)


array([0, 1])


new_candidates
torch.Size([2, 109])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 


new_candidate_parents


[[0, 2], [1, 3]]


new_candidate_logprobs
torch.Size([2])


tensor([-1.0190e+13, -1.2666e+13], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0, 2], [1, 3]]

event: k_means
id: 98-k
data: [{"content": "ining", "parents": [0, 2], "prob": 0.0}, {"content": "discuss", "parents": [1, 3], "prob": 0.0}]





candidates
torch.Size([2, 109])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 


candidate_logprobs
torch.Size([2])


tensor([-1.0190e+13, -1.2666e+13], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[10.1250,  6.9375,  9.5625,  ...,  0.0000,  0.0000,  0.0000],
        [ 5.4062,  1.6875,  5.4062,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.3511e-01, 5.9780e-02, 2.6265e-03,  ..., 1.4025e-19, 1.3176e-19,
         1.2770e-19],
        [9.9988e-01, 9.6100e-05, 1.6700e-05,  ..., 3.3082e-18, 3.2064e-18,
         2.2737e-18]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9351, 0.9949, 0.9975,  ..., 1.0000, 1.0000, 1.0000],
        [0.9999, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True, False, False,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([2])


tensor([0, 1], device='cuda:0')


carryover_candidates
torch.Size([2, 109])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 


carryover_candidate_logprobs
torch.Size([2])


tensor([-1.0190e+13, -1.2666e+13], device='cuda:0', dtype=torch.float64)


new_candidate_toks
torch.Size([2, 1])


tensor([[278],
        [292]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([2])


tensor([-0.0671, -0.0001], device='cuda:0')


new_candidates
torch.Size([2, 110])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958, 


new_candidate_logprobs
torch.Size([2])


tensor([-1.0190e+13, -1.2666e+13], device='cuda:0', dtype=torch.float64)


candidate_parents


[[0], [1]]

event: top_p
id: 98-p
data: [{"content": "the", "parents": [0], "prob": 0.0}, {"content": "ing", "parents": [1], "prob": 0.0}]





num_batches 1
infer start: GPU memory used: 17187 MB.

batch_candidates
torch.Size([2, 110])

batch_candidate_logprobs
torch.Size([2])

batch_logits
torch.Size([2, 110, 32064])

hidden_states[-1]
torch.Size([2, 110, 3072])
infer - after batch run: GPU memory used: 17187 MB.

candidates
torch.Size([2, 110])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958, 


candidate_logprobs
torch.Size([2])


tensor([-1.0190e+13, -1.2666e+13], device='cuda:0', dtype=torch.float64)


last_tok_logits
torch.Size([2, 32064])


tensor([[10.5625,  7.8438,  7.1875,  ...,  0.0000,  0.0000,  0.0000],
        [ 9.2500,  6.2188,  6.5312,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')


sorted_logits
torch.Size([2, 32064])

sorted_indices
torch.Size([2, 32064])

sorted_probs
torch.Size([2, 32064])


tensor([[9.0575e-01, 7.4348e-02, 1.2920e-02,  ..., 1.0357e-18, 9.1396e-19,
         7.1180e-19],
        [5.1942e-01, 2.7803e-01, 1.4882e-01,  ..., 2.6618e-18, 1.6145e-18,
         1.5166e-18]], device='cuda:0')

tensor([1.0000, 1.0000], device='cuda:0')


cum_probs
torch.Size([2, 32064])


tensor([[0.9057, 0.9801, 0.9930,  ..., 1.0000, 1.0000, 1.0000],
        [0.5194, 0.7975, 0.9463,  ..., 1.0000, 1.0000, 1.0000]],
       device='cuda:0')


keep_indices
torch.Size([2, 32064])


tensor([[ True, False, False,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')


new_candidate_parents
torch.Size([4])


tensor([0, 1, 1, 1], device='cuda:0')


carryover_candidates
torch.Size([4, 110])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827,   278],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958, 


carryover_candidate_logprobs
torch.Size([4])


tensor([-1.0190e+13, -1.2666e+13, -1.2666e+13, -1.2666e+13], device='cuda:0',
       dtype=torch.float64)


new_candidate_toks
torch.Size([4, 1])


tensor([[  376],
        [  278],
        [  376],
        [19223]], device='cuda:0')


new_candidate_tok_logprobs
torch.Size([4])


tensor([-0.0990, -0.6550, -1.2800, -1.9050], device='cuda:0')


new_candidates
torch.Size([4, 111])


tensor([[    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378,   297,  4958,   310, 19224,  2038,  7205,
          3233,   338,  8040, 18274,   342, 29889,  5976,   630,   297,   278,
           379,  3039,   388,   294, 29892,   373,   278,  5139,  1546, 22806,
           284,   322,   278, 28273,   681,  5120,   310, 19429,   300,   297,
          7551, 29892,  8040, 18274,   342, 15028,   472,   385, 21210,   573,
          3171,   310, 14235, 29871, 29947, 29892, 29947, 29946, 29947, 29889,
         29947, 29953, 27881,   313, 29906, 29929, 29892, 29900, 29941, 29896,
         29889, 29955,  6900,   511,   408, 10325, 17005,   491,   278,  7551,
         29899, 29940,  1022,   284,  5139,  6890, 17327, 29889, 32007, 32001,
           739, 29915, 29879,  4100,   304,  4443,   393,  3683,  2827,   278,
           376],
        [    1, 32010,  1724,   338,   278,  9939, 14378, 29973, 29871, 32007,
         32001,   450,  9939, 14378


new_candidate_logprobs
torch.Size([4])


tensor([-1.0190e+13, -1.2666e+13, -1.2666e+13, -1.2666e+13], device='cuda:0',
       dtype=torch.float64)


candidate_parents


[[0], [1], [1], [1]]

event: top_p
id: 99-p
data: [{"content": "\"", "parents": [0], "prob": 0.0}, {"content": "the", "parents": [1], "prob": 0.0}, {"content": "\"", "parents": [1], "prob": 0.0}, {"content": "mountains", "parents": [1], "prob": 0.0}]




event: level
id: END
data: []




