In [1]:
import torch
from transformers import AutoTokenizer
from deeptrust.models.llama.modeling_llama import LlamaForCausalLM
import time
from pathlib import Path

# Model used for the happy-path generation and for every validation pass below.
# `LlamaForCausalLM` is the deeptrust variant imported at the top of this cell,
# not the stock transformers class.
model_name = "meta-llama/Llama-3.1-8B-Instruct"
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    torch_dtype=torch.float32,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
)

# Directory that timestamped commit logs are written to; created on first run.
COMMIT_DIR = Path("/tmp/deeptrust-commits")
COMMIT_DIR.mkdir(exist_ok=True)


def get_commit_path_from_time():
    """Return a path inside ``COMMIT_DIR`` named after the current Unix time.

    The file name is the current time truncated to whole seconds followed by
    ``.log``, so two calls within the same second yield the same path.
    """
    timestamp = int(time.time())
    return COMMIT_DIR.joinpath(f"{timestamp}.log")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:27<00:00,  6.82s/it]


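# Happy Path

The model that generates the completion is also the model that re-runs it for validation, so the hashes recorded in both passes should be identical.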

In [2]:
from deeptrust.commits import Commit

input_text = "What is proto-danksharding and how is it related to eth sharding?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Single source of truth for the sampling settings: the same dict is stored in
# the commit and unpacked into `generate`, so the recorded configuration cannot
# drift from the one that was actually used.
# Note: sampling is unseeded in this cell, so the completion (and therefore
# the hashes) will differ from run to run.
generation_config = {"do_sample": True, "max_length": 24, "num_return_sequences": 1}

# Attach a fresh commit with an empty hash list to the model before generating.
model.deeptrust_commit = Commit(
    model_name=model_name,
    device="cuda",
    dtype="float32",
    engine="transformers",
    hashes=[],
    completion=None,
    input_tokens=input_ids.shape[1],  # number of prompt tokens (batch size is 1)
    generation_config=generation_config,
)

output = model.generate(input_ids.cuda(), **generation_config)

# Record the full token sequence (prompt + generated tokens) as plain ints and
# persist the generation-side commit.
model.deeptrust_commit.completion = output[0].tolist()
model.deeptrust_commit.to_file("./happy-gen.json")
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


<|begin_of_text|>What is proto-danksharding and how is it related to eth sharding? 
Proto-danksharding


In [3]:
# Inspect the hashes now stored on the commit; it was created with `hashes=[]`
# before `generate` ran in the previous cell.
model.deeptrust_commit.hashes

['float32[1, 18, 4096](73728, 4096, 1)<aa928465160a2bcfd32412cd6b89f643>',
 'float32[1, 1, 4096](4096, 4096, 1)<fa6fc26a964eebe8e885a03a64975dc3>',
 'float32[1, 1, 4096](4096, 4096, 1)<b9e40dd1771b575c7948658d5c6b69ca>',
 'float32[1, 1, 4096](4096, 4096, 1)<f15eb116b1748a119209b27ead0733d3>',
 'float32[1, 1, 4096](4096, 4096, 1)<a1788b73b649b1ab7cf111520e0ad429>',
 'float32[1, 1, 4096](4096, 4096, 1)<63d0cb508a8ad9f5bcefa99454c0e242>']

In [4]:
# Validation pass: clear the hashes recorded during generation, then replay the
# completion through the same model in one forward call. The hash list is
# non-empty again afterwards (see the cell output), presumably filled in by the
# deeptrust model during the forward pass.
model.deeptrust_commit.hashes = []

# The final token is sliced off before the replay; presumably because it was
# sampled but never fed back through the model during generation — TODO confirm.
with torch.inference_mode():
    _ = model(output[:, :-1])

# Persist the validation-side commit and display its hashes for comparison with
# the generation-side hashes shown above.
model.deeptrust_commit.to_file("./happy-val.json")
model.deeptrust_commit.hashes

torch.Size([1, 23, 4096]) 94208 73728
torch.Size([1, 23, 4096]) 94208 77824
torch.Size([1, 23, 4096]) 94208 81920
torch.Size([1, 23, 4096]) 94208 86016
torch.Size([1, 23, 4096]) 94208 90112


['float32[1, 18, 4096](73728, 4096, 1)<aa928465160a2bcfd32412cd6b89f643>',
 'float32[1, 1, 4096](4096, 4096, 1)<fa6fc26a964eebe8e885a03a64975dc3>',
 'float32[1, 1, 4096](4096, 4096, 1)<b9e40dd1771b575c7948658d5c6b69ca>',
 'float32[1, 1, 4096](4096, 4096, 1)<f15eb116b1748a119209b27ead0733d3>',
 'float32[1, 1, 4096](4096, 4096, 1)<a1788b73b649b1ab7cf111520e0ad429>',
 'float32[1, 1, 4096](4096, 4096, 1)<63d0cb508a8ad9f5bcefa99454c0e242>']

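# Bad Path

A different, smaller model (`meta-llama/Llama-3.2-1B` in bfloat16) generates the completion while its commit still claims `meta-llama/Llama-3.1-8B-Instruct` in float32; validating with the claimed model should then produce different hashes.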

In [5]:
# Substitute model for the bad path: a different checkpoint loaded in bfloat16,
# through the same deeptrust `LlamaForCausalLM` class as the reference model.
bad_model_name = "meta-llama/Llama-3.2-1B"
bad_model = LlamaForCausalLM.from_pretrained(
    bad_model_name,
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)
bad_tokenizer = AutoTokenizer.from_pretrained(
    bad_model_name,
    use_fast=True,
)

In [6]:
from deeptrust.commits import Commit

input_text = "What is proto-danksharding and how is it related to eth sharding?"
# NOTE(review): the prompt is tokenized (and later decoded) with `tokenizer`,
# not `bad_tokenizer` — presumably intentional; confirm.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Single source of truth for the sampling settings: the same dict is stored in
# the commit and unpacked into `generate`.
generation_config = {"do_sample": True, "max_length": 24, "num_return_sequences": 1}

# The commit attached to `bad_model` carries `model_name` and "float32", i.e.
# the reference model's identity rather than `bad_model_name` / bfloat16.
# Presumably deliberate: this section simulates a prover that misreports which
# model it ran.
bad_model.deeptrust_commit = Commit(
    model_name=model_name,
    device="cuda",
    dtype="float32",
    engine="transformers",
    hashes=[],
    completion=None,
    input_tokens=input_ids.shape[1],  # number of prompt tokens (batch size is 1)
    generation_config=generation_config,
)

output = bad_model.generate(input_ids.cuda(), **generation_config)
completion = output[0].tolist()

# Save the commit that was attached to `bad_model`. Previously this wrote
# `model.deeptrust_commit`, so bad-gen.json held hashes left over from the
# happy path and the bad model's commit was never persisted.
bad_model.deeptrust_commit.completion = completion
bad_model.deeptrust_commit.to_file("./bad-gen.json")

# The validation cell of this section replays `output` through `model` and
# saves `model.deeptrust_commit`; give that commit the same completion so the
# two files describe the same token sequence.
model.deeptrust_commit.completion = list(completion)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


<|begin_of_text|>What is proto-danksharding and how is it related to eth sharding? What is the difference between proto


In [7]:
# Inspect the hashes stored on `bad_model`'s commit; it was created with
# `hashes=[]` before `bad_model.generate` ran in the previous cell.
bad_model.deeptrust_commit.hashes

['bfloat16[1, 18, 2048](36864, 2048, 1)<01999b437754b382a5d466021fe4f175>',
 'bfloat16[1, 1, 2048](2048, 2048, 1)<f5c1df2c601a6943daf6504767766e99>',
 'bfloat16[1, 1, 2048](2048, 2048, 1)<9f6093e8ece73bc15bdacaa9f873fb16>',
 'bfloat16[1, 1, 2048](2048, 2048, 1)<d772bcc340ad20e21e1325368fea8108>',
 'bfloat16[1, 1, 2048](2048, 2048, 1)<0f83b21c0a1e072984d81beed14b1532>',
 'bfloat16[1, 1, 2048](2048, 2048, 1)<26e206f6a0a6ab5bc0892762e1fe16b8>']

In [8]:
# Validation pass for the bad path: `output` now holds the completion produced
# by `bad_model`; it is replayed through the reference `model`, whose commit is
# cleared first so that only hashes from this forward call are recorded.
model.deeptrust_commit.hashes = []

# The final token is sliced off before the replay; presumably because it was
# sampled but never fed back through the model during generation — TODO confirm.
with torch.inference_mode():
    _ = model(output[:, :-1])

# Persist the validation-side commit and display its hashes for comparison with
# the hashes recorded by `bad_model` above.
model.deeptrust_commit.to_file("./bad-val.json")
model.deeptrust_commit.hashes

torch.Size([1, 23, 4096]) 94208 73728
torch.Size([1, 23, 4096]) 94208 77824
torch.Size([1, 23, 4096]) 94208 81920
torch.Size([1, 23, 4096]) 94208 86016
torch.Size([1, 23, 4096]) 94208 90112


['float32[1, 18, 4096](73728, 4096, 1)<aa928465160a2bcfd32412cd6b89f643>',
 'float32[1, 1, 4096](4096, 4096, 1)<e6fdeb0f56fa4f69aa15ceec58e5d5d0>',
 'float32[1, 1, 4096](4096, 4096, 1)<7bfc01e4d9f72657e1f33657037954c4>',
 'float32[1, 1, 4096](4096, 4096, 1)<747ff6a5eca11c002f328322d635ad69>',
 'float32[1, 1, 4096](4096, 4096, 1)<560efd127e41d5fd69dac3c3036cbbd4>',
 'float32[1, 1, 4096](4096, 4096, 1)<1b16647798b96feea6c83ea09ad32b2a>']

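# Validation

`diff` exits with status 0 when the generation and validation commit files are identical and 1 when they differ.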

In [13]:
%%bash
# Compare the generation-side and validation-side commits for the happy path.
# diff exit status: 0 = identical, 1 = files differ, >1 = trouble (e.g. a file
# is missing). Only stdout is discarded; the former `2>&1 > /dev/null` did not
# silence stderr either, because redirections are applied left to right.
diff happy-gen.json happy-val.json > /dev/null
echo $?

0


In [14]:
%%bash
# Compare the generation-side and validation-side commits for the bad path.
# diff exit status: 0 = identical, 1 = files differ, >1 = trouble (e.g. a file
# is missing). Only stdout is discarded; the former `2>&1 > /dev/null` did not
# silence stderr either, because redirections are applied left to right.
diff bad-gen.json bad-val.json > /dev/null
echo $?

1
