In [1]:
import torch
from transformers import AutoTokenizer
from deeptrust.models.llama.modeling_llama import LlamaForCausalLM, COMMIT_CONFIG

model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", device_map="cuda", torch_dtype=torch.float32)
model

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:26<00:00,  6.69s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [2]:
import time
from pathlib import Path

COMMIT_DIR = Path("/tmp/deeptrust-commits")
COMMIT_DIR.mkdir(exist_ok=True)

def get_commit_path_from_time():
    return COMMIT_DIR / f"{int(time.time())}.log"

get_commit_path_from_time()

PosixPath('/tmp/deeptrust-commits/1729411585.log')

In [3]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", use_fast=True)

input_text = "What is proto-danksharding and how is it related to eth sharding?"
commit_file = get_commit_path_from_time().open("w")
print(f"Writing commit to {commit_file.name}")
COMMIT_CONFIG.commit_file = commit_file
commit_file.write(input_text)
commit_file.write("\n")

input_ids = tokenizer(input_text, return_tensors="pt").input_ids

output = model.generate(input_ids.cuda(), do_sample=True, max_length=100, num_return_sequences=1)

commit_file.write(tokenizer.decode(output[0]))

commit_file.close()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Writing commit to /tmp/deeptrust-commits/1729411586.log


Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


In [4]:
output[0]

tensor([128000,   3923,    374,  18940,   1773,   1201,    939,  29510,    323,
          1268,    374,    433,   5552,    311,   8537,    559,  29510,     30,
           482,  33986,     50,   5185,    198,     36,  19041,    372,    753,
         13707,    527,   3318,    389,    559,  29510,     11,    264,  94840,
          6425,    430,   1436,   5376,    279,   4009,    753,   8824,    311,
          1920,  14463,     13,  58777,   1773,   1201,    939,  29510,    374,
           264,   1401,   3777,    315,    420,   3197,    627,   2059,  29510,
           374,    264,  94840,   6425,    430,  18065,  45473,    279,  35046,
          4009,   1139,   9333,     11,   9678,   5315,    315,   7954,   2663,
         75210,     13,   9062,  53169,    690,    387,   8647,    369,   8863,
           264,  13651,    315,    279,   4009,    753,  14463,     11,    902,
          1288], device='cuda:0')

In [5]:
# input_text = """\
# What is proto-danksharding and how is it related to eth sharding??
# Proto-danksharding is a proposal by Vitalik Buterin that aims to improve the scalability and security of the Ethereum network. It is a precursor to the more advanced sharding solution called Danksharding.
# Danksharding is a sharding solution that was proposed by Vitalik Buterin in 2020. It aims to improve the scalability and security of the Ethereum network by breaking it down into\
# """
# input_text = """\
# What is proto-danksharding and how is it related to eth sharding??
# Proto-danksharding is a
# """
# 
commit_file = get_commit_path_from_time().open("w")
print(f"Writing commit to {commit_file.name}")
COMMIT_CONFIG.commit_file = commit_file
COMMIT_CONFIG.input_prompt_length = 18
# 
# input_ids = tokenizer(input_text, return_tensors="pt").input_ids
# 
# commit_file.write(tokenizer.decode(input_ids[0][:COMMIT_CONFIG.input_prompt_length], skip_special_tokens=True))
# commit_file.write("\n")

with torch.inference_mode():
    _ = model(output[:, :24])

commit_file.write(input_text)
commit_file.close()

Writing commit to /tmp/deeptrust-commits/1729411593.log


In [6]:
input_text = "What is proto-danksharding and how is it related to eth sharding?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
input_ids

tensor([[128000,   3923,    374,  18940,   1773,   1201,    939,  29510,    323,
           1268,    374,    433,   5552,    311,   8537,    559,  29510,     30]])

In [7]:
input_text = """\
What is proto-danksharding and how is it related to eth sharding??
Proto-danksharding is a proposal by Vitalik Buterin that aims to improve the scalability and security of the Ethereum network. It is a precursor to the more advanced sharding solution called Danksharding.
Danksharding is a sharding solution that was proposed by Vitalik Buterin in 2020. It aims to improve the scalability and security of the Ethereum network by breaking it down into\
"""
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
input_ids

tensor([[128000,   3923,    374,  18940,   1773,   1201,    939,  29510,    323,
           1268,    374,    433,   5552,    311,   8537,    559,  29510,     30,
           5380,  32649,   1773,   1201,    939,  29510,    374,    264,  14050,
            555,  55371,   1609,   2030,  85509,    430,  22262,    311,   7417,
            279,  94840,    323,   4868,    315,    279,  35046,   4009,     13,
           1102,    374,    264,  71261,    311,    279,    810,  11084,    559,
          29510,   6425,   2663,  71507,    939,  29510,    627,     35,   1201,
            939,  29510,    374,    264,    559,  29510,   6425,    430,    574,
          11223,    555,  55371,   1609,   2030,  85509,    304,    220,   2366,
             15,     13,   1102,  22262,    311,   7417,    279,  94840,    323,
           4868,    315,    279,  35046,   4009,    555,  15061,    433,   1523,
           1139]])