In [1]:
import torch
import sys
sys.path.append('/home/deniz/Masaüstü/llmdersi')  # v2'nin bir üst dizini

from v2.usta_model import UstaModel
from v2.usta_tokenizer import UstaTokenizer

device = "cpu"

if torch.cuda.is_available():
  device = "cuda"
elif torch.backends.mps.is_available():
  device = "mps"
  

print(f"Using device: {device}")

u_tokenizer = UstaTokenizer("newtokenizer.json")

prompts = [
  "the capital of the united",
  "madrid is in",
  "the capital of france is",
  "the capital of germany is"
]

tokens = u_tokenizer.encode(prompts[0])
tokens = tokens.to(device)
print(tokens)
batch_tokens = u_tokenizer.encode_batch(prompts, 32)
batch_tokens = batch_tokens.to(device)
batch_tokens.shape

Using device: cpu
tensor([ 0, 61,  1, 61,  2, 61,  0, 61,  3])


torch.Size([4, 32])

In [2]:
torch.manual_seed(1)
context_length = 32

u_model = UstaModel(
  vocab_size=len(u_tokenizer.vocab),
  embedding_dim=12,
  num_heads=4,
  context_length=context_length,
  num_layers=8,
  device=device
)

# load model
u_model.load_state_dict(torch.load("./u_model_4000.pth"))

RuntimeError: Error(s) in loading state_dict for UstaModel:
	Unexpected key(s) in state_dict: "layers.0.self_attention.mask", "layers.1.self_attention.mask", "layers.2.self_attention.mask", "layers.3.self_attention.mask", "layers.4.self_attention.mask", "layers.5.self_attention.mask", "layers.6.self_attention.mask", "layers.7.self_attention.mask". 

In [3]:
out = u_model(batch_tokens)
out.shape

torch.Size([4, 32, 64])

In [4]:
# temperature
# top_k 
# top_p


In [5]:
top_k = 10

In [6]:
sorted_outs = sorted(out[-1][-1].tolist(), reverse=True)
sorted_indexes = []
for so in sorted_outs[:top_k]:
  so_index = out[-1][-1].tolist().index(so)
  sorted_indexes.append(so_index)
sorted_outs = torch.tensor(sorted_outs[:top_k])
sorted_outs, sorted_indexes


(tensor([17.5470, 13.6248,  9.1306,  8.0484,  7.2238,  7.1877,  7.1396,  6.7132,
          6.3709,  6.3608]),
 [61, 60, 35, 58, 38, 18, 9, 59, 11, 49])

In [7]:
values, indexes = torch.topk(out[-1][-1], k=10)
values, indexes

(tensor([17.5470, 13.6248,  9.1306,  8.0484,  7.2238,  7.1877,  7.1396,  6.7132,
          6.3709,  6.3608], grad_fn=<TopkBackward0>),
 tensor([61, 60, 35, 58, 38, 18,  9, 59, 11, 49]))

In [8]:
temperature = 10.51
adjusted_outs = torch.tensor(sorted_outs) / temperature
adjusted_outs

  adjusted_outs = torch.tensor(sorted_outs) / temperature


tensor([1.6695, 1.2964, 0.8688, 0.7658, 0.6873, 0.6839, 0.6793, 0.6387, 0.6062,
        0.6052])

In [9]:
probs = torch.softmax(adjusted_outs, dim=-1)
probs

tensor([0.2124, 0.1462, 0.0953, 0.0860, 0.0795, 0.0793, 0.0789, 0.0758, 0.0733,
        0.0733])

In [10]:
top_p = 0.7

In [11]:
[0.2128, 0.36, 0.37, 0.38, 0.70, 0.71]
torch.sum(torch.tensor([0.2128, 0.1509, 0.0932, 0.0884]))

tensor(0.5453)

In [12]:
sample_count = {}
for _ in range(1000):
  sample = torch.multinomial(probs, 1)
  sample_count[sample.item()] = sample_count.get(sample.item(), 0) + 1
sample_count

{2: 93, 8: 71, 0: 219, 6: 89, 7: 78, 1: 140, 4: 87, 3: 80, 9: 63, 5: 80}

In [27]:
out = u_model.generate(tokens,max_new_tokens=3,temperature=0.00000051)
u_tokenizer.decode(out)

'the capital of the united europe '

In [13]:
outs = {}
for _ in range(100):
  out = u_model.generate(tokens, max_new_tokens = 3, temperature = 1.7, top_k = 10, top_p = 0.7)
  decoded = u_tokenizer.decode(out)
  outs[decoded] = outs.get(decoded, 0) + 1
outs

{'the capital of the united and ': 1,
 'the capital of the united.': 3,
 'the capital of the united own.': 1,
 'the capital of the united the ': 18,
 'the capital of the united europe ': 24,
 'the capital of the united capitals': 4,
 'the capital of the united its ': 2,
 'the capital of the united country ': 8,
 'the capital of the united a ': 1,
 'the capital of the united identitys': 2,
 'the capital of the united is ': 4,
 'the capital of the united place ': 2,
 'the capital of the united spain,': 2,
 'the capital of the united capital,': 3,
 'the capital of the united states': 1,
 'the capital of the united united ': 5,
 'the capital of the united capital ': 4,
 'the capital of the united has ': 1,
 'the capital of the united europe.': 2,
 'the capital of the united isberlin': 1,
 'the capital of the united city,': 1,
 'the capital of the united madrid ': 1,
 'the capital of the united europe,': 1,
 'the capital of the united together ': 1,
 'the capital of the united,, ': 1,
 'the