# Implementasi pada Mpu-Prapanca

In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
'''
# hyperparameter
batch_size = 64
block_size = 256
max_iters = 3000
eval_interval = 300
learning_rate = 3e-4
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200
n_embd= 384

Step 2700: train loss 1.6100, val loss 1.9787  --> block_size = 16
Step 2700: train loss 1.1997, val loss 1.6784  --> block_size = 16, n_embd=128
Step 2700: train loss 0.9533, val loss 1.5886  --> block_size = 32, n_embd=128
Step 2700: train loss 0.7735, val loss 1.7474  --> block_size = 32, n_embd=256
Step 2700: train loss 0.3507, val loss 2.3076  --> block_size = 64, n_embd=256
batch_size = 32
block_size = 8
n_embd=32
'''
# hyperparameter
batch_size = 32
block_size = 64
max_iters = 3000
eval_interval = 300
learning_rate = 1e-3
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200
n_embd= 256

torch.manual_seed(1337)

# We always start with a dataset to train on. Let's download the tiny shakespeare dataset
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
path = "/content/drive/MyDrive/Pararaton_Mpu_Prapanca.txt"
with open(path, "r", encoding="utf-8") as f:
  text = f.read()

# tokenization
chars = sorted(list(set(text)))
vocab_size = len(chars)

# create a mapping from characters that occur in this text
stoi = { ch:i for i, ch in enumerate(chars)}
itos = { i:ch for i, ch in enumerate(chars)}
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join(itos[i] for i in l)

# train and test splits
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9*len(data))
train_data = data[:n]
val_data = data[n:]

# data loading
def get_batch(split):
  # generate a small batch of data input x and target y
  data = train_data if split == 'train' else val_data
  ix = torch.randint(len(data) - block_size, (batch_size,))
  x = torch.stack([data[i:i+block_size] for i in ix])
  y = torch.stack([data[i+1:i+block_size+1] for i in ix])
  x, y = x.to(device), y.to(device)

  return x, y

@torch.no_grad()
def estimate_loss():
  out = {}
  model.eval()
  for split in ('train', 'val'):
    losses = torch.zeros(eval_iters)
    for k in range(eval_iters):
      X, Y = get_batch(split)
      logits, loss = model(X, Y)
      losses[k] = loss.item()
    out[split] = losses.mean()
  model.train()
  return out

class Head(nn.Module):

  def __init__(self, head_size):
    super().__init__()
    self.key = nn.Linear(n_embd, head_size, bias=False)
    self.query = nn.Linear(n_embd, head_size, bias=False)
    self.value = nn.Linear(n_embd, head_size, bias=False)
    self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))

  def forward(self, x):
    B,T,C = x.shape
    k = self.key(x)
    q = self.query(x)

    wei = q @ k.transpose(-2, -1) * C**-0.5
    wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
    wei = F.softmax(wei, dim=-1)

    v = self.value(x)
    out = wei @ v
    return out

class MultiHeadAttention(nn.Module):
  def __init__(self, num_heads, head_size):
    super().__init__()
    self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
    self.proj = nn.Linear(n_embd, n_embd)

  def forward(self, x):
    out = torch.cat([h(x) for h in self.heads], dim=-1)
    out = self.proj(out)
    return out

class FeedForward(nn.Module):

  def __init__(self, n_embd):
    super().__init__()
    self.net = nn.Sequential(
        nn.Linear(n_embd, 4 * n_embd),
        nn.ReLU(),
        nn.Linear(4 * n_embd, n_embd),
    )

  def forward(self, x):
    return self.net(x)


class Block(nn.Module):
  def __init__(self, n_embd, n_head):
    super().__init__()
    # we want to intersperses communitation and then computation
    head_size = n_embd // n_head
    self.sa = MultiHeadAttention(n_head, head_size)  # communication
    self.ffwd = FeedForward(n_embd)                  # computation
    self.ln1 = nn.LayerNorm(n_embd)
    self.ln2 = nn.LayerNorm(n_embd)

  def forward(self, x):
    x = x + self.sa(self.ln1(x))
    x = x + self.ffwd(self.ln2(x))
    return x

# super simple bigram model
class BigramLanguageModel(nn.Module):

  def __init__(self):
    super().__init__()

    self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
    self.position_embedding_table = nn.Embedding(block_size, n_embd)
    self.blocks = nn.Sequential(
        Block(n_embd, n_head=4),
        Block(n_embd, n_head=4),
        Block(n_embd, n_head=4),
        nn.LayerNorm(n_embd),
    )
    #self.sa_head = MultiHeadAttention(4, n_embd//4)  # i.e. 4 heads of 8-dimensional self-attention
    #self.ffwd = FeedForward(n_embd)
    self.lm_head = nn.Linear(n_embd, vocab_size)

  def forward(self, idx, targets=None):
    B, T = idx.shape
    # idx and targets are both (B, T) tensor of integers
    tok_emb = self.token_embedding_table(idx) # (B, T, C)  --> C in here is n_emb
    pos_emb = self.position_embedding_table(torch.arange(T, device=device))  # (T, C)
    x = tok_emb + pos_emb   #(B, T, C)
    x = self.blocks(x) # (B, T, C)
    #x = self.sa_head(x)
    #x = self.ffwd(x)  # (B, T, C)
    logits = self.lm_head(x) # (B, T, vocab_size)

    if targets is None:
      loss = None
    else:
      B, T, C = logits.shape
      logits = logits.view(B*T, C)
      targets = targets.view(B*T)
      loss = F.cross_entropy(logits, targets)

    return logits, loss

  def generate(self, idx, max_new_tokens):
    for _ in range(max_new_tokens):
      idx_cond = idx[:, -block_size:]
      logits, loss = self(idx_cond)
      # foucus only on the last time step
      logits = logits[:, -1, :] # (B, C)
      # apply softmax to get probabilities
      probs = F.softmax(logits, dim=-1) # (B, C)
      # sample from the distribution
      idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
      # append sampled index into running sequence
      idx = torch.cat((idx, idx_next), dim=1) # (B,T+1)
    return idx

model = BigramLanguageModel()
m = model.to(device)

# create pytorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for iter in range(max_iters):

  if iter % eval_interval == 0:
    losses = estimate_loss()
    print(f"Step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

  # sample a batch of data
  xb, yb = get_batch('train')

  # evaluate the loss
  logits, loss = model(xb, yb)
  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()

# generate from the model
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))


Step 0: train loss 4.4443, val loss 4.4305
Step 300: train loss 1.6273, val loss 1.9686
Step 600: train loss 1.1797, val loss 1.6356
Step 900: train loss 0.9775, val loss 1.6093
Step 1200: train loss 0.8042, val loss 1.6270
Step 1500: train loss 0.6769, val loss 1.7539
Step 1800: train loss 0.5534, val loss 1.9015
Step 2100: train loss 0.4542, val loss 2.0352
Step 2400: train loss 0.3978, val loss 2.2546
Step 2700: train loss 0.3455, val loss 2.3562

Pada waktu sunyi orang telah tidur, dikejar dan diamuk lagi oleh Nusapati.
Kata orang orang Batil: "Sudah wafatlah tuan waktu ia menuju ke pergi, jika sudah kereta, gugur, darah seperti rubuh oleh gunung Tugaran Singasari, sempurna takut berkata: "Tuanku, semengamur di jalan, demikian persamaanya.
Lalu itu pagi kekatang saja. Keris berada dian pengelah Gajahpara, hari mengira, sendiring mati oleh hamba hamba Madunan, anaknya seorang kelanjutnya Dewa."
Utusanya demikian persembuatannya saja."
Pad


In [2]:
context = torch.randint(3, (2, 2), device=device)
print(context)
print(decode(m.generate(context, max_new_tokens=50)[0].tolist()))

tensor([[0, 1],
        [0, 1]], device='cuda:0')

 ada waktu aku memuja".
Menjawablah Ken Angrok: "Be


In [3]:
context = torch.randint(50, (5,), device=device)
print(context)
print(context[0])
print(context[0].tolist())
print(decode(context.tolist()))

tensor([39,  6,  8,  9, 34], device='cuda:0')
tensor(39, device='cuda:0')
39
U-01O


In [4]:
torch.tensor((1, 1), dtype=torch.long, device=device)

tensor([1, 1], device='cuda:0')

In [5]:
torch.tensor([[1]])

tensor([[1]])

In [8]:
my_text = 'raja'
print(encode(my_text))
text = torch.tensor([encode(my_text)], dtype=torch.long, device=device)
print(text)

[60, 44, 53, 44]
tensor([[60, 44, 53, 44]], device='cuda:0')


In [9]:
print(decode(m.generate(text, max_new_tokens=50)[0].tolist()))

raja merusuh.
Adalah seorang neDewa, Bangka, Mpu Palot


In [10]:
print(decode(m.generate(text, max_new_tokens=500)[0].tolist()))

raja tidur. Raden Wijaya ditusukkan, hendaknyalah kamu wahai utusan, buyung Angrok, akhirnya mati oleh gemba." Segera Singasarilah waktu lahir ada dewa: "Yang bagan buah jambu tanda baik, sayembah kepada keris kepada daerah lingkungan kutu: Ken Angrok, semua tingkah laku Ken Endok, sedang cinta mencintai para kepada hari raya, hari tiga, hari dua, hari sif, pergilah yang mengejar. Ketersesampatan waktu ia menuju ke Jawa, tidak berlah yang menggantinya, lalu diberi raja Mantrolot itu, memanggil buru 


In [11]:
print(decode(m.generate(text, max_new_tokens=1000)[0].tolist()))

raja, tak laki Adapun di Tuyantapang daerah lingkungan Bapa, kemudian dijumpai anaknya hamba kerjanjian itu, kalau tuan pulang, kalau Ken Angrok memang sungguh sungga rah: "masih pada waktu seorang kelamaan ketahuannya, bergantung di Tumapel.
Sang Anusapati mempunyai seorang anak, : "Baiklah, kakan kuda masuk kedalam istana Daha, untuk melarikan puteri bangsawan, hendaknyalah kamu menyuruh membuat keris kepadanya, sungguh ia bersediga.
Sesudah Sang Amurwabumi sebuara perempuan yang diindahkan melihat penuh, gugur, tak melamakan pembalasan, tidak, saya akan tahu kanan kamu, buyung, agar supaya hamba dapat menghamba kepada Tunggul Ametung oleh Ken Endok, maka keluar aingku dan didalam soal soal yang luar biasa."
Ken Angrok pergi dari Karuman.
Kata kepada Ken Angrok: "Betulah tuanku kepada karena jumlah orang yang tidur, ditusuk oleh Tuan, tak untuk didjadikan dan persekutuannya: "Nah, bawa berjudi di Tumapel, nama nobatan Batara Guru, demikian itu pujian dari dewaktu ia orang yang mengge