In [1]:
from koRKut_tokenizer import koRKutTokenizer

# Build the tokenizer from the vocabulary file on disk. Id 61 is the value that
# encode() emits between words and that tokenize() shows as ' ' in the outputs
# below (NOTE(review): `emptiness` presumably names that separator id - confirm
# in koRKutTokenizer).
korkut_tokenizer = koRKutTokenizer(
    vocab_file="tokenizer.json",
    emptiness=61,
)

In [2]:
# Sample text passed to the tokenizer's encode() and tokenize() calls in the following cells.
sentence = "the capital of the united"

In [3]:
# Encode the sample sentence into token ids. The bare `out1` on the last line
# makes the notebook display the resulting tensor.
out1 = korkut_tokenizer.encode(sentence)
out1

tensor([ 0, 61,  1, 61,  2, 61,  0, 61,  3])

In [4]:
# Show the string pieces for the same sentence; the displayed list contains each
# word and each single space as its own piece.
korkut_tokenizer.tokenize(sentence)

['the', ' ', 'capital', ' ', 'of', ' ', 'the', ' ', 'united']

In [5]:
# Round-trip check: decode the ids that encode() produced (out1) instead of a
# hand-copied literal, so this cell cannot drift from the encode cell when the
# sentence or vocabulary changes. .tolist() turns the id tensor into a plain
# Python list of ints, which is the same kind of argument the literal supplied.
korkut_tokenizer.decode(out1.tolist())

'the capital of the united'

In [6]:
from koRKut_embedding import koRKutEmbedding

# Embedding layer sized to the tokenizer's vocabulary, producing 4-component
# vectors per token.
# NOTE(review): context_length=12 is presumably the maximum sequence length the
# layer supports - confirm in koRKutEmbedding.
korkut_embedding = koRKutEmbedding(
    vocab_size=len(korkut_tokenizer.vocab),
    embedding_dim=4,
    context_length=12,
)

In [7]:
# Embed the encoded ids. The displayed tensor has one 4-component row per id in
# out1 (9 rows), nested under one extra leading axis of size 1.
out2 = korkut_embedding(out1)
out2

tensor([[[ 0.0285,  0.0900,  1.4076,  0.1380],
         [-0.4825,  2.1911,  0.0491,  0.4959],
         [-0.1496, -0.4199,  0.8769, -0.6559],
         [ 0.1561,  2.0489, -0.4592,  0.9213],
         [-0.6635,  0.0211,  0.1754, -1.0920],
         [ 0.3526,  1.8250,  0.3331,  1.3100],
         [ 0.4207, -0.0036,  1.3435,  0.1647],
         [-0.4496,  1.5284,  0.1820,  1.6464],
         [ 0.7509, -0.2153,  0.6908,  0.4892]]], grad_fn=<CopySlices>)

In [8]:
from koRKut_multi_head_attention import koRKutMultiHeadAttention

# Attention layer using the same embedding_dim (4) and context_length (12) as
# the embedding layer, with 2 heads and a 4-wide output.
korkut_multi_head_attention = koRKutMultiHeadAttention(
    embedding_dim=4,
    output_dim=4,
    context_length=12,
    num_heads=2,
)

In [9]:
# Run the embedded sequence through the attention layer. The displayed result
# keeps the same layout as out2: 9 rows of 4 values under one leading axis.
out3 = korkut_multi_head_attention(out2)
out3

tensor([[[ 0.5374, -0.2896,  0.0755,  0.4013],
         [ 0.5960, -0.3443,  0.1177,  0.4163],
         [ 0.4399, -0.1589, -0.0371,  0.3577],
         [ 0.5951, -0.1945,  0.0238,  0.3687],
         [ 0.4857, -0.0378, -0.1123,  0.3144],
         [ 0.6201, -0.3145,  0.1132,  0.4099],
         [ 0.4846, -0.2676,  0.0469,  0.3940],
         [ 0.2393, -0.2128,  0.0387,  0.4167],
         [ 0.3378, -0.3104,  0.0522,  0.4153]]], grad_fn=<ViewBackward0>)

In [10]:
from torch.nn.functional import cosine_similarity

# Compare the attention outputs at the first two sequence positions under the
# leading axis. Per the tokenize() output, position 0 is 'the' and position 1 is
# the ' ' separator, so this measures a word vector against a separator vector.
v1, v2 = out3[0, 0], out3[0, 1]

# dim=0 because v1 and v2 are 1-D vectors after the indexing above.
cosine_similarity(v1, v2, dim=0)


tensor(0.9981, grad_fn=<SumBackward1>)

In [11]:
from koRKut_layer_norm import koRKutLayerNorm

# Normalisation layer built for 4-wide vectors.
korkut_layer_norm = koRKutLayerNorm(embedding_dim=4)

# Applied to the attention output. In the displayed result each row of 4 values
# sums to roughly zero, i.e. the values are centred per token vector.
out4 = korkut_layer_norm(out3)
out4

tensor([[[ 1.1152, -1.4736, -0.3308,  0.6892],
         [ 1.1227, -1.5194, -0.2212,  0.6179],
         [ 1.1408, -1.2186, -0.7389,  0.8167],
         [ 1.3025, -1.2892, -0.5728,  0.5594],
         [ 1.3107, -0.8124, -1.1144,  0.6160],
         [ 1.1766, -1.4866, -0.2677,  0.5777],
         [ 1.0736, -1.4489, -0.3943,  0.7696],
         [ 0.5071, -1.4221, -0.3491,  1.2641],
         [ 0.7517, -1.5243, -0.2511,  1.0237]]], grad_fn=<MulBackward0>)

In [12]:
from koRKut_multi_layer_perceptron import koRKutMultiLayerPerceptron

# Feed-forward block for 4-wide vectors, constructed with hidden_dim=2.
korkut_multi_layer_perceptron = koRKutMultiLayerPerceptron(embedding_dim=4, hidden_dim=2)

# Applied to the normalised tensor; the displayed result still has 4 values per row.
out5 = korkut_multi_layer_perceptron(out4)
out5

tensor([[[ 0.2511, -0.1641, -0.4415,  0.6382],
         [ 0.2539, -0.1628, -0.4480,  0.6400],
         [ 0.2492, -0.1766, -0.4118,  0.6328],
         [ 0.2389, -0.1636, -0.4262,  0.6328],
         [ 0.2830, -0.2229, -0.3568,  0.6317],
         [ 0.2489, -0.1602, -0.4469,  0.6386],
         [ 0.2526, -0.1670, -0.4372,  0.6378],
         [ 0.3025, -0.1940, -0.4458,  0.6511],
         [ 0.2837, -0.1835, -0.4431,  0.6462]]], grad_fn=<ViewBackward0>)

In [15]:
from koRKut_decoder_block import koRKutDecoderBlock

# Decoder block using the same embedding_dim, num_heads and context_length as
# the standalone attention layer tried above.
korkut_decoder_block = koRKutDecoderBlock(embedding_dim=4, num_heads=2, context_length=12)

# Note the input is out2 (the embedding output), not out5: the block is given
# the raw embeddings rather than the result of the manual
# attention -> norm -> MLP chain from the previous cells.
out6 = korkut_decoder_block(out2)
out6

tensor([[[-2.6538, -0.1789,  1.2974,  1.5353],
         [-1.6608,  0.1866, -0.2448,  1.7190],
         [-2.5279, -0.1254,  1.6113,  1.0421],
         [-1.7582,  0.3227,  0.0817,  1.3538],
         [-1.9011, -0.0102,  1.0659,  0.8454],
         [-1.7711,  0.1525, -0.4099,  2.0284],
         [-2.5621, -1.1675,  1.2379,  2.4917],
         [-2.0898,  0.0537, -0.3368,  2.3729],
         [-2.6118, -1.2180,  1.7710,  2.0588]]], grad_fn=<AddBackward0>)

In [16]:
from koRKut_model import koRKutModel

# Full model: vocabulary-sized embedding followed by 2 stacked decoder blocks.
# NOTE(review): num_heads=4 here, whereas the standalone attention and decoder
# block cells used num_heads=2 with the same embedding_dim=4 - confirm the
# difference is intended.
korkut_model = koRKutModel(
    vocab_size=len(korkut_tokenizer.vocab),
    embedding_dim=4,
    num_heads=4,
    context_length=12,
    num_layers=2,
)

In [17]:
# Display the model's module tree; the printed repr lists the embedding and the
# stacked decoder blocks with their sub-layers.
korkut_model

koRKutModel(
  (embedding): koRKutEmbedding(
    (embedding): Embedding(64, 4)
  )
  (layers): Sequential(
    (0): koRKutDecoderBlock(
      (self_attention): koRKutMultiHeadAttention(
        (multi_head_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=4, out_features=4, bias=True)
        )
        (projection): Linear(in_features=4, out_features=4, bias=True)
      )
      (norm_layer1): koRKutLayerNorm()
      (multiLayerPerceptron): koRKutMultiLayerPerceptron(
        (gate_proj): Linear(in_features=4, out_features=4, bias=False)
        (up_proj): Linear(in_features=4, out_features=4, bias=False)
        (down_proj): Linear(in_features=4, out_features=4, bias=True)
        (silu): SiLU()
      )
      (norm_layer2): koRKutLayerNorm()
    )
    (1): koRKutDecoderBlock(
      (self_attention): koRKutMultiHeadAttention(
        (multi_head_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_featur