In [1]:
import torch

from transformers import AutoModelForCausalLM, pipeline, GPT2LMHeadModel, GPT2Tokenizer

import numpy as np

In [32]:
from model.tokens import Token, TOKEN_TYPE_EXPRESSIONS, TOKEN_TYPE_ANSWERS
from model.equation_interpreter import Equation
from model.vocabulary import Vocabulary
from model.tokens import Token

In [3]:
# Global variables
# model_name is interpolated into the Hub repository id when the checkpoint is
# loaded further down; project_name and repo_name are not referenced by any
# cell visible in this notebook.
model_name = "JustSumAI"
project_name = "JustSumAI"
repo_name = f"{model_name}_cleaned_gpt2_data"

# Create a combined vocabulary covering both the expression and answer token types.
vocabulary = Vocabulary.construct_from_list(TOKEN_TYPE_EXPRESSIONS + TOKEN_TYPE_ANSWERS)

# Round-trip a small sample (tokens -> indices -> tokens) as a sanity check.
vectorized_sample = vocabulary.vectorize(["#", "/", "0", "-1", "[SEP]", "TT_INTEGER"])

# This tuple must be the LAST expression of the cell: Jupyter only renders the
# final expression, so placing it before the assignments above meant the
# round-trip result was computed and then silently discarded.
vectorized_sample, [vocabulary.getToken(idx) for idx in vectorized_sample]

In [4]:
# Stock GPT-2 tokenizer, loaded to look at what an encoded sentence looks like.
# NOTE(review): none of the cells visible here feed these ids into `model`.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

input_text = "This is my input sequence."

# return_tensors="pt" makes encode() return a PyTorch tensor instead of a list.
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Show the ids together with their shape.
(input_ids, input_ids.shape)

(tensor([[1212,  318,  616, 5128, 8379,   13]]), torch.Size([1, 6]))

# Load model

In [9]:
# Load the pretrained checkpoint named by `model_name`; cache_dir=".." makes
# the downloaded files land in the parent directory of the notebook.
model = GPT2LMHeadModel.from_pretrained(
    f"Dragonoverlord3000/{model_name}",
    cache_dir="..",
)

# Display the module tree (layer types and sizes).
model

Downloading (…)lve/main/config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading pytorch_model.bin:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(68, 1600)
    (wpe): Embedding(1024, 1600)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-47): 48 x GPT2Block(
        (ln_1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1600, out_features=68, bias=False)
)

In [35]:
# Build a single-example prompt batch.
# Judging by the displayed ids, vectorize() brackets the sequence with
# <BEGIN>/<END>; the trailing element is dropped and replaced by the separator
# so the prompt ends in [SEP].
prompt_ids = vocabulary.vectorize(["#", "/", "0", "-1"])
prompt_ids = prompt_ids[:-1] + [vocabulary.separator_index]

# The outer list adds the batch dimension.
test_example_ids = torch.LongTensor([prompt_ids])

(test_example_ids[0], test_example_ids.shape)

(tensor([64, 40, 39, 19,  9, 67]), torch.Size([1, 6]))

In [36]:
# One forward pass over the prompt; keep only the logits.
test = model(test_example_ids).logits
print(test.shape)

# Most likely vocabulary token at every position of the first (only) example.
[
    vocabulary.getToken(position_logits.argmax().item())
    for position_logits in test[0]
]

torch.Size([1, 6, 68])


['TT_INTEGER', 'TT_INTEGER', 'TT_ZERO', 'TT_INTEGER', 'TT_ZERO', 'TT_ZERO']

In [37]:
# Let the model continue the prompt. Generation stops at the vocabulary's
# end-of-sequence index; the mask index is supplied as the padding id.
out = model.generate(
    test_example_ids,
    eos_token_id=vocabulary.end_seq_index,
    pad_token_id=vocabulary.mask_index,
)

(out, out.shape)



(tensor([[64, 40, 39, 19,  9, 67, 51, 49, 49, 49, 44, 60, 58, 49, 49, 49, 53, 60,
          44, 60, 58, 49, 57, 60, 59, 49, 49, 49, 61, 59, 49, 61, 65]]),
 torch.Size([1, 33]))

In [38]:
# Map every generated index of the first (only) sequence back to its token.
[vocabulary.getToken(token_id) for token_id in out[0].tolist()]

['<BEGIN>',
 '#',
 '/',
 '0',
 '-1',
 '[SEP]',
 'TT_ZERO',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_PI',
 'TT_MULTIPLY',
 'TT_PLUS',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_SQRT',
 'TT_MULTIPLY',
 'TT_PI',
 'TT_MULTIPLY',
 'TT_PLUS',
 'TT_INTEGER',
 'TT_LOG',
 'TT_MULTIPLY',
 'TT_MINUS',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_DIVIDE',
 'TT_MINUS',
 'TT_INTEGER',
 'TT_DIVIDE',
 '<END>']

In [39]:
# Turn the generated answer into an Equation (postfix notation).
#
# model.generate() returns the prompt followed by the continuation, and the
# prompt ends with the separator token, so the answer is everything after the
# first [SEP]. Locating the separator replaces the previous hard-coded slice
# [6:-1], which only worked for a 6-token prompt.
generated_ids = out[0].tolist()
answer_start = generated_ids.index(vocabulary.separator_index) + 1
answer_ids = generated_ids[answer_start:]

# Strip <END> only when it is actually there: if generation stopped at the
# length limit instead, an unconditional [:-1] would drop a real answer token.
if answer_ids and answer_ids[-1] == vocabulary.end_seq_index:
    answer_ids = answer_ids[:-1]

eq = Equation(
    [Token(vocabulary.getToken(token_id)) for token_id in answer_ids],
    notation="postfix",
)
eq

<model.equation_interpreter.Equation at 0x2ac04d927d0>

In [40]:
# Display the equation as a string; the recorded output is a parenthesised
# infix expression. (The method name is spelled this way in the project API.)
eq.getMathmetaicalNotation()

'((Z-(Z/Z))/Z)'

In [21]:
# Forward pass keeping the whole model output object (not just .logits).
out2 = model(test_example_ids)
# NOTE: displaying the object prints the full logits tensor, which is long.
out2

CausalLMOutputWithCrossAttentions(loss=None, logits=tensor([[[-4.7814, -4.3675, -4.1686, -3.2316, -3.2816, -1.6199, -3.3658,
          -3.6111, -4.0459, -2.6926, -5.1601, -2.9386, -2.4530, -3.1024,
          -4.8245, -4.4087, -2.9394, -3.4197, -4.9809, -4.0110, -4.5070,
          -4.2573, -4.8574, -3.7422, -4.2806, -2.4575, -3.9825, -4.0106,
          -3.5796, -2.7573, -2.9670, -2.7110, -4.4878, -3.1065, -4.4279,
          -1.9018, -3.9521, -3.4554, -1.6025, -3.0805, -2.7019, -3.9312,
          -4.6762, -4.6417,  1.8085, -4.9880, -5.3125, -3.1860, -4.5953,
           6.0951, -5.0509,  5.9883, -2.3325, -2.9468, -2.8142, -4.8986,
          -5.2478, -0.8302,  0.2941,  1.7131,  1.5417, -0.2900, -4.3382,
          -2.4926,  1.9595, -3.6438, -4.2766,  0.7926],
         [-3.8458, -4.5237, -3.5651, -3.0396, -3.5796, -2.6212, -3.4183,
          -3.8048, -3.5327, -2.7355, -4.8678, -2.3862, -2.5361, -2.8366,
          -5.6516, -4.6812, -3.8775, -3.5892, -4.8070, -4.3252, -4.4857,
          -3.728

In [22]:
# Shape of the logits tensor returned by the forward pass.
out2.logits.shape

torch.Size([1, 6, 68])

In [23]:
# Display the raw logits tensor from the forward pass (large output).
out2.logits

tensor([[[-4.7814, -4.3675, -4.1686, -3.2316, -3.2816, -1.6199, -3.3658,
          -3.6111, -4.0459, -2.6926, -5.1601, -2.9386, -2.4530, -3.1024,
          -4.8245, -4.4087, -2.9394, -3.4197, -4.9809, -4.0110, -4.5070,
          -4.2573, -4.8574, -3.7422, -4.2806, -2.4575, -3.9825, -4.0106,
          -3.5796, -2.7573, -2.9670, -2.7110, -4.4878, -3.1065, -4.4279,
          -1.9018, -3.9521, -3.4554, -1.6025, -3.0805, -2.7019, -3.9312,
          -4.6762, -4.6417,  1.8085, -4.9880, -5.3125, -3.1860, -4.5953,
           6.0951, -5.0509,  5.9883, -2.3325, -2.9468, -2.8142, -4.8986,
          -5.2478, -0.8302,  0.2941,  1.7131,  1.5417, -0.2900, -4.3382,
          -2.4926,  1.9595, -3.6438, -4.2766,  0.7926],
         [-3.8458, -4.5237, -3.5651, -3.0396, -3.5796, -2.6212, -3.4183,
          -3.8048, -3.5327, -2.7355, -4.8678, -2.3862, -2.5361, -2.8366,
          -5.6516, -4.6812, -3.8775, -3.5892, -4.8070, -4.3252, -4.4857,
          -3.7281, -5.3745, -3.1497, -4.5261, -3.0888, -4.7982, -3.2

In [24]:
# Most likely vocabulary token at each position of the first (only) example.
[
    vocabulary.getToken(position_logits.argmax().item())
    for position_logits in out2.logits[0]
]

['TT_INTEGER',
 'TT_INTEGER',
 'TT_ZERO',
 'TT_INTEGER',
 'TT_INTEGER',
 'TT_INTEGER']

In [25]:
# Shape of the logits tensor returned by the forward pass.
out2.logits.shape

torch.Size([1, 6, 68])

In [None]:
# Decode the generated sequence (prompt + continuation) into token strings.
pred_tokens = [vocabulary.getToken(token_id) for token_id in out[0].tolist()]

(pred_tokens, len(pred_tokens))

In [None]:
# Manual greedy decoding: run the model, append the most likely next token to
# the running sequence, and repeat for a fixed number of steps.
# NOTE: this cell extends `test_example_ids` in place, so running it again
# continues from the already-extended sequence.
n_new_tokens = 10

# Inference only: skip building the autograd graph for each forward pass.
with torch.no_grad():
    for i in range(n_new_tokens):
        print(".", end="")
        output = model(test_example_ids).logits

        # The logits at the LAST position are the prediction for the next
        # token. Reading position 0 (as before) re-used the prediction that
        # follows the first token on every step, so the appended token never
        # depended on what had been generated so far.
        pred = torch.argmax(output[0, -1]).item()

        # Per-position argmax, printed for inspection.
        print([vocabulary.getToken(torch.argmax(position_logits).item()) for position_logits in output[0]])

        # Append the predicted index, keeping the leading batch dimension.
        test_example_ids = torch.LongTensor([test_example_ids[0].tolist() + [pred]])

print()
pred_idx = test_example_ids[0].tolist()
pred_token = [vocabulary.getToken(idx) for idx in pred_idx]
pred_idx, pred_token

In [None]:
# Per-position argmax over the logits left behind by the most recent forward
# pass, decoded back to token strings.
pred_idx = [position_logits.argmax().item() for position_logits in output[0]]
pred_token = list(map(vocabulary.getToken, pred_idx))

# Batch size, sequence length, predicted indices and their tokens.
(len(output), len(output[0]), pred_idx, pred_token)

# Test

In [None]:
# pipe = pipeline('text-generation', model=f"Dragonoverlord3000/{model_name}")
# pipe