In [142]:
import matplotlib.pyplot as plt
def visMatr(matrix, title='Matrix Visualization', cmap='viridis'):
    """Show `matrix` as a heat map with a colorbar.

    Args:
        matrix: Image data accepted by `Axes.imshow` (e.g. a 2-D array).
        title: Text placed above the plot. Defaults to the original fixed
            title, so existing one-argument calls render exactly as before.
        cmap: Matplotlib colormap name. Defaults to 'viridis'.
    """
    # Use a dedicated figure/axes instead of the implicit pyplot "current"
    # figure, so repeated calls in a loop never draw onto a stale figure.
    fig, ax = plt.subplots()
    im = ax.imshow(matrix, cmap=cmap, interpolation='nearest')
    fig.colorbar(im, ax=ax)
    ax.set_title(title)
    plt.show()

In [150]:
import os
import torch
import torch.nn as nn
from model import Transformer, ModelParameters
from data.tokenizer import RegexTokenizer

# --- Configuration ---------------------------------------------------------
models_folder = "models"
tokenizer_folder = "data"
model_name = "alilama_15M"

# --- Tokenizer -------------------------------------------------------------
# Build the path from `tokenizer_folder` so the setting above is the single
# source of truth (it was previously defined but not used).
tok_path = os.path.join(tokenizer_folder, "tokenizer.model")
try:
    tokenizer = RegexTokenizer()
    tokenizer.load(tok_path)
except Exception as err:
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt through,
    # and `from err` keeps the real cause visible in the traceback.
    raise RuntimeError(f"tokenizer not found. Make sure {tok_path} exists") from err

# --- Model -----------------------------------------------------------------
device = "cpu"
token_count = len(tokenizer)
model_path = os.path.join(models_folder, model_name + ".pth")

# Fail loudly when the checkpoint is missing. Only printing a message would
# leave `model` undefined (or stale from an earlier kernel run) and the
# failure would surface later in a confusing place.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Could not find model! Expected a checkpoint at {model_path}")

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source.
loaded_state = torch.load(model_path, map_location=device)
inpt_params = loaded_state["params"]
params = ModelParameters(**inpt_params)
print("parameters:", inpt_params)
params.max_seq_len = 10000 # increase as seen in ALiBi paper
model = Transformer(token_count=token_count, device=device, params=params)
model.load_state_dict(loaded_state['state_dict'])
print("Model loaded successfully.")

def generateStream(size, inpt_tokens=None, temperature=1):
    """Lazily yield decoded text, one piece per generation step.

    Forwards `size`, `inpt_tokens` and `temperature` to the module-level
    `model.genLazy` and, for every item it produces, decodes only the last
    element of the item's first entry with the module-level `tokenizer`.

    Yields:
        Whatever `tokenizer.decode` returns for that single-element list.
    """
    steps = model.genLazy(size, inpt_tokens=inpt_tokens, temperature=temperature)
    for step_output in steps:
        # presumably step_output[0] is the running sequence of the first
        # batch entry, so [-1] is the newest token -- TODO confirm
        newest_token = step_output[0][-1]
        yield tokenizer.decode([newest_token])

Model loaded successfully.


In [171]:
# --- Prompt ----------------------------------------------------------------
# This is the story the model tries to continue:
start_story = 'Mike loves to ride on his bike.' # Don't use a trailing space!!!
# REASONING TEST (sometimes works). We expect something like "come down.":
# start_story = 'If i throw a ball into the air, it will eventually'
modelInput = " STARTSTORY " + start_story

# --- Generation settings -----------------------------------------------------
# Per the original note: controls how deterministic the next-token choice is
# (0 = random). TODO confirm the exact semantics against model.genLazy.
temperature = 100
gen_size = 400  # `size` argument passed to generateStream
show_attention = False  # set True to plot the first block's attention scores

# NOTE(review): train mode during generation is unusual (eval mode is the
# norm for inference). Kept as in the original because its purpose cannot be
# confirmed from this notebook -- TODO confirm whether model.eval() is intended.
model.train()

# Keep the raw id list and the batched tensor under separate names instead of
# rebinding one variable to two different types.
prompt_ids = tokenizer.encode_ordinary(modelInput)
print(len(prompt_ids))
as_tens = torch.tensor(prompt_ids).unsqueeze(0).to(device)

# No gradients are needed for sampling; skip autograd bookkeeping.
with torch.no_grad():
    for text_piece in generateStream(gen_size, as_tens, temperature=temperature):
        print(text_piece, end="")
print()  # terminate the streamed line

# Optional: show attention scores of the first block, one plot per head.
if show_attention:
    att_scores = model.getAttScores()
    first_block = att_scores[0]
    for head in range(first_block.shape[1]):
        att = first_block[0, head, :, :].detach().numpy()
        visMatr(att)

10
 He had a big bike with a lot of wheels and a bell. One day, he saw a little girl who was sad. She had lost her toy. Mike wanted to help her. He asked his mom, "Mom, have you seen my toy?" His mom said, "No, but let's look for it together." They looked and looked, but they could not find the toy. Then, Mike had an idea. He said, "I will help you find your toy." They looked everywhere, but they could not find the toy. Mike was sad. Then, Mike had an idea. He said, "Let's ask our mom for help." They went to their mom and asked, "Mom, can we help you find your toy?" Her mom smiled and said, "Yes, we can help you find it." They looked and looked, and finally, they found the toy under a big tree. The little girl was so happy! She said, "Thank you, Mike!" They all played together and had a fun day. STARTSTORY Once upon a time, there was a little girl named Lily. She loved to play outside in the sunshine. One day, she saw a big, scary dog. The dog was very friendly and Lily wanted to pet i