In [1]:
import matplotlib.pyplot as plt
def visMatr(matrix):
    """Display `matrix` as a colour-mapped image with a colorbar.

    The values are drawn with the 'viridis' colormap and nearest-neighbour
    interpolation, the plot is titled 'Matrix Visualization', and the figure
    is shown immediately.

    Args:
        matrix: 2-D array-like accepted by ``plt.imshow``.
    """
    # Keep a handle on the image so the colorbar is tied to it explicitly.
    heatmap = plt.imshow(matrix, interpolation='nearest', cmap='viridis')
    plt.colorbar(heatmap)
    plt.title('Matrix Visualization')
    plt.show()

In [2]:
import os
import torch
import torch.nn as nn
from model import Transformer, ModelParameters
from data.tokenizer import RegexTokenizer

# Where the checkpoint and the tokenizer vocabulary live, and which model to load.
models_folder = "models"
tokenizer_folder = "data"
model_name = "alilama_2M"

# Build the tokenizer path from `tokenizer_folder` instead of repeating the literal.
tok_path = os.path.join(tokenizer_folder, "tokenizer.model")
try:
    tokenizer = RegexTokenizer()
    tokenizer.load(tok_path)
except Exception as err:
    # Catch Exception (not a bare except) so Ctrl-C / SystemExit still propagate,
    # and chain the original error so the real cause stays visible in the traceback.
    raise RuntimeError(f"tokenizer not found. Make sure {tok_path} exists") from err

device = "cpu"
token_count = len(tokenizer)
model_path = os.path.join(models_folder, model_name + ".pth")

if os.path.exists(model_path):
    # NOTE: torch.load relies on pickle-based deserialisation; only load
    # checkpoints that come from a trusted source.
    loaded_state = torch.load(model_path, map_location=device)
    # The checkpoint is indexed by "params" (constructor kwargs for
    # ModelParameters) and "state_dict" (the model weights).
    inpt_params = loaded_state["params"]
    params = ModelParameters(**inpt_params)
    print("parameters:", inpt_params)
    # Override the stored max_seq_len so longer sequences can be generated.
    params.max_seq_len = 10000  # increase as seen in ALiBi paper
    model = Transformer(token_count=token_count, device=device, params=params)
    model.load_state_dict(loaded_state['state_dict'])
    print("Model loaded successfully.")
else:
    # `model` is left undefined on this path, so any later cell that uses it
    # will fail with a NameError until the checkpoint is made available.
    print("Could not find model!")

def generateStream(size, inpt_tokens=None, temperature=1):
    """Lazily yield the decoded text of each newly generated token.

    Drives ``model.genLazy`` with the given arguments and, for every value it
    produces, decodes only the last element of the first row.

    Args:
        size: Forwarded to ``model.genLazy`` as its first argument.
        inpt_tokens: Optional prompt tokens, forwarded as ``inpt_tokens``.
        temperature: Sampling temperature, forwarded as ``temperature``.

    Yields:
        The result of ``tokenizer.decode`` on a one-element list holding the
        most recent token.
    """
    token_stream = model.genLazy(size, inpt_tokens=inpt_tokens, temperature=temperature)
    for generated in token_stream:
        newest_token = generated[0][-1]
        yield tokenizer.decode([newest_token])

parameters: {'model_name': 'alilama', 'd_model': 128, 'blocks': 8, 'max_seq_len': 128, 'num_heads': 8, 'hidden_dim': 512, 'head_width': 16, 'weight_tying_dict': {}}
Model loaded successfully.


In [6]:
# Prompt that the model is asked to continue.
start_story = 'Mike loves to ride on his bike.'  # must NOT end with a trailing space
# Alternative "reasoning" prompt (only works sometimes); a good continuation
# would be something like "come down.":
# start_story = 'If i throw a ball into the air, it will eventually'
modelInput = " STARTSTORY " + start_story
# Controls how deterministic the next-token choice is (0 = random).
temperature = 10
# NOTE(review): the model is switched to training mode right before
# generation — confirm this is intended rather than model.eval().
model.train()

# Encode the prompt and report how many tokens it contains.
prompt_ids = tokenizer.encode_ordinary(modelInput)
print(len(prompt_ids))

# unsqueeze(0) adds a leading dimension of size 1 before moving to `device`.
as_tens = torch.tensor(prompt_ids).unsqueeze(0).to(device)

# Stream the continuation piece by piece on a single output line.
for piece in generateStream(400, as_tens, temperature=temperature):
    print(piece, end="")

# Disabled attention-score visualisation, kept as an inert string literal.
"""att_scores = model.getAttScores()
first_block = att_scores[0]
for head in range(first_block.shape[1]):
    att = first_block[0,head,:,:].detach().numpy()
    visMatr(att)"""

# Terminate the streamed line.
print("")

10
 He had a big bike with a lot of wheels and a bell. One day, he saw a little girl who was sad. She had lost her toy. Mike wanted to help her. He looked around and saw a little girl. She was holding a toy. Mike was happy to see her toy. He said, "Hi, I'm Mike. Do you want to play with me?" The little girl smiled and said, "Yes, I would love to play with you!" They played together for a while. Mike and the little girl were very happy. They rode their bikes and rode their bikes. They had so much fun. When it was time to go home, Mike said goodbye to the little girl. He said, "Thank you for playing with me. I had a great time!" STARTSTORY Once upon a time, there was a little girl named Lily. She loved to play with her toys and her favorite toy was a teddy bear. One day, Lily's mom asked her to help her with the laundry. Lily was happy to help and started to put the clothes in the washing machine. As she was putting the clothes in the washing machine, she accidentally dropped it on the f