In [None]:
import os
import sys
import numpy as np
from pathlib import Path

# Make the custom library importable: add the directory three levels above the
# current working directory to the module search path (presumably the repository
# root that contains `src` - confirm against the project layout).
project_root = str(Path().resolve().parent.parent.parent)

# Guard against duplicates so re-running this cell does not keep growing sys.path
if project_root not in sys.path:
    sys.path.append(project_root)

# Import custom modules
from src import Tensor
from src.architectures.transformer import Tokenizer, DecoderTransformer

### Constants & Configurations

In [2]:
# Both checkpoint artifacts live in the 'checkpoints' folder under the current working directory
tokenizer_path, model_path = (
    os.path.join(os.getcwd(), 'checkpoints', artifact_name)
    for artifact_name in ('tokenizer.json', 'language_model')
)

In [3]:
# Hyperparameters

# Context window: the number of tokens in a sequence
sequence_length = 256

# Dimensionality of the token embeddings
n_embed = 384

# Number of attention heads in the multi-head attention mechanism
n_attention_heads = 6

# Number of transformer decoder blocks in the model
n_decoder_blocks = 6

### Tokenizer

In [4]:
# Create the tokenizer, then restore its state from the file at `tokenizer_path`
tokenizer = Tokenizer()
tokenizer.load(tokenizer_path)

# Vocabulary size as reported by the loaded tokenizer
vocab_size = tokenizer.get_vocab_size()

### Loading the model

In [None]:
# Restore the trained model from its checkpoint.
# If the checkpoint is missing, stop here with an explicit error: silently skipping
# the load would leave `language_model` undefined and surface later as a NameError.
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"No model checkpoint found at '{model_path}'. "
        "Train and save the model before running this notebook."
    )

# Print status
print("Loading the model from the checkpoint...")

# Load the model
language_model = DecoderTransformer.load(model_path)

# Print status
print("Model loaded successfully.")

Loading the model from the checkpoint...
Model loaded successfully.


### Inference

In [6]:
# Starting context for generation: a single token with id 0, shaped (1, 1)
context = Tensor(np.zeros((1, 1), dtype=np.int32))

# Ask the model for 300 generation steps, yielded one token at a time
token_stream = language_model.autoregressive_generation(x=context, num_steps=300, stream=True) # type: ignore

for token in token_stream:
    # Reduce the yielded tensor to a plain Python value for the tokenizer
    token_id = token.data.squeeze().tolist()

    # Decode and echo immediately (no newline, flushed) so the text appears as it is produced
    print(tokenizer.decode([token_id]), end='', flush=True)

genisse uom mend'acquifallida,
ce lo sol, fitto.
Quando la la si che in che manobe s'acquifestendo poi dotti,
poco so de' erano pavi la ango, per fu' riviva 'l pastmiglio ha cose.
commia mi setter del prende,
s'l piobasse.
Traccia,
che Beno di latimai Gio nostra noto,
si piangolgira via per mano;
chiesa
apprende invi allaspetto che remo la tuo umana pecca la riffranno per li volse in sano
del si per lo rimonche veder no pone,
squi,
che; seguiva mordeva fiamma che asser che dentro, intra e in grande
che