In [1]:
import numpy as np
from tqdm import tqdm
import fire


In [2]:

def gelu(x):
    return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))


In [3]:

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)


In [4]:
def layer_norm(x, g, b, eps: float = 1e-5):
    # print("Layer Norm Input Shapes:")
    # print(f"x: {x}")
    # print(f"x shape: {x.shape}")
    # print(f"g shape: {g.shape}")
    # print(f"b shape: {b.shape}")
    
    mean = np.mean(x, axis=-1, keepdims=True)
    #print(f"mean: {mean}")
    variance = np.var(x, axis=-1, keepdims=True)
   # print(f"variance: {variance}")
    
    x_normalized = (x - mean) / np.sqrt(variance + eps)
  #  print(f"x_normalized: {x_normalized.shape}")
    
    result = g * x_normalized + b
   # print(f"result: {result}")
    
    return result

In [5]:
def linear(x, w, b):
    return x @ w + b

In [6]:
def ffn(x, c_fc, c_proj):
    a = gelu(linear(x, **c_fc))
    x = linear(a, **c_proj)
    return x

In [7]:

def attention(q, k, v, mask):
    return softmax(q @ k.T / np.sqrt(q.shape[-1]) + mask) @ v

In [8]:

def mha(x, c_attn, c_proj, n_head=1):
    x = linear(x, **c_attn)
    qkv = np.split(x, 3, axis=-1)
    qkv_heads = list(map(lambda x: np.split(x, n_head, axis=-1), qkv))
    causal_mask = (1 - np.tri(x.shape[0], dtype=x.dtype)) * -1e10
    out_heads = [attention(q, k, v, causal_mask) for q, k, v in zip(*qkv_heads)]
    x = np.hstack(out_heads)
    x = linear(x, **c_proj)
    return x


In [9]:

def transformer_block(x, mlp, attn, ln_1, ln_2, n_head):
    x =  x +  mha(x, **attn, n_head=n_head)
    x = x + ffn(layer_norm(x, **ln_2), **mlp)
    return x

## Custom code 

In [10]:
from utils import load_encoder_hparams_and_params
encoder, hparams, params = load_encoder_hparams_and_params("124M", "models")

2024-10-04 10:57:10.707758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-04 10:57:11.594018: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-10-04 10:57:11.594062: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-10-04 10:57:14.157569: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-10-0

In [11]:
inputs = encoder.encode("all heroes wear capes")
inputs

[439, 10281, 5806, 1451, 274]

In [12]:
def gpt2(inputs, wte, wpe, blocks, ln_f, n_head):  # [n_seq] -> [n_seq, n_vocab]
    # token + positional embeddings
    x = wte[inputs] + wpe[range(len(inputs))]  # [n_seq] -> [n_seq, n_embd]

    # forward pass through n_layer transformer blocks
    for block in blocks[:1]:
        x = transformer_block(x, **block, n_head=n_head)  # [n_seq, n_embd] -> [n_seq, n_embd]

    # projection to vocab
    #x = layer_norm(x, **ln_f)  # [n_seq, n_embd] -> [n_seq, n_embd]
    return x #@ wte.T  # [n_seq, n_embd] -> [n_seq, n_vocab]



In [13]:
logits = gpt2(inputs, **params, n_head=1)  # model forward pass

In [14]:
logits.shape

(5, 768)

In [15]:
logits

array([[-0.24636951,  0.83114105,  4.452487  , ..., -0.08223414,
         0.1122146 , -0.95171577],
       [-0.93640256,  0.46588486,  4.2565317 , ...,  0.4348594 ,
        -0.54787946, -0.78516173],
       [-0.6980567 ,  0.6011211 ,  4.7558928 , ...,  0.06484202,
        -0.28358984, -0.60146976],
       [-0.82171917,  0.8055482 ,  4.5270495 , ...,  0.14451209,
        -0.41624647, -0.5769494 ],
       [-1.4169543 ,  0.9740439 ,  4.69918   , ...,  0.2777383 ,
        -0.60946417, -0.36688083]], dtype=float32)

In [16]:
##################end custom code 

In [28]:
def gpt2(inputs, wte, wpe, blocks, ln_f, n_head):  # [n_seq] -> [n_seq, n_vocab]
    # token + positional embeddings
    x = wte[inputs] + wpe[range(len(inputs))]  # [n_seq] -> [n_seq, n_embd]

    # forward pass through n_layer transformer blocks
    for block in blocks:
        x = transformer_block(x, **block, n_head=n_head)  # [n_seq, n_embd] -> [n_seq, n_embd]

    # projection to vocab
    x = layer_norm(x, **ln_f)  # [n_seq, n_embd] -> [n_seq, n_embd]
    return x @ wte.T  # [n_seq, n_embd] -> [n_seq, n_vocab]


In [29]:
def generate(inputs, params, n_head, n_tokens_to_generate):
    for _ in tqdm(range(n_tokens_to_generate), "generating"):
        logits = gpt2(inputs, **params, n_head=n_head)
        next_id = np.argmax(logits[-1])
        inputs.append(int(next_id))
    return inputs[len(inputs) - n_tokens_to_generate:]

In [30]:
def main(prompt: str, n_tokens_to_generate: int = 40, model_size: str = "124M", models_dir: str = "models"):
    from utils import load_encoder_hparams_and_params

    # load encoder, hparams, and params from the released open-ai gpt-2 files
    encoder, hparams, params = load_encoder_hparams_and_params(model_size, models_dir)

    # encode the input string using the BPE tokenizer
    input_ids = encoder.encode(prompt)

    # make sure we are not surpassing the max sequence length of our model
    assert len(input_ids) + n_tokens_to_generate < hparams["n_ctx"]

    # generate output ids
    output_ids = generate(input_ids, params, hparams["n_head"], n_tokens_to_generate)

    # decode the ids back into a string
    output_text = encoder.decode(output_ids)

    return output_text

In [31]:
prompt = "all heroes wear capes"
generated_text = main(prompt, n_tokens_to_generate=15)
print(f"Generated text: {generated_text}")

generating: 100%|███████████████████████████████| 15/15 [00:03<00:00,  4.02it/s]

Generated text:  Mock EG satire MT MTlev EG prediction prediction prediction prediction� Nordic Nordic Nordic





In [None]:

```

## Running from Command Line

If you want to run this script from the command line, you can use the following cell:

```python
if __name__ == "__main__":
    fire.Fire(main)
```

Note: Make sure you have the `fire` library installed (`pip install fire`) to use the command-line interface.