## Standard module imports

In [1]:
import os
import regex
import json
import fire
import torch as pytorch
import numpy as np
import tensorflow as tf
from typing import Any

### Define the gpt_core_v2_class module

In [2]:
class gpt_core_v2_class:
    HPARAMS_117M = {
    "n_vocab": 50257,
    "n_ctx": 1024,
    "n_embd": 768,
    "n_head": 12,
    "n_layer": 12
    }

    HPARAMS_355M = {
    "n_vocab": 50257,
    "n_ctx": 1024,
    "n_embd": 1024,
    "n_head": 16,
    "n_layer": 24
    }

    HPARAMS = { "117M": HPARAMS_117M,
                "355M": HPARAMS_355M }

    HPARAMS_PROTO = {
    "n_vocab": 50257,
    "n_ctx": 1024,
    "n_embd": 768,
    "n_head": 12,
    "n_layer": 12
    }

    # use deprecated HParams workaround
    class HParams:
        def __init__(self, **kwargs):
            self._hparams = {}
            for k, v in kwargs.items():
                self.__validate_and_set__(pname=k, pvalue=v)

        def __getattr__(self, pname):
            if pname in self._hparams:
                return self._hparams[pname]
            raise AttributeError(f"'HParams' object has no attribute '{pname}'")

        def __setattr__(self, pname, pvalue):
            if pname in gpt_core_v2_class.HPARAMS_PROTO:
                pval = gpt_core_v2_class.HPARAMS_PROTO[pname] if pvalue is None else pvalue
                self._hparams[pname] = pval
            super().__setattr__(pname, pvalue)

        def __validate_and_set__(self, pname: str, pvalue: Any):
            if pname in gpt_core_v2_class.HPARAMS_PROTO:
                pval = gpt_core_v2_class.HPARAMS_PROTO[pname] if pvalue is None else pvalue
                setattr(self, pname, pval)
                self._hparams[pname] = pval
            else:
                raise ValueError(f"Key [{pname}] not found in HParams")

        def override_from_dict(self, dict_):
            for k, v in dict_.items():
                self.__validate_and_set__(pname=k, pvalue=v)

        @classmethod
        def from_json(cls, json_file):
            with open(json_file, 'r', encoding="UTF-8") as f:
                params = json.load(f)
            return cls(**params)

        def to_dict(self):
            return self._hparams.copy()
    
    @classmethod
    def default_hparams(cls):
        return cls.HParams(
            n_vocab=0,
            n_ctx=1024,
            n_embd=768,
            n_head=12,
            n_layer=12,
        )

    def shape_list(x):
        """Deal with dynamic shape in tensorflow cleanly."""
        static = x.shape.as_list()
        dynamic = tf.shape(x)
        return [dynamic[i] if s is None else s for i, s in enumerate(static)]

    def softmax(x, axis=-1):
        return tf.nn.softmax(x, axis=axis)

    def gelu(x):
        return tf.nn.gelu(x)

    #@tf.function
    def norm(x, scope, *, axis=-1, epsilon=1e-5):
        """Normalize to mean = 0, std = 1, then do a diagonal affine transform."""
        with tf.name_scope(scope):
            n_state = x.shape[-1]
            g = tf.Variable(tf.ones([n_state]), name='g')
            b = tf.Variable(tf.zeros([n_state]), name='b')
            u = tf.reduce_mean(x, axis=axis, keepdims=True)
            s = tf.reduce_mean(tf.square(x-u), axis=axis, keepdims=True)
            x = (x - u) * tf.math.rsqrt(s + epsilon)
            x = x*g + b
            return x

    def split_states(x, n):
        """Reshape the last dimension of x into [n, x.shape[-1]/n]."""
        *start, m = shape_list(x)
        return tf.reshape(x, start + [n, m//n])

    def merge_states(x):
        """Smash the last two dimensions of x into a single dimension."""
        *start, a, b = shape_list(x)
        return tf.reshape(x, start + [a*b])

    #@tf.function
    def conv1d(x, scope, nf, *, w_init_stdev=0.02):
        with tf.name_scope(scope):
            *start, nx = shape_list(x)
            w = tf.Variable(tf.random.normal([1, nx, nf], stddev=w_init_stdev), name='w')
            b = tf.Variable(tf.zeros([nf]), name='b')
            c = tf.reshape(tf.matmul(tf.reshape(x, [-1, nx]), tf.reshape(w, [-1, nf]))+b, start+[nf])
            return c

    def attention_mask(nd, ns, *, dtype):
        """1's in the lower triangle, counting from the lower right corner.
        Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't produce garbage on TPUs.
        """
        i = tf.range(nd)[:,None]
        j = tf.range(ns)
        m = i >= j - ns + nd
        return tf.cast(m, dtype)

    #@tf.function
    def attn(x, scope, n_state, *, past, hparams):
        assert len(x.shape) == 3  # Should be [batch, sequence, features]
        assert n_state % hparams.n_head == 0
        if past is not None:
            assert len(past.shape) == 5  # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]

        def split_heads(x):
            # From [batch, sequence, features] to [batch, heads, sequence, features]
            return tf.transpose(split_states(x, hparams.n_head), [0, 2, 1, 3])

        def merge_heads(x):
            # Reverse of split_heads
            return merge_states(tf.transpose(x, [0, 2, 1, 3]))

        def mask_attn_weights(w):
            # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from src to dst.
            _, _, nd, ns = shape_list(w)
            b = attention_mask(nd, ns, dtype=w.dtype)
            b = tf.reshape(b, [1, 1, nd, ns])
            w = w*b - tf.cast(1e10, w.dtype)*(1-b)
            return w

        def multihead_attn(q, k, v):
            # q, k, v have shape [batch, heads, sequence, features]
            w = tf.matmul(q, k, transpose_b=True)
            w = w * tf.math.rsqrt(tf.cast(tf.shape(v)[-1], w.dtype))
            w = mask_attn_weights(w)
            w = softmax(w)
            a = tf.matmul(w, v)
            return a

        with tf.name_scope(scope):
            c = conv1d(x, 'c_attn', n_state*3)
            q, k, v = tf.split(c, 3, axis=2)
            q, k, v = map(split_heads, [q, k, v])
            present = tf.stack([k, v], axis=1)
            if past is not None:
                pk, pv = tf.unstack(past, axis=1)
                k = tf.concat([pk, k], axis=-2)
                v = tf.concat([pv, v], axis=-2)
            a = multihead_attn(q, k, v)
            a = merge_heads(a)
            a = conv1d(a, 'c_proj', n_state)
            return a, present

    #@tf.function
    def mlp(x, scope, n_state, *, hparams=None):
        with tf.name_scope(scope):
            nx = tf.shape(x)[-1]
            h = gelu(conv1d(x, 'c_fc', n_state))
            h2 = conv1d(h, 'c_proj', nx)
            return h2

    #@tf.function
    def block(x, scope, *, past, hparams):
        with tf.name_scope(scope):
            nx = tf.shape(x)[-1]
            a, present = attn(norm(x, 'ln_1'), 'attn', nx, past=past, hparams=hparams)
            x = x + a
            m = mlp(norm(x, 'ln_2'), 'mlp', nx*4, hparams=hparams)
            x = x + m
            return x, present

    def past_shape(*, hparams, batch_size=None, sequence=None):
        return [batch_size, hparams.n_layer, 2, hparams.n_head, sequence, hparams.n_embd // hparams.n_head]

    #@tf.function
    def expand_tile(value, size):
        """Add a new axis of given size."""
        value = tf.convert_to_tensor(value, name='value')
        ndims = tf.rank(value)
        return tf.tile(tf.expand_dims(value, axis=0), [size] + [1]*ndims)

    #@tf.function
    def positions_for(tokens, past_length):
        batch_size = tf.shape(tokens)[0]
        nsteps = tf.shape(tokens)[1]
        return expand_tile(past_length + tf.range(nsteps), batch_size)

    #@tf.function
    def model(hparams, input_tokens, past=None, scope='model', reuse=True, seed=None):
        with tf.name_scope(scope):
            
            if seed is not None:
                np.random.seed(seed)
                tf.random.set_seed(seed)

            results = {}
            batch, sequence = shape_list(input_tokens)

            wpe = tf.Variable(tf.random.normal([hparams.n_ctx, hparams.n_embd], stddev=0.01), name='wpe')
            wte = tf.Variable(tf.random.normal([hparams.n_vocab, hparams.n_embd], stddev=0.02), name='wte')

            past_length = 0 if past is None else tf.shape(past)[-2]
            h = tf.gather(wte, input_tokens) + tf.gather(wpe, positions_for(input_tokens, past_length))

            # Transformer
            presents = []
            pasts = tf.unstack(past, axis=1) if past is not None else [None] * hparams.n_layer
            assert len(pasts) == hparams.n_layer
            for layer, past in enumerate(pasts):
                h, present = block(h, f'h{layer}', past=past, hparams=hparams)
                presents.append(present)
            results['present'] = tf.stack(presents, axis=1)
            h = norm(h, 'ln_f')

            # Language model loss.  Do tokens <n predict token n?
            h_flat = tf.reshape(h, [batch*sequence, hparams.n_embd])
            logits = tf.matmul(h_flat, wte, transpose_b=True)
            logits = tf.reshape(logits, [batch, sequence, hparams.n_vocab])
            results['logits'] = logits
            return results    

### Define the gpt_class module

In [3]:
class gpt_class:
    
    gpt_core_v2 = gpt_core_v2_class()
    
    #@tf.function
    def top_k_logits(logits, k):
        """top k logits"""
        if k == 0:
            # no truncation
            return logits

        def _top_k():
            values, _ = tf.math.top_k(logits, k=k)
            min_values = values[:, -1, tf.newaxis]
            return tf.where(
                logits < min_values,
                tf.ones_like(logits, dtype=logits.dtype) * -1e10,
                logits,
            )
        return tf.cond(
            tf.equal(k, 0),
            lambda: logits,
            lambda: _top_k(),
        )

    def get_codec(model_name, models_dir):
        """extend the codec encoder"""
        return codec.get_encoder(model_name, models_dir)

    def get_default_hparams(model_name, models_dir, fn):
        """default hparams"""
        hparams = gpt_core_v2.default_hparams()
        try:
            models_dir = os.path.expanduser(os.path.expandvars(models_dir))
            with open(os.path.join(models_dir, model_name, fn), encoding="UTF-8") as f:
                hparams.override_from_dict(json.load(f))
        except Exception:
            pass
        return hparams

    #@tf.function
    def top_p_logits(logits, p):
        """Nucleus sampling"""
        batch = tf.shape(logits)[0]
        sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
        cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
        indices = tf.stack([
            tf.range(0, batch),
            # number of indices to include
            tf.maximum(tf.reduce_sum(tf.cast(cumulative_probs <= p, tf.int32), axis=-1) - 1, 0),
        ], axis=-1)
        min_values = tf.gather_nd(sorted_logits, indices)
        return tf.where(
            logits < min_values[:, tf.newaxis],
            tf.ones_like(logits) * -1e10,
            logits,
        )

    #@tf.function
    def submit_text_query(
        hparams=gpt_core_v2.default_hparams(),
        context="Hello, how are you today?",
        length=50,
        model_name='124M',
        models_dir='../models',
        start_token=None,
        batch_size=None,
        temperature=1,
        top_k=0,
        top_p=1,
        seed=None):

        """Submit a text query to the model"""
        codec_instance = codec.get_encoder(model_name, models_dir)
        context_tokens = codec_instance.encode(context) # [15496, 11, 703, 389, 345, 5633]
        tokens_length = len(context_tokens)
        if length is not None and tokens_length > length:
            length = tokens_length
        #context_tokens_tensor = tf.convert_to_tensor([context_tokens] * batch_size, dtype=tf.int32)
        response_tokens = submit_token_query(
            hparams=hparams,
            context=context_tokens, #context_tokens_tensor,
            length=length,
            model_name=model_name,
            models_dir=models_dir,
            start_token=start_token,
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            seed=seed)
        
        #generated = 0
        #for i in range(batch_size):
            #generated += 1
            #text = codec_instance.decode(response_tokens[i])
            #print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            #print(text)

        # Decode the generated response_tokens
        text_response = codec_instance.decode(response_tokens[0])

        return text_response

    #@tf.function
    def submit_token_query(
        hparams=gpt_core_v2.default_hparams(),
        length=50,
        model_name='124M',
        models_dir='../models',
        start_token=None,
        batch_size=None,
        context=tf.convert_to_tensor([15496, 11, 703, 389, 345, 5633], dtype=tf.int32), #"Hello, how are you today?"
        temperature=1,
        top_k=0,
        top_p=1,
        seed=None
    ):
        """submit a query to the model"""

        if length is None:
            length = hparams.n_ctx // 2
        elif length > hparams.n_ctx:
            raise ValueError(f"Can't capture samples greater than the window size: {hparams.n_ctx}")

        if start_token is None:
            assert context is not None, 'Specify exactly one of start_token and context!'
            # Ensure context is a TensorFlow tensor
            context = tf.convert_to_tensor(context, dtype=tf.int32)
            # Check the shape of context
            context_shape = tf.shape(context)
            if len(context.shape) == 1:
                # If context is 1D, reshape it to 2D
                context = tf.reshape(context, [1, -1])
            elif len(context.shape) > 2:
                raise ValueError(f"Context should be 1D or 2D, but got shape {context.shape}")
            
            # Set batch_size if it's not provided
            if batch_size is None:
                batch_size = context_shape[0]
            else:
                # If batch_size is provided, ensure it matches the context
                tf.debugging.assert_equal(batch_size, context_shape[0], 
                                        message="Provided batch_size doesn't match context's first dimension")
        else:
            assert context is None, 'Specify exactly one of start_token and context!'
            context = tf.fill([batch_size, 1], start_token)

        def determine_length(provided_length=50, max_length=50):
            if provided_length is None:
                return max_length
            if provided_length > max_length:
                print(f"Warning: Provided length ({provided_length}) exceeds maximum length ({max_length}). Using maximum length.")
                return max_length
            return provided_length

        length = determine_length(provided_length=length, max_length=hparams.n_ctx)

        def step(hparams, tokens, past=None):
            lm_output = gpt_core_v2.model(hparams=hparams, input_tokens=tokens, past=past, seed=seed)

            logits = lm_output['logits'][:, :, :hparams.n_vocab]
            presents = lm_output['present']
            presents.set_shape(gpt_core_v2.past_shape(hparams=hparams, batch_size=batch_size))
            return {
                'logits': logits,
                'presents': presents,
            }

        def body(past, prev, output):
            next_outputs = step(hparams, prev, past=past)
            logits = next_outputs['logits'][:, -1, :] / tf.cast(temperature, tf.float32)
            logits = top_k_logits(logits, k=top_k)
            logits = top_p_logits(logits, p=top_p)
            samples = tf.random.categorical(logits, num_samples=1, dtype=tf.int32)
            return [
                next_outputs['presents'] if past is None else tf.concat([past, next_outputs['presents']], axis=-2),
                samples,
                tf.concat([output, samples], axis=1)
            ]

        past, prev, output = body(None, context, context)

        def cond(*args):
            return True

        _, _, tokens = tf.while_loop(
            cond=cond, body=body,
            maximum_iterations=length - 1,
            loop_vars=[
                past,
                prev,
                output
            ],
            shape_invariants=[
                tf.TensorShape(gpt_core_v2.past_shape(hparams=hparams, batch_size=batch_size)),
                tf.TensorShape([batch_size, None]),
                tf.TensorShape([batch_size, None]),
            ],
            back_prop=False,
        )

        return tokens    

### Define the Interactive_example_class module

In [4]:
class Interactive_example_class:
    def interactive_model(
        model_name='124M',
        seed=None,
        nsamples=1,
        batch_size=None,
        length=None,
        temperature=1,
        top_k=1,
        top_p=1,
        models_dir='../models' # Adjust as needed during DEBUG mode
    ):
        """
        Interactively run the model
        :model_name=124M : String, which model to use
        :seed=None : Integer seed for random number generators, fix seed to reproduce
        results
        :nsamples=1 : Number of samples to return total
        :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
        :length=None : Number of tokens in generated text, if None (default), is
        determined by model hyperparameters
        :temperature=1 : Float value controlling randomness in boltzmann
        distribution. Lower temperature results in less random completions. As the
        temperature approaches zero, the model will become deterministic and
        repetitive. Higher temperature results in more random completions.
        :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
        considered for each step (token), resulting in deterministic completions,
        while 40 means 40 words are considered at each step. 0 (default) is a
        special setting meaning no restrictions. 40 generally is a good value.
        :models_dir : path to parent folder containing model subfolders
        (i.e. contains the <model_name> folder)
        """

        # Set up the seed for reproducibility
        seed = 42  # Or whatever seed value you were using before

        print("Initializing model...")

        # Interactive prompt loop
        while True:
            raw_text = input("Model prompt >>> ") # "Hello, how are you ?"
            while not raw_text:
                print('Please supply a text Prompt to the model!')
                raw_text = input("Model prompt >>> ")

            # Interactive example
            # Increase nsamples to produce more generative examples
            for i in range(nsamples):
                text_output = gpt.submit_text_query(
                    context = raw_text,
                    length = length,
                    batch_size = batch_size,
                    temperature=temperature,
                    top_k = top_k,
                    top_p = top_p,
                    models_dir = models_dir,
                    model_name = model_name,
                    seed = seed )
                
                print("=" * 40 + f" COMPLETION {i} " + "=" * 40)
                for _, text in enumerate(text_output):
                    print(text)
            
            print("=" * 80)


    if __name__ == '__main__':
        fire.Fire(interactive_model)    

Initializing model...


Model prompt >>>  Hello, how are you ?


NameError: name 'gpt' is not defined