In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell is executed, so edits to library code on disk are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Install client library (you may need to restart your kernel)

In [2]:
# Optional one-time setup: uncomment the next line to install the client
# library in editable mode from the local `client` directory.
# %pip install -e client

#### This will give you the `engine` module
##### This exposes `engine.get_info`, `engine.submit`, and `engine.retrieve`

In [3]:
import engine

##### `engine.get_info` returns a dictionary describing the currently loaded model and its parameters, such as the layer naming formats, which help you specify the particular components of the model you want to access

In [4]:
# Ask the server which model is loaded; the returned dictionary also carries
# the module-name format strings used to address layers in later requests.
info = engine.get_info()
info  # bare last expression so the notebook displays the dictionary

{'attn_module_name_format': 'model.layers.{}.self_attn',
 'layer_name_format': 'model.layers.{}',
 'max_seq_length': 2048,
 'mlp_module_name_format': 'model.layers.{}.mlp',
 'model_name': 'LLaMa-30b',
 'n_attn_head': 52,
 'n_embd': 6656,
 'n_layer': 60}

#### `engine.submit(...)` submits your job request to the server. You can see the format and types of the request like this:

In [5]:
# Display the schema of the request model to see which fields a job request
# accepts, their types, and their defaults.
engine.models.submit.Request.schema()

{'title': 'Request',
 'type': 'object',
 'properties': {'job_id': {'title': 'Job Id', 'type': 'string'},
  'prompts': {'title': 'Prompts',
   'type': 'array',
   'items': {'type': 'string'}},
  'max_out_len': {'title': 'Max Out Len', 'default': 20, 'type': 'integer'},
  'top_k': {'title': 'Top K', 'default': 5, 'type': 'integer'},
  'generate_greedy': {'title': 'Generate Greedy',
   'default': True,
   'type': 'boolean'},
  'activation_requests': {'$ref': '#/definitions/ActivationRequest'}},
 'required': ['job_id', 'prompts'],
 'definitions': {'ActivationRequest': {'title': 'ActivationRequest',
   'type': 'object',
   'properties': {'layers': {'title': 'Layers',
     'type': 'array',
     'items': {'type': 'string'}}}}}}

##### Or by visiting our api documentation at [https://ndif.baulab.us/api/docs](https://ndif.baulab.us/api/docs)

##### In this example request, we're going to:

<ul>
  <li>Process a couple of prompts like, "Michael Jordan plays the sport of"</li>
  <li>Have ten predicted tokens returned to us per prompt</li>
  <li>Have the top five most likely tokens returned to us, not just the top 1</li>
  <li>Finally, we're going to use the format we received from the get_info() function to specify that we also want the activations at a few layers in the model</li>
</ul>

In [6]:
# Build and send the job: two prompts, ten new tokens each, the five most
# likely candidates per generated token, plus the activations of layers 5-9
# (named using the format string reported by `engine.get_info()`).
response = engine.submit(
    prompts=[
        "Michael Jordan plays the sport of",
        "The Space Needle is located in the city of",
    ],
    max_new_tokens=10,
    get_answers=True,
    top_k=5,
    activation_requests={
        # Fill the layer-name template with each requested layer index.
        "layers": list(map(info["layer_name_format"].format, range(5, 10))),
    },
)

INFO: 2023-04-15 19:29:07,499 - => Submitting request...
INFO: 2023-04-15 19:29:07,506 - => Successfully submitted job 'XrNyvSSGxRVufFYLquyDJL'
INFO: 2023-04-15 19:29:07,507 - => Dumped request for job 'XrNyvSSGxRVufFYLquyDJL' to /disk/u/arnab/Projects/engine/jobs/XrNyvSSGxRVufFYLquyDJL


##### The server will receive our request and return a status (which we hope will say that it received our request with no problem), as well as the job id identifying our request. This is loaded into the `engine.models.result.Result` model. The request we submitted will be saved to our local `./jobs` directory for future reference.

In [7]:
# Show the server's acknowledgement, one value per line: the human-readable
# description, the job status, and the job id needed to retrieve the result.
print(response.description, response.status, response.job_id, sep="\n")

Your job has been recieved is is waiting approval
JobStatus.RECIVED
XrNyvSSGxRVufFYLquyDJL


##### Now to retrieve what we asked for, we call `engine.retrieve(<job_id>)` using the job_id we received. This too will return an `engine.models.result.Result` object, and assuming your request has been approved and processed, the `data` field will be populated.

##### This result will also be stored to disk in the same directory as the request

In [None]:
# Fetch the result for the submitted job, identified by its job id.
# NOTE(review): per the surrounding text, `data` is only populated once the
# request has been approved and processed — confirm before reading it.
result = engine.retrieve(response.job_id)
result.description  # display the description returned with the result

In [12]:
# List the fields declared on a single entry of `result.data` (the first
# entry is used as a representative) before reading them in the loop below.
result.data[0].__fields__

{'generated_text': ModelField(name='generated_text', type=str, required=True),
 'input_tokenized': ModelField(name='input_tokenized', type=Optional[list], required=False, default=None),
 'generated_tokens': ModelField(name='generated_tokens', type=list, required=True),
 'activations': ModelField(name='activations', type=Optional[Mapping[str, list[list[float]]]], required=False, default=None)}

In [13]:
import torch
import json
import copy

In [14]:
# Summarise every returned entry: the generated text, the tokenized input,
# the candidates for each generated token and, when activations were
# returned, the tensor shape obtained for each layer.
for r in result.data:
    print("txt =", r.generated_text)
    print("input_tokenized =", r.input_tokenized)
    print("generated_tokens =", r.generated_tokens)

    layer_activations = r.activations
    if layer_activations is not None:
        print("activations")
        for layer, layer_values in layer_activations.items():
            # Convert the nested lists to a tensor only to report its shape.
            shape = torch.tensor(layer_values).shape
            print(f"     {layer} : {shape}")

    print()
    

txt = <s> Michael Jordan plays the sport of basketball.
Michael Jordan is a basketball player.
Michael Jordan
input_tokenized = [['<s>', 1], ['Michael', 5765], ['Jordan', 18284], ['plays', 13582], ['the', 278], ['sport', 7980], ['of', 310]]
generated_tokens = [[{'token': 'basketball', 'id': 20305, 'p': 0.472900390625}, {'token': 'golf', 'id': 29416, 'p': 0.135498046875}, {'token': 'baseball', 'id': 21573, 'p': 0.06109619140625}, {'token': 'k', 'id': 413, 'p': 0.047943115234375}, {'token': 'tennis', 'id': 22556, 'p': 0.025665283203125}], [{'token': '.', 'id': 29889, 'p': 0.1436767578125}, {'token': 'like', 'id': 763, 'p': 0.0987548828125}, {'token': 'with', 'id': 411, 'p': 0.094970703125}, {'token': 'in', 'id': 297, 'p': 0.08648681640625}, {'token': ',', 'id': 29892, 'p': 0.06378173828125}], [{'token': '\n', 'id': 13, 'p': 0.302978515625}, {'token': 'He', 'id': 940, 'p': 0.2286376953125}, {'token': 'Michael', 'id': 5765, 'p': 0.052642822265625}, {'token': 'His', 'id': 3600, 'p': 0.02409