In [4]:
import text_generation as tg

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

# Point the process environment at the central address. This must run before
# tg.Client.list_from_central() is called (presumably that call reads this
# variable -- confirm against the text_generation build in use).
os.environ.update({'TGI_CENTRAL_ADDRESS': 'localhost:8765'})

In [5]:
# Fetch the list of available servers via tg.Client.list_from_central().
# As the printed output shows, each entry is a dict with 'name', 'address',
# 'owner' and 'is_quantized' keys; `servers` is reused by the next cell.
servers = tg.Client.list_from_central()
print(servers)

[{'name': '/mnt/data_2/patrick/croissantllm-models/small4_equals/', 'address': 'frightened-frank-flowers-fin-01:3000', 'owner': 'patrick', 'is_quantized': False}]


In [6]:
# Pick the first server in the list. Fail with an actionable message instead
# of a bare "list index out of range" when no server is registered.
if not servers:
    raise RuntimeError(
        "tg.Client.list_from_central() returned no servers; "
        "start a server or check TGI_CENTRAL_ADDRESS."
    )
server_addr = servers[0]['address']

In [7]:
# Synchronous client for the selected server; the address from the server
# list has no scheme, so "http://" is prepended here.
client = tg.Client(f"http://{server_addr}")

In [9]:
# One-shot generation: request at most 20 new tokens for the prompt and print
# only the generated continuation (the response's `generated_text` field).
print(client.generate("CMU's PhD students are", max_new_tokens=20).generated_text)

among the best in the country in their field of study. They are also among the best in the


In [10]:
# Stream the completion piece by piece and stitch the token texts back
# together, leaving out any token flagged as special.
stream = client.generate_stream("CMU's PhD students are", max_new_tokens=20)
text = "".join(chunk.token.text for chunk in stream if not chunk.token.special)
print(text)

 among the best in the country in their field of study. They are also among the best in the


## Getting Top K tokens at each step

In [12]:
# top_tokens=3 asks for three candidate tokens per generation step;
# max_new_tokens=4 keeps the demo to four steps.
resp = client.generate(
    "CMU's PhD students are",
    max_new_tokens=4,
    top_tokens=3,
)
print(resp.generated_text)

# resp.details.top_tokens has one entry per generated step; each entry is the
# list of candidate Token objects (id, text, logprob, special) for that step.
for step_candidates in resp.details.top_tokens:
    print(step_candidates)

among the best in
[Token(id=5684, text='among', logprob=-2.5429688, special=False), Token(id=4645, text='working', logprob=-3.4179688, special=False), Token(id=1135, text='the', logprob=-3.6757812, special=False)]
[Token(id=1135, text='the', logprob=-0.40478516, special=False), Token(id=3108, text='those', logprob=-2.7402344, special=False), Token(id=488, text='', logprob=-2.8417969, special=False)]
[Token(id=3284, text='best', logprob=-1.5273438, special=False), Token(id=2481, text='most', logprob=-1.5664062, special=False), Token(id=3263, text='top', logprob=-2.2148438, special=False)]
[Token(id=1147, text='in', logprob=-0.45898438, special=False), Token(id=1171, text='and', logprob=-3.7089844, special=False), Token(id=5208, text='students', logprob=-3.9511719, special=False)]


## Benchmarking

In [15]:
# Four fixed, hand-written prompts (not randomly generated). The same list is
# fed to both the sync and the async benchmark cells so their timings compare
# like for like.
SAMPLES = [
    "The quick brown fox jumps over the lazy dog.",
    "The five boxing wizards jump quickly.",
    "All questions asked by five watch experts amazed the judge.",
    "Jack quietly moved up front and seized the big ball of wax.",
]

### Sync Client

In [16]:
%%time
# Sequential baseline: send the prompts one after another, printing each
# completion as soon as its request returns.
for prompt in SAMPLES:
    completion = client.generate(prompt, max_new_tokens=20)
    print(completion.generated_text)


The quick brown fox jumps over the lazy dog.
The quick brown fox j

The first step in the process is to create a list of potential candidates. This list should include

The first time I heard the term “fake news” was in the context of the 
He was a master of disguise, and he had a knack for getting into places he
CPU times: user 36.8 ms, sys: 3.42 ms, total: 40.2 ms
Wall time: 1.95 s


### Async Client

In [17]:
import asyncio
import nest_asyncio
# nest_asyncio patches asyncio so that asyncio.run() can be used while an
# event loop is already running (the notebook kernel runs one); the benchmark
# cell below relies on this when it calls asyncio.run().
nest_asyncio.apply()

# Async counterpart of `client`, pointed at the same server address.
async_client = tg.AsyncClient(f"http://{server_addr}")

In [18]:
%%time
async def batch_generate():
    """Send one generation request per prompt in SAMPLES concurrently.

    Returns the responses as gathered by asyncio.gather, i.e. in the same
    order as SAMPLES.
    """
    # Build every coroutine up front so gather can schedule them together.
    pending = [
        async_client.generate(sample, max_new_tokens=20)
        for sample in SAMPLES
    ]
    return await asyncio.gather(*pending)


# Drive the coroutine to completion, then print the completions in order.
results = asyncio.run(batch_generate())
for result in results:
    print(result.generated_text)


The quick brown fox jumps over the lazy dog.
The quick brown fox j

The first step in the process is to create a list of potential candidates. This list should include

The first time I heard the term “fake news” was in the context of the 
He was a master of disguise, and he had a knack for getting into places he
CPU times: user 105 ms, sys: 5.03 ms, total: 110 ms
Wall time: 620 ms
