In [8]:
import text_generation as tg

In [9]:
# Publish the address of the central server registry through the process
# environment (presumably read by tg.Client.list_from_central -- confirm
# against the client library).
import os

os.environ.update({"TGI_CENTRAL_ADDRESS": "tir-0-32:8765"})

In [10]:
# Ask the central registry which model servers are currently available.
# Per the printed output, each entry is a dict with the keys
# 'name', 'address', 'owner' and 'is_quantized'.
servers = tg.Client.list_from_central()
# Show the raw listing so the reader can see which models can be targeted.
print(servers)

[{'name': 'upstage/Llama-2-70b-instruct-v2', 'address': 'tir-1-23.eth:9875', 'owner': 'ltjuatja', 'is_quantized': True}, {'name': 'chavinlo/alpaca-native', 'address': 'tir-1-28.eth:8080', 'owner': 'pfernand', 'is_quantized': False}, {'name': 'NousResearch/Llama-2-7b-hf', 'address': 'tir-0-15.eth:8080', 'owner': 'pfernand', 'is_quantized': False}]


In [11]:
# Select the server by model name instead of by position in the list.
# The listing comes from a live registry, so its length and ordering are not
# under this notebook's control: a fixed index could silently target a
# different model on another run, or fail with a bare IndexError.
TARGET_MODEL = 'chavinlo/alpaca-native'
matching_addresses = [
    entry['address'] for entry in servers if entry['name'] == TARGET_MODEL
]
if not matching_addresses:
    # Fail loudly and list what is actually running so the reader can adjust
    # TARGET_MODEL.
    available_models = [entry['name'] for entry in servers]
    raise LookupError(
        f"No running server for {TARGET_MODEL!r}; available models: {available_models}"
    )
# If several servers host the same model, use the first one listed.
server_addr = matching_addresses[0]

In [12]:
# Blocking (synchronous) client bound to the selected server's HTTP endpoint;
# `server_addr` is a "host:port" string taken from the registry listing.
client = tg.Client(f"http://{server_addr}")

In [13]:
# Single blocking request: generate up to 20 new tokens for the prompt and
# show only the generated text.
completion = client.generate("CMU's PhD students are", max_new_tokens=20)
print(completion.generated_text)

among the most successful in the world. Our graduates have gone on to successful careers in academ


In [14]:
# Streaming variant of the same request: consume the responses as they arrive
# and concatenate the text of every token not flagged as special.
token_stream = client.generate_stream("CMU's PhD students are", max_new_tokens=20)
text = "".join(
    chunk.token.text for chunk in token_stream if not chunk.token.special
)
print(text)

 among the most successful in the world. Our graduates have gone on to successful careers in academ


## Benchmarking

In [15]:
# create 4 random sentences
SAMPLES = [
    "The quick brown fox jumps over the lazy dog.",
    "The five boxing wizards jump quickly.",
    "All questions asked by five watch experts amazed the judge.",
    "Jack quietly moved up front and seized the big ball of wax.",
]

### Sync Client

In [16]:
%%time
# Sequential baseline: each request is issued only after the previous one has
# returned, so the measured wall time covers all requests back to back.
for prompt in SAMPLES:
    completion = client.generate(prompt, max_new_tokens=20)
    print(completion.generated_text)


The quick brown fox jumps over the lazy dog.
The quick brown fox j

The first step in the process is to create a list of potential candidates. This list should include

The first time I heard the term “fake news” was in the context of the 
He was a master of disguise, and he had a knack for getting into places he
CPU times: user 36.8 ms, sys: 3.42 ms, total: 40.2 ms
Wall time: 1.95 s


### Async Client

In [17]:
import asyncio
import nest_asyncio
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop, and asyncio.run() (used in the next cell) refuses to start
# inside a running loop unless nest_asyncio has patched asyncio -- confirm.
nest_asyncio.apply()

# Asynchronous counterpart of `client`, pointed at the same server endpoint.
async_client = tg.AsyncClient(f"http://{server_addr}")

In [18]:
%%time
async def batch_generate():
    """Issue one generation request per entry in SAMPLES and await them together.

    Returns the list produced by ``asyncio.gather``, i.e. one response per
    prompt, in the same order as SAMPLES.
    """
    pending = [
        async_client.generate(prompt, max_new_tokens=20) for prompt in SAMPLES
    ]
    return await asyncio.gather(*pending)


# Drive the coroutine to completion, then print each generated continuation.
results = asyncio.run(batch_generate())
for result in results:
    print(result.generated_text)


The quick brown fox jumps over the lazy dog.
The quick brown fox j

The first step in the process is to create a list of potential candidates. This list should include

The first time I heard the term “fake news” was in the context of the 
He was a master of disguise, and he had a knack for getting into places he
CPU times: user 105 ms, sys: 5.03 ms, total: 110 ms
Wall time: 620 ms
