In [5]:
from unifiedllm import AsyncLLM,ProcessOutput

# Expects the following API keys to be saved in a `.env` file
- PERPLEXITY_API_KEY
- OPENAI_API_KEY 
- ANTHROPIC_API_KEY 
- DEEPSEEK_API_KEY 
- GROQ_API_KEY

# Initialize the async LLM client

In [6]:
# Create the async client; every batch request in this notebook is sent through it.
# NOTE(review): presumably picks up the provider API keys from the .env file — confirm.
allm = AsyncLLM()

# Sending 2 requests in async mode

In [7]:
# Two independent questions that will be sent as one batch.
prompts = [
    "What is the capital of France?",
    "What is the capital of Germany?",
]
# One identical system message per prompt, so both lists stay the same length.
system_messages = [
    "You are a helpful assistant and answer in less than 10 words."
] * len(prompts)

# Collecting the results of the requests using await

In [10]:
responses = await allm.batch_chat_complete(prompts=prompts,system_messages=system_messages,model="llama-3.2-1b-preview")

# The response is a list of tuples; each tuple contains the text output and its cost

In [11]:
# Inspect the raw results returned by the batch call.
responses

[('Paris.', 2.3600000000000003e-06),
 ('Berlin is the capital of Germany.', 2.56e-06)]

# Post-processing LLM output to get JSON output

In [12]:
# Helper instance; its jsonify_* methods are used later in this notebook to
# parse the raw LLM text answers.
Processing = ProcessOutput()

In [20]:
prompts = ["What is the capital of France?", "What is the capital of Germany?"]
system_messages = ["You are a helpful assistant and answer only in json format {'question':question,'answer':1 word answer}" for _ in range(len(prompts))]

responses = await allm.batch_chat_complete(prompts=prompts,system_messages=system_messages,model="llama-3.2-1b-preview")
answers = [i[0] for i in responses]
cost = [i[1] for i in responses]

# Using the `jsonify_llm_new` method to extract JSON

In [26]:
# Parse every raw text answer individually; the resulting list is the cell output.
[Processing.jsonify_llm_new(raw_answer) for raw_answer in answers]

[{'question': 'What is the capital of France?', 'answer': 'Paris'},
 {'question': 'What is the capital of Germany?', 'answer': 'Berlin'}]

# Extracting JSON within a tag

In [31]:
prompts = ["What is the capital of France?", "What is the capital of Germany?"]
system_messages = ["You are a helpful assistant and answer only in json format <answer>{'question':question,'answer':1 word answer}</answer> within the <answer> and </answer> tags" for _ in range(len(prompts))]

responses = await allm.batch_chat_complete(prompts=prompts,system_messages=system_messages,model="llama-3.2-3b-preview")
answers = [i[0] for i in responses]
cost = [i[1] for i in responses]

In [32]:
# Inspect the raw text answers before parsing them.
answers

["<answer>{ 'question': 'What is the capital of France?', 'answer': 'Paris' }</answer>",
 "<answer>{ 'question': 'What is the capital of Germany?', 'answer': 'Berlin' }</answer>"]

In [34]:
# Parse each answer with tag_name="answer", matching the <answer> tags
# requested in the system prompt; the resulting list is the cell output.
[Processing.jsonify_llm(raw_answer, tag_name="answer") for raw_answer in answers]

[{'question': 'What is the capital of France?', 'answer': 'Paris'},
 {'question': 'What is the capital of Germany?', 'answer': 'Berlin'}]