In [2]:
from langchain.chat_models import init_chat_model
from langchain.callbacks.tracers import LangChainTracer


import os
from dotenv import load_dotenv
# Load settings from a local .env file into the process environment before any
# model client is created (presumably API keys for the hosted models and the
# tracing backend -- confirm against the project's .env).
load_dotenv()

# Shared tracing callback, tagged with the project name "punching-bag-1";
# call_llm attaches it to every llm.invoke() call.
tracer_project = LangChainTracer(project_name="punching-bag-1")

In [8]:
def call_llm(prompt, model, temperature=0.7):
    """Send ``prompt`` to the chat model named ``model`` and return its reply.

    Args:
        prompt: Input forwarded unchanged to ``llm.invoke``.
        model: Model identifier handed to ``init_chat_model`` (the notebook
            uses values such as ``"ollama:llama3.2"`` and ``"gpt-4o"``).
        temperature: Sampling temperature handed to ``init_chat_model``.
            Defaults to 0.7, the value that used to be hard-coded here.

    Returns:
        The ``content`` attribute of the response returned by ``llm.invoke``.
    """
    print(f"Calling {model}")
    llm = init_chat_model(model=model, temperature=temperature)
    # Reuse the notebook-level tracer callback for every invocation.
    response = llm.invoke(prompt, config={"callbacks": [tracer_project]})
    return response.content

In [9]:
# Model identifiers that are queried side by side further down the notebook.
models = ["ollama:llama3.2", "gpt-4o"]

In [10]:
# Single-model sanity check; the returned string is shown as the cell's output.
call_llm("What is the capital of France?", "ollama:llama3.2")

Calling ollama:llama3.2


'The capital of France is Paris.'

In [11]:
import concurrent.futures

# Ask every entry in `models` the same question concurrently on a thread pool.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(call_llm, "What is the capital of France?", model)
        for model in models
    ]
    # Consume as_completed() lazily so each answer is printed as soon as its
    # future finishes, rather than only after every model has responded.
    # `results` still ends up holding the futures in completion order.
    results = []
    for result in concurrent.futures.as_completed(futures):
        results.append(result)
        # result() re-raises any exception raised inside call_llm.
        print(result.result())



Calling ollama:llama3.2
Calling gpt-4o
The capital of France is Paris.
The capital of France is Paris.
