# Async with OpenAI API

In [None]:
import asyncio
from openai import AsyncOpenAI

# Shared async client used by ask(); "your-api-key" is a placeholder to replace with a real key.
client = AsyncOpenAI(api_key="your-api-key")

async def ask(prompt):
    """Send ``prompt`` as a single user message and return the first choice's text."""
    user_message = {"role": "user", "content": prompt}
    completion = await client.chat.completions.create(
        model="gpt-4",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

async def main():
    """Run two sample prompts concurrently and print the replies in order."""
    questions = ("Explain quantum computing", "Summarize Moby Dick")
    # gather() returns results in the same order the awaitables were passed.
    answers = await asyncio.gather(*(ask(question) for question in questions))
    for answer in answers:
        print(answer)

asyncio.run(main())


# Full Async LLM Script Using AsyncOpenAI

In [None]:
# In terminal:

export OPENAI_API_KEY="your-api-key-here"  # macOS/Linux
# or
# Windows (cmd.exe) - no quotes, they would become part of the value:
set OPENAI_API_KEY=your-api-key-here


In [None]:
import asyncio
import os
from openai import AsyncOpenAI

# Optional: set a fallback API key here. setdefault() keeps a key that is
# already present in the environment instead of overwriting it with the
# placeholder.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

# Async client shared by the coroutines in this cell.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_llm(prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    A progress line is printed before the request and after the response.

    Args:
        prompt: Text sent as the content of the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no text.
    """
    print(f"Sending prompt: {prompt}")

    response = await client.chat.completions.create(
        model="gpt-4",  # Or "gpt-3.5-turbo"
        messages=[{"role": "user", "content": prompt}],
    )

    # The SDK types ``content`` as Optional, so guard against None before
    # calling .strip().
    content = response.choices[0].message.content
    answer = (content or "").strip()
    print(f"Received response for: {prompt}")
    return answer

async def main():
    """Send three sample prompts concurrently and print each reply in order."""
    sample_prompts = (
        "Explain LangChain in simple terms.",
        "List 3 benefits of using async in Python.",
        "What are the use-cases of vector databases with LLMs?",
    )

    # Building the coroutine objects does not start them; gather() does.
    pending = [call_llm(text) for text in sample_prompts]

    print("\nSending prompts asynchronously...\n")
    replies = await asyncio.gather(*pending)

    print("\nResponses:\n")
    for number, reply in enumerate(replies, start=1):
        print(f"\nPrompt {number}:\n{reply}")


# if __name__ == "__main__":
#     asyncio.run(main())


async def wrapper():
    """Run ``main()`` on the event loop that is already running.

    This coroutine is awaited at top level (``await wrapper()``), so an event
    loop is already running when it executes. Calling
    ``loop.run_until_complete()`` on a running loop raises ``RuntimeError``,
    and so does closing it, so ``main()`` is simply awaited instead.
    """
    await main()

await wrapper()


# Limiting Concurrent API Calls

## 🤔 What Does "Limiting Concurrent API Calls" Mean?

When you use asyncio.gather() to call multiple APIs, they all run at the same time (concurrently).

But sometimes:

* OpenAI or other providers rate-limit your requests (e.g., 10 requests per second).

* Your app might overwhelm the API or your local resources (network, CPU).

* You want to control load, especially in production.

### 🔐 Limiting concurrency means:
➡️ “Only allow X number of API calls to happen at once, even if I have 100 total tasks.”

In [None]:
import asyncio
import os
from openai import AsyncOpenAI

# Fall back to the placeholder only when no key is set yet, so a key that is
# already present in the environment is not overwritten.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
# Async client shared by the coroutines in this cell.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def call_llm(prompt: str, semaphore: asyncio.Semaphore) -> str:
    """Send ``prompt`` to the chat API while holding a slot of ``semaphore``.

    Args:
        prompt: Text sent as the content of the user message.
        semaphore: Held for the duration of the request, so its initial value
            bounds how many calls run at once.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no text.
    """
    async with semaphore:
        print(f"Sending: {prompt}")
        response = await client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        # The SDK types ``content`` as Optional, so guard against None before
        # calling .strip().
        content = response.choices[0].message.content
        return (content or "").strip()

async def main():
    """Send five sample prompts with at most two requests in flight, then print the replies."""
    limiter = asyncio.Semaphore(2)  # Only 2 requests at once

    sample_prompts = (
        "What's the capital of France?",
        "Explain quantum computing.",
        "How does LangChain work?",
        "Summarize the Matrix movie.",
        "List 3 benefits of Python.",
    )

    # Every coroutine shares the same semaphore, which is what enforces the cap.
    replies = await asyncio.gather(
        *(call_llm(text, limiter) for text in sample_prompts)
    )

    for number, reply in enumerate(replies, start=1):
        print(f"\nPrompt {number}:\n{reply}")



# if __name__ == "__main__":
#     asyncio.run(main())


async def wrapper():
    """Run ``main()`` on the event loop that is already running.

    This coroutine is awaited at top level (``await wrapper()``), so an event
    loop is already running when it executes. Calling
    ``loop.run_until_complete()`` on a running loop raises ``RuntimeError``,
    and so does closing it, so ``main()`` is simply awaited instead.
    """
    await main()

await wrapper()

## Why This Matters in LLM Projects

In large applications (like LangChain chains with multiple tools or prompts):
* You often trigger many API calls in parallel.
* OpenAI may throttle or reject your calls.
* This pattern keeps your app polite and stable.

# Add error handling for rate limits and timeouts

### Final Working Version (Error Handling + Retry + Concurrency Limit)

In [None]:
import asyncio
import os
from openai import AsyncOpenAI, RateLimitError, APITimeoutError, APIConnectionError
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type


# Create the client and a semaphore.
# setdefault() keeps a key that is already present in the environment instead
# of overwriting it with the placeholder.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Shared limiter: at most two holders at a time.
semaphore = asyncio.Semaphore(2)


# Wrap the API call with @retry logic
@retry(
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    stop=stop_after_attempt(3),
    reraise=True
)
async def safe_call_llm(prompt: str) -> str:
    """Send ``prompt`` to the chat API, retrying on transient failures.

    The decorator retries on rate-limit, connection, and timeout errors with
    exponential backoff, stops after three attempts, and re-raises the last
    error. Other exception types propagate immediately.

    Args:
        prompt: Text sent as the content of the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no text.
    """
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        timeout=30,
    )
    # The SDK types ``content`` as Optional, so guard against None before
    # calling .strip().
    content = response.choices[0].message.content
    return (content or "").strip()



# Add the semaphore and full error handling
async def call_llm(prompt: str, semaphore: asyncio.Semaphore) -> str:
    """Call ``safe_call_llm`` under ``semaphore`` and never raise to the caller.

    Any exception from the call is printed and replaced by a fixed error
    string, so one failed prompt does not abort the whole batch.
    """
    fallback = "Error: Could not generate response."
    async with semaphore:
        print(f"Calling LLM for: {prompt}")
        try:
            # Awaiting inside the try keeps failures within the handler below.
            return await safe_call_llm(prompt)
        except Exception as exc:
            print(f"❌ Failed to handle prompt '{prompt}': {type(exc).__name__} - {exc}")
            return fallback

async def main():
    """Send five sample prompts through ``call_llm`` concurrently and print the replies."""
    sample_prompts = (
        "What's the capital of France?",
        "Explain quantum computing.",
        "How does LangChain work?",
        "Summarize the Matrix movie.",
        "List 3 benefits of Python.",
    )

    # All calls share the module-level semaphore.
    replies = await asyncio.gather(
        *(call_llm(text, semaphore) for text in sample_prompts)
    )

    for number, reply in enumerate(replies, start=1):
        print(f"\nPrompt {number}:\n{reply}")




# if __name__ == "__main__":
#     asyncio.run(main())


async def wrapper():
    """Run ``main()`` on the event loop that is already running.

    This coroutine is awaited at top level (``await wrapper()``), so an event
    loop is already running when it executes. Calling
    ``loop.run_until_complete()`` on a running loop raises ``RuntimeError``,
    and so does closing it, so ``main()`` is simply awaited instead.
    """
    await main()

await wrapper()


### Note:

| Technique                         | Why It's Important                        |
| --------------------------------- | ----------------------------------------- |
| `@retry` with `tenacity`          | Automatically retries on temporary errors |
| `async with semaphore`            | Limits how many requests are sent at once |
| `asyncio.gather()`                | Runs many LLM calls concurrently          |
| Exception handling (`try/except`) | Prevents app crashes on failure           |


# LangChain agent

Create a LangChain-based LLM agent or chain that:

* Sends multiple prompts or tool calls concurrently

* Limits concurrency using asyncio.Semaphore

* Retries failed calls with tenacity

In [None]:
import asyncio
import os
from langchain_openai import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import tool
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from openai import RateLimitError, APIConnectionError

# Fall back to the placeholder only when no key is set yet, so a key that is
# already present in the environment is not overwritten.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

@tool
def get_temperature(city: str) -> str:
    """Return a made-up current temperature (15-30 °C) for the given city."""
    # The docstring above is required: the @tool decorator uses it as the
    # tool description and rejects functions that have none.
    import random
    return f"The temperature in {city} is {random.randint(15, 30)}°C"

# Chat model handed to the agent (gpt-4, temperature 0).
llm = ChatOpenAI(model="gpt-4", temperature=0)
# Tools exposed to the agent: only the temperature lookup.
tools = [get_temperature]

# Build the agent from the model and tools using the zero-shot
# ReAct-description agent type.
# NOTE(review): verbose=True presumably echoes the agent's intermediate steps,
# and handle_parsing_errors=True presumably lets it recover from malformed
# model output - confirm against the LangChain docs.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True
)

semaphore = asyncio.Semaphore(2)  # Max 2 concurrent agent calls

@retry(
    retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    stop=stop_after_attempt(3),
    reraise=True,
)
async def safe_call_chain(chain, input_text):
    """Invoke ``chain`` with ``input_text`` under the shared semaphore.

    The semaphore is acquired inside the retried function, so each attempt
    takes a slot for the duration of one ``ainvoke`` call only.
    """
    payload = {"input": input_text}
    async with semaphore:
        outcome = await chain.ainvoke(payload)
    return outcome

async def main():
    """Ask the agent for the temperature in four cities concurrently and print each output."""
    cities = ("Berlin", "Tokyo", "San Francisco", "Delhi")
    questions = [f"What's the temperature in {city}?" for city in cities]

    outcomes = await asyncio.gather(
        *(safe_call_chain(agent, question) for question in questions)
    )

    for number, outcome in enumerate(outcomes, start=1):
        answer = outcome["output"]
        print(f"\nPrompt {number} result:\n{answer}")

if __name__ == "__main__":
    asyncio.run(main())
