In [None]:
# --- Cell 1: Install dependencies ---
%pip install langroid fastapi uvicorn pydantic[dotenv] langchain faiss-cpu


# Part 1 Agentic AI LLMs

## Example 1: Basic OpenAIGPTConfig

In [None]:

# --- Cell 2: Single Langroid Agent (Hello World) ---
import langroid as lr
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.language_models.openai_gpt import OpenAIGPTConfig

# Configure the model. The hosted OpenAI variant requires OPENAI_API_KEY in the
# environment (or a compatible endpoint); choose the model here:
# llm_cfg = OpenAIGPTConfig(chat_model="gpt-3.5-turbo", temperature=0.0)

# When using a local model, e.g. Llama2:
# from langroid.language_models.local_llm import LocalLLMConfig
# llm_cfg = LocalLLMConfig(model_path="/path/to/llama2/model", temperature=0.0)

# When using LM Studio (the variant that is active in this cell):
from langroid.language_models.openai_gpt import OpenAIGPTConfig

llm_cfg = OpenAIGPTConfig(
    api_base="http://localhost:1234/v1",
    api_key="not-needed",  # dummy value, required by client but ignored by LM Studio
    chat_model="local-llm",
    temperature=0.0,
)

# Build a single chat agent on top of that LLM configuration.
cfg = ChatAgentConfig(llm=llm_cfg, name="qa-agent")
agent = ChatAgent(cfg)

# Example query: one direct LLM call, no task loop.
response = agent.llm_response("Hello, what is an AI agent?")
print("Agent:", response.content)


## Example 1 Optional: Basic Chat with Assistant API
Requires an OpenAI API key.

In [None]:
from langroid.agent.openai_assistant import (
    OpenAIAssistantConfig,
    OpenAIAssistant,
    AssistantTool,
)
from langroid.language_models.openai_gpt import OpenAIGPTConfig, OpenAIChatModel

# Configure an OpenAIAssistant backed by the GPT-4 Turbo chat model.
cfg = OpenAIAssistantConfig(
    llm=OpenAIGPTConfig(
        chat_model=OpenAIChatModel.GPT4_TURBO,
    ),
)
agent = OpenAIAssistant(cfg)

# Single direct LLM call; the reply text is on `.content`.
response = agent.llm_response("What is the square of 3?")
print("Agent:", response.content)


## Example 2: Wrap Agent in a Task, run it
An OpenAIAssistant agent has various capabilities (LLM responses, agent methods/tools, etc) but there is no mechanism to iterate over these capabilities or with a human or with other agents. This is where the Task comes in: Wrapping this agent in a Task allows you to run interactive loops with a user or other agents (you will see more examples below).

In [None]:
from langroid.agent.task import Task

# Wrap the agent in a Task that handles one user turn: the system message asks
# for the antonym of the word passed to `run`.
# Fix: the prompt used to end with a stray `",` after "Be concise!", which was
# sent to the model as part of the instructions.
task = Task(
    agent,
    system_message="""User will give you a word,
      return its antonym if possible, else say DO-NOT-KNOW.
      Be concise!
      """,
    single_round=True
)
result = task.run("ignorant")

## Example 2 Optional: OpenAIAssistant Agent + Task with Code Interpreter
Here we attach the "code_interpreter" tool (from the OpenAI Assistant API) to the agent defined above, and run it in a task.
Requires an OpenAI API key.

In [None]:
# Attach the Assistant API's code interpreter to the existing agent, then run
# a non-interactive, single-round task with it.
agent.add_assistant_tools(
    [AssistantTool(type="code_interpreter")],
)
task = Task(agent, single_round=True, interactive=False)
result = task.run("What is the 10th Fibonacci number, if you start with 1,2?")

## Example 3: RAG
Download a file (a lease document), index its chunks in a local FAISS vector store, and ask the local LLM questions about the document.

In [None]:
from langroid.language_models.openai_gpt import OpenAIGPTConfig, OpenAIGPT
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings  # can be replaced with local embeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings


import requests

# Download the sample lease. Fail fast on HTTP errors and never wait forever,
# so an error page is not written to disk and indexed as if it were the lease.
file_url = "https://raw.githubusercontent.com/langroid/langroid-examples/main/examples/docqa/lease.txt"
response = requests.get(file_url, timeout=30)
response.raise_for_status()
with open('lease.txt', 'wb') as file:
    file.write(response.content)

# Step 1: load the document text
with open("lease.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Step 2: split into chunks and wrap each chunk in a Document
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
chunks = text_splitter.split_text(text)
docs = [Document(page_content=chunk) for chunk in chunks]

# Step 3: embed the chunks with a HuggingFace model and index them in FAISS
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}  # or "cuda" for GPU
)

vectordb = FAISS.from_documents(docs, embeddings)

# Step 4: set up the local LLM (OpenAI-compatible endpoint on localhost)
llm_cfg = OpenAIGPTConfig(
    chat_model="local-llm",
    temperature=0.0,
    api_base="http://localhost:1234/v1",
    api_key="not-needed"
)
llm = OpenAIGPT(llm_cfg)

# 5️⃣ Retrieval function
def retrieve_answer(query):
    """Answer ``query`` from the indexed document.

    Looks up the chunks returned by ``vectordb.similarity_search`` (k=3),
    joins their text into one context string, and sends a prompt containing
    that context and the question to ``llm.chat``.

    Returns:
        Whatever ``llm.chat`` returns for the assembled prompt.
    """
    top_chunks = vectordb.similarity_search(query, k=3)
    context = "\n".join(chunk.page_content for chunk in top_chunks)
    prompt = (
        "Answer the question based on the following document:\n"
        f"{context}\n\nQuestion: {query}"
    )
    return llm.chat(prompt)

# 6️⃣ Ask the question
answer = retrieve_answer(
    "What is the start date of the lease?"
)
print(answer)


You can also use local embeddings, or embeddings served over an HTTP request (for example by LM Studio).

In [None]:
import requests
import json

# Request an embedding from the local OpenAI-compatible /v1/embeddings endpoint.
url = "http://127.0.0.1:1234/v1/embeddings"
headers = {"Content-Type": "application/json"}
data = {
    "model": "text-embedding-nomic-embed-text-v1.5",
    "input": "Hello LM Studio!"
}

# A timeout keeps the cell from hanging forever if the server is unresponsive.
response = requests.post(url, headers=headers, data=json.dumps(data), timeout=60)

if response.status_code == 200:
    print("Embedding response:", response.json())
else:
    print("Error:", response.status_code, response.text)

## Example 3 - Optional: using OpenAIAssistant
OpenAIAssistant with Retrieval.
Requires an OpenAI API key.

In [None]:
import requests
# Download the sample lease. Fail fast on HTTP errors and never wait forever,
# so an error page is not saved and uploaded as if it were the lease.
file_url = "https://raw.githubusercontent.com/langroid/langroid-examples/main/examples/docqa/lease.txt"
lease_download = requests.get(file_url, timeout=30)
lease_download.raise_for_status()
with open('lease.txt', 'wb') as file:
    file.write(lease_download.content)

# verify
#with open('lease.txt', 'r') as file:
#   print(file.read())

# now create agent, add retrieval tool and file
agent = OpenAIAssistant(cfg)
agent.add_assistant_tools([AssistantTool(type="retrieval")])
agent.add_assistant_files(["lease.txt"])
response = agent.llm_response("What is the start date of the lease?")


## Example 4:  Task: Custom Function-calling
You can define your own custom function (or ToolMessage in Langroid terminology), enable the agent to use it, and have a special method to handle the message when the LLM emits such a message.

In [None]:
from langroid.language_models.openai_gpt import OpenAIGPTConfig, OpenAIGPT
import re

# Local LLM client: OpenAI-compatible endpoint served on localhost:1234.
llm_cfg = OpenAIGPTConfig(
    api_base="http://localhost:1234/v1",
    api_key="not-needed",
    chat_model="local-llm",
    temperature=0.0,
)
llm = OpenAIGPT(llm_cfg)

# Define tools
def square_tool(num: int) -> str:
    """Return the square of ``num`` rendered as a string."""
    squared = num ** 2
    return str(squared)

def cube_tool(num: int) -> str:
    """Return the cube of ``num`` rendered as a string."""
    cubed = num ** 3
    return str(cubed)

# Registry mapping the tool name the LLM may emit to the function implementing it.
tools = dict(
    square=square_tool,
    cube=cube_tool,
)

# Function to ask LLM which tool to use
def agent_response(user_input: str, llm_client=None, tool_registry=None) -> str:
    """Ask the LLM to pick a tool for ``user_input``, run it, and report the result.

    Args:
        user_input: The user's question, inserted verbatim into the prompt.
        llm_client: Object with ``chat(prompt)`` returning something that has a
            ``.message`` string. Defaults to the notebook-level ``llm``.
        tool_registry: Mapping of tool name to a callable taking one int.
            Defaults to the notebook-level ``tools``.

    Returns:
        A message with the selected tool and its result, or an explanatory
        message when the reply cannot be parsed or names an unknown tool.
    """
    # Resolve defaults lazily so existing calls `agent_response(text)` keep
    # using the globals defined earlier in this cell.
    if llm_client is None:
        llm_client = llm
    if tool_registry is None:
        tool_registry = tools

    # Step 1: LLM decides which tool and input
    prompt = f"""
You are an assistant. The user asks: "{user_input}".
You have two tools available:
- square(number): returns the square of a number
- cube(number): returns the cube of a number

Respond ONLY like this:
TOOL: <tool_name>, INPUT: <number>
"""
    llm_reply = llm_client.chat(prompt).message.strip()

    # Step 2: Parse tool name and input number.
    # `-?` accepts negative inputs such as "INPUT: -4", which the previous
    # digits-only pattern rejected as unparseable.
    tool_match = re.search(r"TOOL:\s*(\w+)", llm_reply)
    num_match = re.search(r"INPUT:\s*(-?\d+)", llm_reply)

    if not (tool_match and num_match):
        return "Could not determine the tool or input."

    tool_name = tool_match.group(1)
    num = int(num_match.group(1))

    # Step 3: Call the selected tool, if it is one we know about
    if tool_name not in tool_registry:
        return f"Unknown tool selected: {tool_name}"

    result = tool_registry[tool_name](num)
    return f"LLM selected tool: {tool_name}\nDONE: The result is {result}"

# Run examples: one question per tool
user_input1, user_input2 = "What is the square of 7?", "What is the cube of 3?"

response1 = agent_response(user_input1)
print(response1)

response2 = agent_response(user_input2)
print(response2)


## Example 4 Optional: OpenAIAssistant + Task: Custom Function-calling


In [None]:
# Define your own function for the LLM to call;
# this function will be executed by the Langroid agent as part of the task loop
# ToolMessage was not imported anywhere in the notebook, so import it here.
from langroid.agent.tool_message import ToolMessage


class SquareTool(ToolMessage):
    # Tool the LLM can invoke to square an integer.
    # `request` is the function name exposed to the LLM, `purpose` its
    # description, and `num` the single argument the LLM must supply.
    # The overrides carry explicit type annotations, as pydantic expects for
    # fields redefined in a subclass.
    request: str = "square"
    purpose: str = "To find the square of a number "
    num: int

    def handle(self) -> str:
        """Return the square of ``num`` as a string."""
        return str(self.num ** 2)

# Create the assistant agent and register the custom tool with it
cfg = OpenAIAssistantConfig(
    name="NumberExpert",
    llm=OpenAIGPTConfig(chat_model=OpenAIChatModel.GPT4_TURBO),
)
agent = OpenAIAssistant(cfg)
agent.enable_message(SquareTool)

# Non-interactive task whose system message tells the model to call `square`
task = Task(
    agent,
    interactive=False,
    system_message="""
    User will ask you to square a number.
    You do NOT know how, so you will use the
    `square` function to find the answer.
    When you get the answer say DONE and show it.
    """,
)
response = task.run("What is the square of 5?")

# Part 2 Wrap Agent with API

In [None]:
# --- Cell 4: Wrap with FastAPI (run in background) ---
import nest_asyncio
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
import asyncio

nest_asyncio.apply()  # allows uvicorn to run inside notebook

# FastAPI application object for the demo API.
app = FastAPI(title="Agentic AI Demo")

# Request model carrying a single string field, `question`.
class Query(BaseModel):
    question: str

# --- Updated RetrievalAgent with logging ---
class RetrievalAgent:
    """Answer questions from retrieved document chunks, logging each stage to stdout."""

    def __init__(self, llm, vectordb):
        # Both collaborators are stored as given; nothing is called here.
        self.vectordb = vectordb
        self.llm = llm

    def llm_response(self, query: str):
        """Retrieve chunks for ``query`` and ask the LLM to answer from them.

        Runs ``vectordb.similarity_search`` with k=3, joins the chunk texts
        with newlines, and passes the resulting prompt to ``llm.chat``.

        Returns:
            Whatever ``llm.chat`` returns for the assembled prompt.
        """
        print("[STATUS] Received query:", query)

        # Stage 1: retrieve relevant chunks
        print("[STATUS] Running vector search...")
        hits = self.vectordb.similarity_search(query, k=3)
        print(f"[STATUS] Found {len(hits)} relevant chunks")

        context = "\n".join(hit.page_content for hit in hits)
        print("[STATUS] Preparing LLM prompt...")

        # Stage 2: send the context and question to the LLM
        prompt = (
            "Answer the question based on the following document:\n"
            f"{context}\n\nQuestion: {query}"
        )
        print("[STATUS] Sending prompt to LLM...")
        llm_reply = self.llm.chat(prompt)
        print("[STATUS] Received response from LLM")

        return llm_reply


# Instantiate retrieval_agent
# Uses the `llm` and `vectordb` objects created in earlier cells, so those
# cells must have been run before this one.
retrieval_agent = RetrievalAgent(llm, vectordb)

from fastapi import FastAPI
from pydantic import BaseModel

# `app` and `Query` are already created near the top of this cell. The second,
# identical definitions that used to sit here only shadowed them, so the routes
# below now register on that original FastAPI instance.

# --- Health check endpoint ---
# Reports a static "ok" status plus the number of entries in the vector store's
# docstore, or "unknown" when the docstore has no `_dict` attribute.
@app.get("/health")
def health_check():
    docstore = vectordb.docstore
    if hasattr(docstore, "_dict"):
        num_docs = len(docstore._dict)
    else:
        num_docs = "unknown"
    return {"status": "ok", "num_docs_in_vectordb": num_docs}

# --- Ask endpoint ---
# Forwards the question to `retrieval_agent` and returns {"answer": ...};
# any exception is logged and returned as {"error": ...} instead of propagating.
@app.post("/ask")
def ask_agent(q: Query):
    try:
        print("[FASTAPI] Received request:", q.question)
        response = retrieval_agent.llm_response(q.question)

        # Take the first non-empty text attribute: `content`, then `message`.
        answer = None
        for attr_name in ("content", "message"):
            answer = getattr(response, attr_name, None)
            if answer:
                break
        if not answer:
            # Neither attribute held text; fall back to string conversion.
            answer = str(response)

        print("[FASTAPI] Returning response")
        return {"answer": answer}

    except Exception as exc:
        print("[FASTAPI] Error:", exc)
        return {"error": str(exc)}

# --- Run Uvicorn in background ---
import threading
import uvicorn

import socket
import time


def run_app():
    """Serve `app` with uvicorn on 127.0.0.1:8000 (thread target)."""
    uvicorn.run(app, host="127.0.0.1", port=8000)

thread = threading.Thread(target=run_app, daemon=True)
thread.start()

# Wait (up to 10 s) until the port accepts connections before announcing the
# server. Previously the messages were printed immediately, so running the next
# cell straight away could hit the server before it was listening.
server_ready = False
deadline = time.monotonic() + 10
while time.monotonic() < deadline and thread.is_alive():
    try:
        with socket.create_connection(("127.0.0.1", 8000), timeout=0.5):
            server_ready = True
            break
    except OSError:
        time.sleep(0.1)

if server_ready:
    print("FastAPI server started on http://127.0.0.1:8000")
    print("Health check available at http://127.0.0.1:8000/health")
else:
    print("FastAPI server is not accepting connections on http://127.0.0.1:8000 "
          "(is port 8000 already in use?)")



In [None]:
# --- Cell 5: Test FastAPI endpoint ---
import requests

# Health check
# Uses a timeout and reports connection problems the same way the /ask call
# below does, instead of hanging or raising out of the cell.
try:
    res = requests.get("http://127.0.0.1:8000/health", timeout=10)
    print(res.json())
except requests.exceptions.RequestException as e:
    print("Health check failed:", e)


# ask question
payload = {"question": "What info is given in the how the lease would end?"}

try:
    # Add timeout to avoid hanging indefinitely
    res = requests.post("http://127.0.0.1:8000/ask", json=payload, timeout=300)

    print("Status code:", res.status_code)
    print("Raw response text:", res.text)  # debug output

    # Try parsing JSON only if response is not empty
    if res.text:
        print(res.json())
    else:
        print("Empty response received")
except requests.exceptions.Timeout:
    print("Request timed out. The LLM may be taking too long to respond.")
except requests.exceptions.RequestException as e:
    print("Request failed:", e)


In [None]:

# Clear the session: %reset with -f clears the interactive namespace.
%reset -f   
  