In [5]:
!pip install pinecone
!pip install langchain_core
!pip install langchain_groq
!pip install langgraph
!pip install langchain



In [6]:
!pip install pyngrok
!pip install fastapi
!pip install uvicorn
!pip install requests
!pip install nest-asyncio



In [23]:
!pip install sentence-transformers



ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-18' coro=<Server.serve() done, defined at /usr/local/lib/python3.12/dist-packages/uvicorn/server.py:69> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/server.py", line 67, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run
    s

In [38]:
import os
import json
import requests
from typing import List, Dict, Any, TypedDict
from pinecone import Pinecone
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langchain.tools import Tool
import warnings
warnings.filterwarnings('ignore')


In [39]:
from google.colab import userdata
PINECONE_API_KEY = userdata.get('PINECONE_KEY')
GROQ_API_KEY = userdata.get('GROQ_KEY')
OPENROUTER_API_KEY = userdata.get('OPENROUTER_KEY')

In [40]:
from sentence_transformers import SentenceTransformer

# load embedding model
embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [41]:
class PineconeRetriever:
    def __init__(self, api_key: str):
        self.pc = Pinecone(api_key=api_key)

    def retrieve_data(self, index_name: str, namespace: str, query: str, top_k: int = 5) -> List[Dict]:
        """Retrieve data from Pinecone index"""
        try:
            # Get index
            index = self.pc.Index(index_name)

            query_embedding = embed_model.encode(query).tolist()  # convert to list for Pinecone

            # For demo purposes, we'll create a simple embedding
            # In production, you'd use the same embedding model used for indexing
            # Here we'll use a simple query-based retrieval simulation

            # Query the index (assuming you have embeddings)
            # This is a placeholder - you'll need to generate proper embeddings

            print(f"Retrieving from the index {index_name} and namespace {namespace}")
            results = index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True,
                namespace=namespace
            )

            retrieved_docs = []
            for match in results.matches:
                retrieved_docs.append({
                    'id': match.id,
                    'score': match.score,
                    'content': match.metadata.get('chunk_text', ''),
                    'file_name': match.metadata.get('file_name', ''),
                    'page': match.metadata.get('page', 'N/A'),
                    'topic': match.metadata.get('topic', ''),
                    'name': match.metadata.get('name', '')
                })

            return retrieved_docs

        except Exception as e:
            print(f"Error retrieving from Pinecone: {e}")
            return []

In [80]:
class MultiAgentLLMSystem:
    def __init__(self, groq_api_key: str, content_control: Dict):
        self.content_control = content_control

        # LLM 1: Summarizer - Condenses content with citations
        self.summarizer = ChatGroq(
            model="llama3-8b-8192",
            api_key=groq_api_key,
            temperature=0.1
        )

        # LLM 2: Reasoner - Deep analysis with citations
        self.reasoner = ChatGroq(
            model="llama3-70b-8192",
            api_key=groq_api_key,
            temperature=0.2
        )

        # LLM 3: Stylist - Final formatting with citations
        self.stylist = ChatGroq(
            model="llama3-8b-8192",
            api_key=groq_api_key,
            temperature=0.3
        )

        # Main Agent: Orchestrates everything
        self.main_agent = ChatGroq(
            model="llama3-70b-8192",
            api_key=groq_api_key,
            temperature=0.1
        )

    def create_citation(self, metadata: Dict) -> str:
        """Create standardized citation format"""
        file_name = metadata.get('file_name', 'Unknown')
        page = metadata.get('page', 'N/A')
        topic = metadata.get('topic', '')

        citation = f"[Source: {file_name}, Page: {int(page)}"
        if topic:
            citation += f", Topic: {topic}"
        citation += "]"

        return citation

    def llm1_summarizer(self, content: str, metadata: Dict, length_control: str) -> str:
        """LLM 1: Summarizer with length control"""
        citation = self.create_citation(metadata)

        length_instructions = {
            "short": "Summarize in exactly 1-2 sentences",
            "medium": "Summarize in exactly 3-4 sentences",
            "long": "Summarize in exactly 1 paragraph (5-6 sentences)"
        }

        length_instruction = length_instructions.get(length_control, length_instructions["short"])

        prompt = f"""
        You are LLM Agent 1 - Medical Content Summarizer.

        Task: {length_instruction} while preserving key medical information.

        CRITICAL RULES:
        1. {length_instruction}
        2. Always end with this exact citation: {citation}
        3. Focus on most important medical facts only
        4. Use clear, precise language

        Content to summarize:
        {content[:800]}  # Limit input content

        Summary with citation:
        """

        response = self.summarizer.invoke([HumanMessage(content=prompt)])
        # print("response 1: ", response.content)

        return response.content

    def llm2_reasoner(self, query: str, content: str, metadata: Dict, length_control: str) -> str:
        """LLM 2: Reasoner with deep analysis"""
        citation = self.create_citation(metadata)

        length_instructions = {
            "short": "Provide reasoning in exactly 2-3 sentences",
            "medium": "Provide reasoning in exactly 4-5 sentences",
            "long": "Provide reasoning in exactly 1-2 paragraphs"
        }

        length_instruction = length_instructions.get(length_control, length_instructions["short"])

        prompt = f"""
        You are LLM Agent 2 - Medical Reasoning Expert.

        Task: Analyze content and provide medical reasoning for the user's query.

        CRITICAL RULES:
        1. {length_instruction}
        2. Always end with this exact citation: {citation}
        3. Focus on WHY and HOW aspects
        4. Connect information to the user's specific query
        5. Must include Citation in the given format at the end

        User Query: {query}
        Content: {content[:800]}

        Medical reasoning with citation:
        """

        response = self.reasoner.invoke([HumanMessage(content=prompt)])
        # print("response 2: ", response.content)
        return response.content

    def llm3_stylist(self, content: str, metadata: Dict, length_control: str) -> str:
        """LLM 3: Stylist for final formatting"""
        citation = self.create_citation(metadata)

        length_instructions = {
            "short": "Format in exactly 2-3 clear, readable sentences",
            "medium": "Format in exactly 1 well-structured paragraph",
            "long": "Format in exactly 2 well-structured paragraphs"
        }

        length_instruction = length_instructions.get(length_control, length_instructions["short"])

        prompt = f"""
        You are LLM Agent 3 - Content Stylist.

        Task: Reformat content to be clear and professional.

        CRITICAL RULES:
        1. {length_instruction}
        2. Preserve all medical information
        3. Make text easy to read and understand
        4. Always end with this exact citation: {citation}

        Content to style:
        {content}

        Styled content with citation:
        """

        response = self.stylist.invoke([HumanMessage(content=prompt)])
        # print("response 3: ", response.content)

        return response.content

In [81]:
class State(TypedDict):
    messages: List[BaseMessage]
    retrieved_docs: List[Dict]
    llm1_outputs: List[str]
    llm2_outputs: List[str]
    llm3_outputs: List[str]
    final_response: str
    query: str
    length_control: str
    namespace: str

In [82]:
class AgenticAISystem:
    def __init__(self, pinecone_api_key: str, groq_api_key: str):
        self.pinecone_retriever = PineconeRetriever(pinecone_api_key)
        self.setup_graph()

    def setup_graph(self):
        """Setup LangGraph workflow"""

        def retrieve_node(state: State):
            """Node 1: Retrieve from Pinecone"""
            query = state["query"]
            retrieved_docs = self.pinecone_retriever.retrieve_data(
                index_name=state.get("index_name", "alphawell"),
                namespace=state.get("namespace", "__default__"),
                query=query,
                top_k=3  # Limit docs for content control
            )
            return {"retrieved_docs": retrieved_docs}

        def multi_llm_processing_node(state: State):
            """Node 2: Process through all 3 LLMs"""
            if not GROQ_API_KEY:
                return {"llm1_outputs": [], "llm2_outputs": [], "llm3_outputs": []}

            multi_llm = MultiAgentLLMSystem(GROQ_API_KEY, {})

            query = state["query"]
            length_control = state.get("length_control", "short")
            retrieved_docs = state.get("retrieved_docs", [])

            print("Retrieveed docs from Pinecone: ", retrieved_docs)

            llm1_outputs = []
            llm2_outputs = []
            llm3_outputs = []

            # Process each document through all 3 LLMs
            for doc in retrieved_docs[:2]:  # Limit to top 2 docs for control
                if doc.get('content'):
                    # LLM 1: Summarize
                    summary = multi_llm.llm1_summarizer(
                        doc['content'], doc, length_control
                    )
                    llm1_outputs.append(summary)

                    # LLM 2: Reason
                    reasoning = multi_llm.llm2_reasoner(
                        query, doc['content'], doc, length_control
                    )
                    llm2_outputs.append(reasoning)

                    # LLM 3: Style the combined content
                    combined = f"Summary: {summary}\nReasoning: {reasoning}"
                    styled = multi_llm.llm3_stylist(
                        combined, doc, length_control
                    )
                    llm3_outputs.append(styled)

            return {
                "llm1_outputs": llm1_outputs,
                "llm2_outputs": llm2_outputs,
                "llm3_outputs": llm3_outputs
            }

        def main_agent_node(state: State):
            """Node 3: Main Agent combines everything"""
            query = state["query"]

            # Check for general conversation
            medical_keywords = ['medicine', 'drug', 'dose', 'side effect', 'treatment', 'cure', 'symptom', 'warning']
            is_medical = any(keyword in query.lower() for keyword in medical_keywords)

            if not is_medical:
                main_llm = ChatGroq(model="llama3-8b-8192", api_key=GROQ_API_KEY, temperature=0.3)
                response = main_llm.invoke([
                    SystemMessage(content="You are a friendly AI assistant. Keep responses brief and natural."),
                    HumanMessage(content=query)
                ])
                return {"final_response": response.content}

            # Medical query - combine all LLM outputs
            llm3_outputs = state.get("llm3_outputs", [])

            if not llm3_outputs:
                return {"final_response": "I don't have specific information about that in my medical database. Please consult a healthcare professional."}

            length_control = state.get("length_control", "short")

            length_instructions = {
                "short": "Provide final answer in exactly 3-4 sentences total",
                "medium": "Provide final answer in exactly 1-2 paragraphs",
                "long": "Provide final answer in exactly 2-3 paragraphs"
            }

            length_instruction = length_instructions.get(length_control, length_instructions["short"])

            combined_content = "\n\n".join(llm3_outputs)

            prompt = f"""
            You are the Main AI Agent coordinating medical information.

            Task: Provide comprehensive answer based on processed information from 3 specialist LLMs.

            CRITICAL RULES:
            1. {length_instruction}
            2. Preserve ALL citations from the content
            3. Combine information logically
            4. Ensure medical accuracy
            5. Answer directly addresses user's query
            6. Strictly include Citation in the given format at the end

            User Query: {query}
            Processed Content from LLM Agents:
            {combined_content}

            Final comprehensive answer:
            Citation: [PDF: , Page No: , Topic: ]
            """

            main_llm = ChatGroq(model="llama3-70b-8192", api_key=GROQ_API_KEY, temperature=0.1)
            response = main_llm.invoke([HumanMessage(content=prompt)])

            # print("response 4: ", response.content)

            return {"final_response": response.content}

        # Build the graph
        builder = StateGraph(State)
        builder.add_node("retrieve", retrieve_node)
        builder.add_node("process_llms", multi_llm_processing_node)
        builder.add_node("main_agent", main_agent_node)

        # Add edges
        builder.add_edge(START, "retrieve")
        builder.add_edge("retrieve", "process_llms")
        builder.add_edge("process_llms", "main_agent")
        builder.add_edge("main_agent", END)

        self.graph = builder.compile()

    def process_query(self, index_name: str, namespace: str, query: str, length: str = "short") -> str:
        """Process query through multi-agent system"""
        initial_state = {
            "messages": [HumanMessage(content=query)],
            "retrieved_docs": [],
            "llm1_outputs": [],
            "llm2_outputs": [],
            "llm3_outputs": [],
            "final_response": "",
            "query": query,
            "length_control": length,
            "index_name": index_name,
            "namespace": namespace
        }

        final_state = self.graph.invoke(initial_state)
        return final_state.get("final_response", "Unable to process query.")

In [83]:
def agenticAI(index_name: str, namespace: str, query: str, length: str = "short") -> str:
    if not PINECONE_API_KEY:
        return "Please set PINECONE_API_KEY"
    if not GROQ_API_KEY:
        return "Please set GROQ_API_KEY"

    try:
        ai_system = AgenticAISystem(PINECONE_API_KEY, GROQ_API_KEY)
        # print("given: ", namespace)
        return ai_system.process_query(index_name, namespace, query, length)
    except Exception as e:
        return f"Error: {str(e)}"

In [84]:
print("Multi-Agent Agentic AI System Ready!")

result = agenticAI('alphawell', '94uo6WQUnIV9izlJrRBsXXX2025-08-26_18-55-24', "what is Hematological Reactions?", "large")
print(result)

Multi-Agent Agentic AI System Ready!
Retrieving from the index alphawell and namespace 94uo6WQUnIV9izlJrRBsXXX2025-08-26_18-55-24
Retrieveed docs from Pinecone:  [{'id': '94uo6WQUnIV9izlJrRBs_4de2c0cafb6470b55779', 'score': 0.441521734, 'content': 'medicine: highlights of prescribing information - topic: use in specific populations - context: (5.5) ] • hematological reactions [see', 'file_name': 'humira.pdf', 'page': 0.0, 'topic': 'use in specific populations', 'name': 'highlights of prescribing information'}, {'id': '94uo6WQUnIV9izlJrRBs_51835a2ad38dfb057976', 'score': 0.369448453, 'content': 'medicine: highlights of prescribing information - topic: other - context: rare reports of pancytopenia including aplastic anemia have been reported with tnf blocking agents. adverse reactions of the hematologic system, including medically significant cytopenia (e.g., thrombocytopenia, leukopenia) have been infrequently reported with humira. the causal relationship of these reports to humira rema

In [85]:
from fastapi import FastAPI, UploadFile, File, Form
from pydantic import BaseModel
import uvicorn
from pyngrok import ngrok
from fastapi.middleware.cors import CORSMiddleware
import nest_asyncio
# import pinecone
# from langchain.vectorstores import Pinecone

app = FastAPI()

origins=["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)

class QueryRequest(BaseModel):
    request: str
    namespace: str

# Agentic AI
@app.post("/alpha_bot80")
async def respond(data: QueryRequest):
    print("User Query:", data.request)
    # print("Namespace:", data.namespace)
    answer = agenticAI('alphawell', data.namespace, data.request, "large")
    return {"answer": answer}

In [None]:
import os
from google.colab import userdata

ngrok_token = userdata.get('NGROK_AUTH_TOKEN')

!ngrok authtoken $ngrok_token

ngrok_tunnel = ngrok.connect(8000)
print("Public URL", ngrok_tunnel.public_url)
nest_asyncio.apply()
uvicorn.run(app, port=8000)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL https://72f6d1313fc6.ngrok-free.app


INFO:     Started server process [26610]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     2401:4900:889f:3b70:2cb0:7f49:8e26:7dea:0 - "OPTIONS /alpha_bot80 HTTP/1.1" 200 OK
User Query: [user] hi i am gokul
[assistant] ⏳ Processing...
[user] 
Retrieving from the index alphawell and namespace P8OVDfZGhfAZzsIzOb57XXX2025-08-26_21-20-27
Retrieveed docs from Pinecone:  [{'id': 'P8OVDfZGhfAZzsIzOb57_69535e1276dc64eb6e5b', 'score': 0.12847425, 'content': 'medicine: highlights of prescribing information - topic: interactions - context: with dayvigo 8', 'file_name': 'davigo.pdf', 'page': 0.0, 'topic': 'interactions', 'name': 'highlights of prescribing information'}, {'id': 'P8OVDfZGhfAZzsIzOb57_2eeac71c3b6a1baf275e', 'score': 0.126646057, 'content': 'medicine: highlights of prescribing information - topic: other - context: distributed by: eisai inc. woodcliff lake, nj 07677 dayvigo tm is a trademark of eisai r&d management co., ltd. and is licensed to eisai inc.', 'file_name': 'davigo.pdf', 'page': 20.0, 'topic': 'other', 'name': 'highlights of prescribing information'}, {