In [71]:
from typing import TypedDict, List, Annotated, Union
from langgraph.graph.message import add_messages
from google import genai
import dotenv, os
from langgraph.graph import StateGraph, END, START
from model.db import chroma_client
from google.genai import types
import requests, json

dotenv.load_dotenv()

True

In [72]:
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
MODEL = "gemini-2.0-flash"

In [73]:
class SessionState(TypedDict):
    id: str ## unique identifier for the session as well as collection name
    session_name: str ## name of the session
    
    files: List[dict]
    messages: Annotated[List[dict], add_messages]
    
    invoke: str

In [130]:
def interface_agent(state: SessionState):
    print("state", state["invoke"])
    if state.get("invoke") == "message":
        state["invoke"] = "classify"
        return {
            "next_nodes": ["user_message_classifier"],  # Specify the next node(s)
            "state": state  # Return the updated state
        }
    
    # return { **state, "invoke": state.get("invoke") }
    if state.get("invoke") == "done" or state.get("invoke") == "ExtractData":
        return {
            "next_nodes": [END],  # Indicate the end of the graph
            "state": state  # Return the updated state
        }
        

In [None]:
def user_message_classifier(state: SessionState):
    system_prompt = '''Role: You are a classifier agent in an agentic AI system. Your task is to analyze user queries and determine the correct action the system should take. You must classify the intent of each user message into one of the following categories:

    🔹 Classification Categories:
    UpdateContext
    The user wants to store information in the agent's internal state for future reference.
    ➤ Examples:

    "The client's name is John."

    "We're organizing a conference on June 5th."

    "Add this to our team knowledge."

    SuggestOrCreateSchema
    The user is requesting help designing a schema or structured format to extract information.
    ➤ Examples:

    "Can you create a schema for extracting details from job applications?"

    "Suggest a structure to capture meeting notes."

    "What format should I use to store bug report info?"

    ExtractData
    The user provides a schema and wants you to apply it to extract structured data from text.
    design the shcema in the form of json string based on the user request
    ➤ Examples:

    "Here's a schema: {name, date, location}. Extract this from the paragraph below."

    "Use this format and pull details from the email."
    
    "extract data with fileds including title, name and age"

    "Apply this structure: {title, author, summary} to the following.
    
    ## Output Format:
Return your response as a JSON object with the following fields:

{
  "classification": "UpdateContext" | "SuggestOrCreateSchema" | "UseProvidedSchema",
  "reason": "Brief explanation to use it as a prompt for the next agent",
  "schema": "The schema to be used for the next agent(used in ExtractData)",
  "update_context": "The context to be updated in the agent's internal state(used in UpdateContext)",
}
'''

    # response = requests.post(
    #     "http://147.93.29.19:9876/searchData",
    #     json={
    #         "n_results": 1,
    #         "text": state["messages"][-1].content,
    #         "collection_id": state["id"]
    #     }
    # )
    
    
    query = state["messages"][-1].content
    
    response = client.models.generate_content(
        model=MODEL,
        config=types.GenerateContentConfig(system_instruction=system_prompt),
        contents=query
    )
    
    
    response = response.text
    response = json.loads(response[7:-3])
    val = response["classification"]
    state["invoke"] = val
    
    return state

In [None]:
def schema_definer(state: SessionState):
    system_prompt = '''Role:
You are a Schema Design Agent operating within an agentic AI flow. Your primary responsibility is to analyze markdown-formatted data parsed from diverse file types (e.g., CSVs, PDFs, JSON, PowerPoint slides, and others), understand its structure and semantics, and produce a well-formed structured schema that accurately represents the data model.

Objective:
From the given markdown content, extract entities, fields, relationships, and data types. Then output a structured schema in a clean, standardized format (preferably JSON Schema, Prisma model, or any custom internal format as instructed).

Instructions:
Parse for structure:
Understand the data's tabular, hierarchical, or semantic layout. Recognize headings, bullet lists, tables, and key-value formats in the markdown.

Identify Entities and Fields:

Identify all possible entities (tables/objects/classes).

For each entity, detect relevant fields/attributes, along with their data types and descriptions (if implied).

Include primary keys, foreign keys, or any obvious relationships.

Infer Data Types:
Infer primitive types (string, number, boolean, date, enum, object, array) from the content or examples.

Preserve Naming Semantics:
Use semantic, human-readable names where available. Normalize naming conventions (e.g., camelCase, snake_case) depending on the final output format.

Output Format:
Return the schema in a structured format (e.g., JSON Schema, Prisma schema, or custom format as specified in the query context).

Handle Ambiguities Gracefully:
When uncertain, make an informed guess and add comments or flags for human review.

No assumptions from external context:
Only rely on what's present in the provided markdown. Don't hallucinate fields or metadata not evident in the data.

Example Inputs:
Tables from parsed CSVs embedded in markdown

Bullet points representing properties of a system (from PDFs)

Nested key-value blocks (from JSON)

Slide notes formatted as markdown (from PowerPoint)

Example Output:
{
  "entities": [
    {
      "name": "User",
      "fields": [
        {"name": "id", "type": "string", "description": "Unique identifier"},
        {"name": "email", "type": "string"},
        {"name": "signupDate", "type": "date"}
      ]
    },
    {
      "name": "Order",
      "fields": [
        {"name": "orderId", "type": "string"},
        {"name": "userId", "type": "string", "relation": "User.id"},
        {"name": "amount", "type": "number"}
      ]
    }
  ]
}'''

In [132]:
graph_builder = StateGraph(SessionState)

In [133]:
graph_builder.add_node(
    "interface_agent", interface_agent
)
graph_builder.add_node(
    "user_message_classifier", user_message_classifier
)

graph_builder.set_entry_point("interface_agent")
graph_builder.add_conditional_edges(
    "interface_agent",
    lambda state: interface_agent(state)["next_nodes"],  # Extract next_nodes
    {
        "user_message_classifier": "user_message_classifier",
        END: END
    },
)
graph_builder.add_edge("user_message_classifier", "interface_agent")

<langgraph.graph.state.StateGraph at 0x73127e6cf370>

In [134]:
graph = graph_builder.compile()

In [None]:
sample_input = {
    "id": "33dc47d9b2ed42f4b769c3d225ea2d4c",
    "session_name": "Test Session",
    "files": [],
    "messages": [
        # {"role": "user", "content": "suggest me a schema to extract information from the given data"},
        # {'role': 'user', 'content': 'Can you add a new event with name "Annual Meeting", date "2023-10-15", and location "New York"?'},
                # {"role": "user", "content": "extract the event details from the given data, including the event name, date, and location."},
    ],
    "invoke": "message"
}

In [136]:
output = graph.invoke(sample_input)

state message
state message
state ExtractData
state ExtractData


In [None]:
for event in output:
    print(event)

id
session_name
files
messages
invoke
