In [38]:
import os
from pprint import pprint

from dotenv import find_dotenv, load_dotenv

# Load the .env file located by find_dotenv() into the process environment;
# override=True lets values from the file replace variables already set.
load_dotenv(find_dotenv(), override=True)

# Set the LANGCHAIN_PROJECT environment variable for everything run below.
os.environ["LANGCHAIN_PROJECT"] = "raglanggraph"

In [39]:
from langchain_community.vectorstores import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community import embeddings

# Columns whose values are joined into each document's page_content.
columns_to_embed = ["Number", " Incident Description", " Notes"]

# Columns copied verbatim into each document's metadata.  Every name except
# "Number" starts with a space; the names are used as-is for row lookups, so
# the space must be kept.
columns_to_metadata = [
    # Identity, state and lifecycle counters
    "Number", " Incident State", " Active",
    " Reassignment Count", " Reopen Count", " Sys Mod Count", " Made SLA",
    # People and audit timestamps
    " Caller ID", " Opened By", " Opened At",
    " Sys Created By", " Sys Created At",
    " Sys Updated By", " Sys Updated At",
    # Classification
    " Contact Type", " Location", " Category", " Subcategory",
    " U Symptom", " CMDB CI",
    # Triage
    " Impact", " Urgency", " Priority",
    # Ownership and notification
    " Assignment Group", " Assigned To",
    " Knowledge", " U Priority Confirmation", " Notify",
    # Linked records and resolution
    " Problem ID", " RFC", " Vendor", " Caused By",
    " Closed Code", " Resolved By", " Resolved At",
]

# One AttributeInfo per metadata field, built from (name, description, type)
# rows so the table can be scanned and edited in one place.
# NOTE(review): these names have no leading space, while the keys in
# `columns_to_metadata` do (" Incident State", ...) — confirm that whatever
# consumes this list expects the unspaced names.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in (
        ("Number", "Identifier of the incident", "string"),
        ("Incident State", "State of the incident (e.g., open, closed)", "string"),
        ("Active", "Indicator of whether the incident is active or not", "string"),
        ("Reassignment Count", "Number of times the incident has been reassigned", "integer"),
        ("Reopen Count", "Number of times the incident has been reopened", "integer"),
        ("Sys Mod Count", "System modification count", "integer"),
        ("Made SLA", "Indicator of whether the incident was made SLA (Service Level Agreement)", "string"),
        ("Caller ID", "Identifier of the caller", "string"),
        ("Opened By", "Identifier of the user who opened the incident", "string"),
        ("Opened At", "Timestamp when the incident was opened", "datetime"),
        ("Sys Created By", "Identifier of the system user who created the incident", "string"),
        ("Sys Created At", "Timestamp when the incident was created by the system", "datetime"),
        ("Sys Updated By", "Identifier of the system user who last updated the incident", "string"),
        ("Sys Updated At", "Timestamp when the incident was last updated by the system", "datetime"),
        ("Contact Type", "Type of contact for the incident (e.g., email, phone)", "string"),
        ("Location", "Location of the incident", "string"),
        ("Category", "Category of the incident", "string"),
        ("Subcategory", "Subcategory of the incident", "string"),
        ("U Symptom", "Symptom of the incident", "string"),
        ("CMDB CI", "Configuration Management Database CI (Configuration Item)", "string"),
        ("Impact", "Impact of the incident", "string"),
        ("Urgency", "Urgency of the incident", "string"),
        ("Priority", "Priority of the incident", "string"),
        ("Assignment Group", "Group assigned to handle the incident", "string"),
        ("Assigned To", "Person assigned to handle the incident", "string"),
        ("Knowledge", "Indicator of whether the incident has knowledge", "string"),
        ("U Priority Confirmation", "Confirmation of the priority of the incident", "string"),
        ("Notify", "Notification details for the incident", "string"),
        ("Problem ID", "Identifier of the problem associated with the incident", "string"),
        ("RFC", "Request for Change associated with the incident", "string"),
        ("Vendor", "Vendor associated with the incident", "string"),
        ("Caused By", "Cause of the incident", "string"),
        ("Closed Code", "Code indicating the closure of the incident", "string"),
        ("Resolved By", "Person who resolved the incident", "string"),
        ("Resolved At", "Timestamp when the incident was resolved", "datetime"),
    )
]

document_content_description = "IT Incidents info"


def _cell_text(value) -> str:
    """Render one CSV cell as stripped text.

    Missing cells (NaN/None) become an empty string and non-string cells
    (e.g. numbers) are converted with ``str`` first, because only ``str``
    objects have ``.strip()``.
    """
    return "" if pd.isna(value) else str(value).strip()


# Build one Document per CSV row: selected columns become the embedded text,
# the remaining selected columns travel along as metadata.
docs = []
df = pd.read_csv("test.csv", sep=";")
for index, row in df.iterrows():
    # `col in row` checks the row's index labels, so columns absent from the
    # file are skipped rather than raising KeyError.
    to_metadata = {col: row[col] for col in columns_to_metadata if col in row}
    values_to_embed = {k: row[k] for k in columns_to_embed if k in row}
    # Cells go through _cell_text so that an empty or numeric cell cannot
    # abort the whole load with AttributeError.
    to_embed = "\n".join(
        f"{k.strip()}: {_cell_text(v)}" for k, v in values_to_embed.items()
    )
    newDoc = Document(page_content=to_embed, metadata=to_metadata)
    docs.append(newDoc)

# Split on newlines into chunks of at most 1000 characters with no overlap.
splitter = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=0, length_function=len
)

all_splits = splitter.split_documents(docs)
embedding = embeddings.OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding)

In [40]:
from langchain_openai import ChatOpenAI
from pandasai.llm import OpenAI
import os

# Chat model; later in the notebook it is bound to the tool schemas and
# invoked by `supervisor`.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")

# Separate PandasAI LLM wrapper used by the `analyst` tool.  Reads the key from
# the environment and raises KeyError if OPENAI_API_KEY is not set.
llmPandasAI = OpenAI(api_token=os.environ["OPENAI_API_KEY"])

In [41]:
from typing import Annotated, List, Tuple, Union
from langchain.tools import BaseTool, StructuredTool, Tool
from langchain_experimental.tools import PythonREPLTool
from langchain_core.tools import tool
from pandasai import SmartDataframe
import pandas as pd

In [42]:
from pandasai import Agent
from typing import Any
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import StructuredTool

# Argument schema for the "retriever" tool (passed as `args_schema` below).
# The field descriptions are presumably surfaced to the model through the
# generated function schema — confirm against the tool-rendering helper.
class retrieverInput(BaseModel):
    input: str = Field(description="Input string to search and retrieve documents")
    k: int = Field(description = "This value is to fetch only top K documents")
    
def retrieve(input: str, k:int)->list:
    """Return the documents the vector store considers relevant to ``input``.

    Args:
        input: Query text handed to the retriever.
        k: Value passed as ``search_kwargs["k"]`` when the retriever is built.

    Returns:
        Whatever ``get_relevant_documents`` returns for the query.
    """
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    return top_k_retriever.get_relevant_documents(input)
    
# Wrap `retrieve` as a StructuredTool named "retriever" whose arguments follow
# the `retrieverInput` schema.
retrieverDoc = StructuredTool.from_function(
    func=retrieve,
    name="retriever",
    description="Retrieves relevant information about a particular incident, if there is no relevant document then increase k value",
    args_schema=retrieverInput,
    return_direct=False
)



# Argument schema for the "dataAnalyst" tool: a single free-text question.
class dataAnalystInput(BaseModel):
    input: str = Field(description="Input string for data analyst tool to answer")
def analyst(input: str)->Any:
    """Answer a question about the incident DataFrame via PandasAI.

    A new ``SmartDataframe`` over the module-level ``df`` is created on every
    call, configured with ``llmPandasAI``, and asked ``input``.

    Args:
        input: The question to pass to ``SmartDataframe.chat``.

    Returns:
        Whatever ``chat`` returns for the question.
    """
    return SmartDataframe(df, config={"llm": llmPandasAI}).chat(input)
    
# Wrap `analyst` as a StructuredTool named "dataAnalyst" whose arguments follow
# the `dataAnalystInput` schema.  Unlike the retriever tool, this one is
# created with return_direct=True.
dataAnalyst = StructuredTool.from_function(
    func=analyst,
    name="dataAnalyst",
    description="This tool answers the question for user input using pandas in the background and returns response, if user asks for any question which required full overview, data transformation then use this. Example: Give me total count of data. Answer: Total incident count is 100",
    args_schema=dataAnalystInput,
    return_direct=True,
)



# Tools made available to the model.
tools = [retrieverDoc, dataAnalyst]

from langchain.tools.render import format_tool_to_openai_function


from langgraph.prebuilt.tool_executor import ToolExecutor

# Executor that `call_tool` uses to run the tool the model asked for.
tool_executor = ToolExecutor(tools)
# One OpenAI function schema per tool.
functions = [format_tool_to_openai_function(t) for t in tools]
# NOTE(review): `llm` is replaced by its own function-bound version, so
# re-running this cell binds an already-bound object and `llm` no longer refers
# to the plain chat model — consider a separate name for the bound model.
llm = llm.bind_functions(functions)

In [43]:
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

def create_agent(llm: ChatOpenAI, tools: list, system_prompt: str):
    """Build an ``AgentExecutor`` for an OpenAI-tools agent.

    Args:
        llm: Chat model that drives the agent.
        tools: Tools given both to the agent and to the executor.
        system_prompt: Text used as the system message of the prompt.

    Returns:
        An ``AgentExecutor`` wrapping the created agent and the same tools.
    """
    # System instruction first, then the conversation so far, then the
    # placeholder for the agent's scratchpad.
    message_templates = [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    worker_prompt = ChatPromptTemplate.from_messages(message_templates)
    worker_agent = create_openai_tools_agent(llm, tools, worker_prompt)
    return AgentExecutor(agent=worker_agent, tools=tools)

In [44]:
def agent_node(state,agent,name):
    """Run ``agent`` on ``state`` and wrap its output as a named message.

    Args:
        state: Graph state passed unchanged to ``agent.invoke``.
        agent: Object whose ``invoke`` result has an ``"output"`` entry.
        name: Name attached to the resulting ``HumanMessage``.

    Returns:
        A state update with a single-element ``"messages"`` list.
    """
    agent_output = agent.invoke(state)["output"]
    reply = HumanMessage(content=agent_output, name=name)
    return {"messages": [reply]}

In [45]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage


# State schema handed to StateGraph.  The single `messages` channel is
# annotated with operator.add; the node functions below return one-element
# lists that are meant to be added to the existing message list.
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [46]:
from langchain_core.agents import AgentFinish
from langgraph.prebuilt import ToolInvocation
import json
from langchain_core.messages import FunctionMessage

def supervisor(state):
    """Send the conversation so far to the model and return its reply.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update whose ``"messages"`` list holds the model's response.
    """
    model_reply = llm.invoke(state["messages"])
    return {"messages": [model_reply]}

def should_continue(state):
    """Choose the next edge based on the most recent message.

    Returns ``"continue"`` when the last message's ``additional_kwargs``
    contains a ``"function_call"`` entry, otherwise ``"end"``.
    """
    newest = state["messages"][-1]
    wants_tool = "function_call" in newest.additional_kwargs
    return "continue" if wants_tool else "end"
def call_tool(state):
    """Run the tool requested by the last message and report its result.

    The last message's ``additional_kwargs["function_call"]`` supplies the
    tool name and a JSON-encoded argument string.  The message, the action and
    the tool result are printed along the way.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update whose ``"messages"`` list holds one ``FunctionMessage``
        carrying ``str(result)`` and the tool's name.
    """
    pending_call = state["messages"][-1]
    print(pending_call)

    requested = pending_call.additional_kwargs["function_call"]
    action = ToolInvocation(
        tool=requested["name"],
        tool_input=json.loads(requested["arguments"]),
    )
    print(f"The agent action is {action}")

    # Execute the requested tool.
    tool_output = tool_executor.invoke(action)
    print(f"The tool result is: {tool_output}")

    # Returned as a list so it is added to the existing messages.
    return {"messages": [FunctionMessage(content=str(tool_output), name=action.tool)]}

In [47]:
from langgraph.graph import END, StateGraph

# Two-node graph: the supervisor (model call) and the tool runner.
workflow = StateGraph(AgentState)


workflow.add_node("supervisor",supervisor)
workflow.add_node("call_tool", call_tool)

# Every run starts with the supervisor.
workflow.set_entry_point("supervisor")

# After the supervisor, `should_continue` returns "continue" or "end"; the
# mapping turns that into the next node (or END).
workflow.add_conditional_edges(
    "supervisor",
    should_continue,
    {
        "continue": "call_tool",
        "end": END
    }
)

# A tool result always goes back to the supervisor.
workflow.add_edge("call_tool","supervisor")

app = workflow.compile()

In [49]:
from langchain_core.messages import SystemMessage

# Passed to app.invoke as run configuration; "recursion_limit" is presumably
# the cap on graph steps for one invocation — confirm against LangGraph docs.
config = {"recursion_limit": 20}
final_response = app.invoke(
    {
        "messages": [
            # The standing instruction is sent as a SystemMessage (imported at
            # the top of this cell) rather than as a user turn, so the model
            # receives it with the system role.
            SystemMessage(content="You are a helpful assistant, use retriever tool to retrieve information about a incident and use data analyst tool to do basic data analysis based on user input"),
            HumanMessage(content="Tell me about this incident INC0000045")
        ]
    }, config=config
)

content='' additional_kwargs={'function_call': {'arguments': '{"input":"INC0000045","k":1}', 'name': 'retriever'}} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 182, 'total_tokens': 204}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_77a673219d', 'finish_reason': 'function_call', 'logprobs': None} id='run-fc6c00e9-3d97-40ee-861e-99eeac9de49e-0'
The agent action is tool='retriever' tool_input={'input': 'INC0000045', 'k': 1}
The tool result is: [Document(page_content='Number: INC0000057\nIncident Description: Imaginary IT Incident: Email Delivery Delay""\nNotes: Caller 4416 reported delays in email delivery. The issue was identified as a misconfigured mail server setting by Resolver 6. Upon reconfiguration  normal email delivery was restored.', metadata={' Active': ' false', ' Assigned To': ' Resolver 6', ' Assignment Group': ' Group 70', ' CMDB CI': ' ?', ' Caller ID': ' Caller 4416', ' Category': ' Category 20', ' Caused By': ' ?', ' Close