<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/openaifunction_constructing_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain neo4j openai wikipedia tiktoken

In [None]:
pip install openai --upgrade

In [None]:
pip install llamaindex

## Building Retrieval Augmented Systems



In [None]:
import os
from dotenv import load_dotenv

# Load key=value pairs from a local .env file (when one is found) into the process
# environment, so the os.getenv() lookups that follow can see them.
load_dotenv()

# Credentials and connection settings, all read from the process environment.
# A variable that is not set yields None.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
url = os.environ.get('NEO4J_URI')
username = os.environ.get('NEO4J_USERNAME')
password = os.environ.get('NEO4J_PASSWORD')

# Name of the Neo4j database to connect to.
database = "neo4j"

In [None]:
import logging
import sys
from llama_index.llms import OpenAI
from llama_index import ServiceContext
from llama_index import (
    KnowledgeGraphIndex,
    LLMPredictor,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import Neo4jGraphStore
from IPython.display import Markdown, display

# Send log records of level INFO and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# LLM handed to the LlamaIndex service context.
# Alternative models noted by the author: gpt-4-0613, gpt-4-1106-preview.
llm = OpenAI(
    model="gpt-3.5-turbo-16k-0613",
    temperature=0.2,
)
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=1000,
)

In [None]:
# Neo4j-backed graph store, configured with the connection settings defined earlier.
graph_store = Neo4jGraphStore(username=username, password=password, url=url, database=database)

# Storage context that uses that graph store.
storage_context = StorageContext.from_defaults(
    graph_store=graph_store,
)

In [None]:
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.embeddings.openai import OpenAIEmbeddings

# The OpenAI-backed embeddings read their key from the OPENAI_API_KEY environment
# variable. Never assign a literal (or an empty string) here: that would discard
# the key loaded from the environment / .env file. Fail early with a clear message
# when the key is missing.
if not os.environ.get('OPENAI_API_KEY'):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Define it in the environment or in the .env file."
    )

# Vector index over the existing `Task` nodes in Neo4j. The listed text properties
# are the node properties used as embedding text; vectors are kept in the
# `embedding` property and served through the index named `tasks`.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    url=url,
    username=username,
    password=password,
    index_name='tasks',
    node_label="Task",
    text_node_properties=['name', 'description', 'status'],
    embedding_node_property='embedding',
)

In [None]:
from llama_index import load_index_from_storage

# Rebuild the storage context from the data persisted under ./FinanceKG,
# still using the Neo4j graph store.
storage_context = StorageContext.from_defaults(
    graph_store=graph_store,
    persist_dir='./FinanceKG',
)

# Load the persisted index with this id; the remaining keyword arguments are
# passed along to the loader unchanged.
kg_index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
    index_id="807b9792-2357-4cdd-84dc-8bb39d1e7e39",
    include_embeddings=True,
    max_triplets_per_chunk=8,
)


In [None]:
# Query engine over the knowledge-graph index: hybrid embedding mode, source text
# included, answers built with the "tree_summarize" response mode.
query_engine = kg_index.as_query_engine(
    similarity_top_k=12,
    embedding_mode="hybrid",
    response_mode="tree_summarize",
    include_text=True,
)

In [None]:
# Streaming query engine built from `kg_index`, the index loaded earlier in this
# notebook (no variable named `index` exists, so using it raises NameError).
query_engine = kg_index.as_query_engine(streaming=True)
streaming_response = query_engine.query("How can you analyze assets")

# Print the answer piece by piece as it is generated.
streaming_response.print_response_stream()

In [None]:
from llama_index.memory import ChatMemoryBuffer

# Conversation memory for the chat engine, created with a token limit of 1000.
memory = ChatMemoryBuffer.from_defaults(
    token_limit=1000,
)

# Chat engine over the knowledge-graph index in "openai" chat mode. The system
# prompt restricts answers to what the knowledge graph contains.
chat_engine = kg_index.as_chat_engine(
    system_prompt="You are an AI chatbot with a specific focus on the Reactive Insight application and corporate finance. Your knowledge is strictly derived from a comprehensive Knowledge Graph. You are not permitted to provide any information that is not contained within the Knowledge Graph. Ensure all responses are relevant to these topics, detailed, and accurate. Remember, you are here to facilitate understanding, so be as pedagogical as possible. Also, propose any other relevant questions that can be deduced from the user's query. If a query cannot be answered with the information in the Knowledge Graph, politely inform the user that the information is not available.",
    chat_mode="openai",
    memory=memory,
    verbose=True,
)

In [None]:
# Ask a single question through the chat engine.
# NOTE(review): `function_call` presumably pins the tool the agent must call;
# confirm the keyword name against the installed llama_index version.
response = chat_engine.chat(
    "What is the Super priorization algorithm? ",
    function_call="query_engine_tool",
)

# Render the reply in bold.
display(Markdown(f"<b>{response}</b>"))

In [None]:
from llama_index.indices.knowledge_graph import KnowledgeGraphRAGRetriever
from llama_index.agent import OpenAIAgent
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.llms import OpenAI
from llama_index.memory import ChatMemoryBuffer

# Conversation memory handed to the super agent, created with a token limit of 1000.
memory = ChatMemoryBuffer.from_defaults(token_limit=1000)

# Knowledge graphs to expose as tools, keyed by a short name.
my_kgs = {'kg1': kg_index}
# `kg_index2` is not created anywhere in this notebook, so referencing it
# unconditionally raises NameError on a fresh kernel. Register the second
# knowledge graph only when it has actually been loaded.
if 'kg_index2' in globals():
    my_kgs['kg2'] = kg_index2

# Dictionary to store the agents (one per knowledge graph).
# These are kept for direct use; the super agent below is built from `kg_tools`.
kg_agents = {}

# List to store the tools
kg_tools = []

for kg_name, kg in my_kgs.items():
    # Create a query engine for the KG
    query_engine = kg.as_query_engine(
        include_text=True,
        response_mode="tree_summarize",
        embedding_mode="hybrid",
        similarity_top_k=20,
    )

    # Wrap the query engine in a tool; the name and description are what the
    # agent sees when choosing a tool.
    tool = QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=f"tool_{kg_name}",
            description=f"Useful for questions related to {kg_name}",
        ),
    )

    # Add the tool to the list of KG tools
    kg_tools.append(tool)

    # Create an agent that only has access to this KG's tool
    agent = OpenAIAgent.from_tools([tool], system_prompt="Walk me through this context in manageable parts step by step, summarizing and analyzing as we go.")

    # Add the agent to the dictionary of KG agents
    kg_agents[kg_name] = agent

# Create the super agent, which can call every KG tool
llm = OpenAI(model="gpt-3.5-turbo-1106")
super_agent = OpenAIAgent.from_tools(kg_tools, llm=llm, verbose=True, memory=memory, system_prompt="Therefore, the answer.")

In [None]:
from llama_index.indices.knowledge_graph import KnowledgeGraphRAGRetriever
from llama_index.agent import OpenAIAgent
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.llms import OpenAI
from llama_index.memory import ChatMemoryBuffer

# Conversation memory handed to the super agent, created with a token limit of 1000.
memory = ChatMemoryBuffer.from_defaults(token_limit=1000)

# Knowledge graphs to expose as tools, keyed by a short name.
my_kgs = {'kg1': kg_index}
# `kg_index2` is not created anywhere in this notebook, so referencing it
# unconditionally raises NameError on a fresh kernel. Register the second
# knowledge graph only when it has actually been loaded.
if 'kg_index2' in globals():
    my_kgs['kg2'] = kg_index2

# Dictionary to store the agents (one per knowledge graph).
# These are kept for direct use; the super agent below is built from `kg_tools`.
kg_agents = {}

# List to store the tools
kg_tools = []

for kg_name, kg in my_kgs.items():
    # Create a query engine for the KG
    query_engine = kg.as_query_engine(
        include_text=True,
        response_mode="tree_summarize",
        embedding_mode="hybrid",
        similarity_top_k=20,
    )

    # Wrap the query engine in a tool; the name and description are what the
    # agent sees when choosing a tool.
    tool = QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=f"tool_{kg_name}",
            description=f"Useful for questions related to {kg_name}",
        ),
    )

    # Add the tool to the list of KG tools
    kg_tools.append(tool)

    # Create an agent that only has access to this KG's tool
    agent = OpenAIAgent.from_tools([tool], system_prompt="Walk me through this context in manageable parts step by step, summarizing and analyzing as we go.")

    # Add the agent to the dictionary of KG agents
    kg_agents[kg_name] = agent

# Create the super agent, which can call every KG tool. Its system prompt is a
# fill-in template: evidence triples, interpretation, reliability scores,
# optional correction, conclusion.
llm = OpenAI(model="gpt-3.5-turbo-1106")
super_agent = OpenAIAgent.from_tools(kg_tools, llm=llm, verbose=True, memory=memory, system_prompt="""Query: [Question]?
Knowledge Source: [Documents/data used to answer question]  

Structured Knowledge Evidence:
- (Subject1, Relation1, Object1) 
- (Subject2, Relation2, Object2)
[At least 2 factual knowledge triples to ground reasoning]

Interpretation:  
[1-2 sentence natural language interpretation of reasoning process using provided knowledge]

Reliability Assessment:
- Factuality Score: [Numeric score 0-1 rating factuality of knowledge evidence]  
- Faithfulness Score: [Numeric score 0-1 rating relation of interpretation to overall answer]

[If scores are low:]
Corrected Knowledge:
- (Corrected Subject, Relation, Object)
[Injected corrected factual knowledge from external source]

Re-interpretion:  
[Updated natural language interpretation if needed based on corrected knowledge] 

Conclusion:
[Final answer to original question]""")

In [None]:
import nest_asyncio

# Patch asyncio so an event loop that is already running (as in a notebook
# kernel) can be re-entered. Presumably needed by the async agent calls that
# follow -- confirm against the installed library versions.
nest_asyncio.apply()

In [None]:
response = await super_agent.astream_chat("Analyze what to do to recuce costs ?")

# Collect all tokens into a string
response_text = ""
async for token in response.async_response_gen():
    response_text += token

# Print the response text
print(response_text)

In [None]:
# Question to enrich with ontology context before it is sent to the super agent.
initial_question = "Define assets? And optimize them"

# Query the ontology with the initial question.
# NOTE(review): `ontology_engine` is not defined anywhere in the visible notebook;
# it must be created (presumably as a query engine over an ontology index) before
# this cell can run on a fresh kernel.
ontology_results = ontology_engine.query(initial_question)

# Append the ontology answer to the question so the agent receives both as one message.
combined_question = f"{initial_question}. {ontology_results}"

# Start a streamed chat turn with the super agent using the combined question.
response = await super_agent.astream_chat(combined_question)

# Collect all streamed tokens into a single string.
response_text = ""
async for token in response.async_response_gen():
    response_text += token

# Print the full response once streaming has finished.
print(response_text)

In [None]:

# Run a similarity search for the question against the Task vector index.
response = vector_index.similarity_search("How will RecommendationService be updated?")

# Show the text of the first returned document.
print(response[0].page_content)

In [None]:

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Question-answering chain over the Task vector index, using the "stuff" chain
# type with a default ChatOpenAI model.
vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(),
    chain_type="stuff",
    retriever=vector_index.as_retriever(),
)

In [None]:
# Ask the retrieval chain a counting question; the returned answer is shown as
# the cell output.
vector_qa.run("How many open tickets there are?")

In [None]:

from langchain.graphs import Neo4jGraph

# `graph` is used from this cell onward but is never created in the notebook,
# which raises NameError on a fresh kernel. Connect to the same Neo4j instance
# that backs the vector index.
graph = Neo4jGraph(url=url, username=username, password=password)

# Count the open tasks with a Cypher query run directly against the database.
graph.query(
    "MATCH (t:Task {status:'open'}) RETURN count(*)"
)
     

In [None]:
from langchain.chains import GraphCypherQAChain

# Refresh the graph's schema information before building the chain.
graph.refresh_schema()

# Chain with separate models for writing Cypher (gpt-4) and for wording the
# final answer (default chat model); both use temperature 0.
cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(temperature=0, model_name='gpt-4'),
    qa_llm=ChatOpenAI(temperature=0),
    graph=graph,
    verbose=True,
)

In [None]:

from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

# Tools offered to the agent: one backed by the vector QA chain, one backed by
# the Cypher chain. The descriptions tell the model when to use which.
tools = [
    Tool(
        func=vector_qa.run,
        name="Tasks",
        description="""Useful when you need to answer questions about descriptions of tasks.
        Not useful for counting the number of tasks.
        Use full question as input.
        """,
    ),
    Tool(
        func=cypher_chain.run,
        name="Graph",
        description="""Useful when you need to answer questions about microservices,
        their dependencies or assigned people. Also useful for any sort of
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    ),
]

# OpenAI-functions agent (gpt-4, temperature 0) that can call the tools above.
mrkl = initialize_agent(
    tools,
    ChatOpenAI(temperature=0, model_name='gpt-4'),
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)