## Create Graph using text

In [1]:
'''
!pip install langchain-community neo4j wikipedia
!pip install langchain==0.2.5
!pip install langchain-experimental==0.0.61
!pip install langchain-openai==0.1.10
#!pip install dspy-ai
'''

'\n!pip install langchain-community neo4j wikipedia\n!pip install langchain==0.2.5\n!pip install langchain-experimental==0.0.61\n!pip install langchain-openai==0.1.10\n#!pip install dspy-ai\n'

In [2]:
import os
import openai
from langchain_community.document_loaders import WikipediaLoader

from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers.llm import LLMGraphTransformer
from langchain_openai import AzureChatOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

#OpenAI
#os.environ["OPENAI_API_KEY"] = ""
azure_oai_key=os.getenv("AZURE_OPENAI_API_KEY")
azure_oai_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
azure_oai_version=os.getenv("AZURE_OPENAI_API_VERSION")
azure_oai_deployment=os.getenv("AZURE_OPENAI_MODEL")
azure_embedding=os.getenv("EMBEDDING_MODEL_NAME")

#Search 
azure_search_key=os.getenv("AZURE_SEARCH_API_KEY")
azure_search_endpoint=os.getenv("AZURE_SEARCH_ENDPOINT")
azure_search_index=os.getenv("AZURE_SEARCH_INDEX")

#Neo4j
neo4j_uri=os.getenv("NEO4J_URI")
neo4j_database=os.getenv("NEO4J_DATABASE")
neo4j_username=os.getenv("NEO4J_USERNAME")
neo4j_password=os.getenv("NEO4J_PASSWORD")

In [3]:
# the database name to connect to
graph = Neo4jGraph(url=neo4j_uri, username=neo4j_username, password=neo4j_password, database=neo4j_database)
llm = AzureChatOpenAI(azure_endpoint=azure_oai_endpoint, openai_api_version=azure_oai_version, openai_api_key=azure_oai_key,
    azure_deployment=azure_oai_deployment)
llm_transformer = LLMGraphTransformer(llm=llm)

### Wiki to Graph

In [4]:
# Set the timeout value to a higher value, such as 10 seconds
source = "Elon Musk"
raw_documents = WikipediaLoader(query=source).load()
print(raw_documents)



  lis = BeautifulSoup(html).find_all('li')


[Document(page_content='Elon Reeve Musk (; born June 28, 1971) is a businessman and investor known for his key roles in space company SpaceX and automotive company Tesla, Inc. Other involvements include ownership of X Corp., formerly Twitter, and his role in the founding of The Boring Company, xAI, Neuralink and OpenAI. He is one of the wealthiest people in the world; as of July 2024, Forbes estimates his net worth to be US$221 billion.\nMusk was born in Pretoria to Maye and Errol Musk and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen\'s University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor\'s degrees in economics and physics. He moved to California in 1995 to attend Stanford University, but dropped out after two days and, with his brother Kimbal, co-founded online city guide software company Z

### Text to Graph example

In [5]:
# DiffbotGraphTransformer calls Diffbot Natural Language API to extract entities and relationships in the article
from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer

diffbot_key = os.getenv("DIFFBOT_KEY") 

diffbot_nlp = DiffbotGraphTransformer(diffbot_api_key=diffbot_key)

# Diffbot's Natural Language API converts unstructured text data into knowlegde graphs
graph_documents = diffbot_nlp.convert_to_graph_documents(raw_documents)

# add knowledge graph data to the neo4j database
graph.add_graph_documents(graph_documents)

In [6]:
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

Nodes:[Node(id='http://www.wikidata.org/entity/Q6409751', type='Person', properties={'name': 'Kimbal Musk', 'academicDegree': 'bachelor', 'positionHeld': 'chairman', 'age': '18'}), Node(id='http://www.wikidata.org/entity/Q120599684', type='Organization', properties={'name': 'x.ai'}), Node(id='Tosca', type='Person', properties={'name': 'Tosca'}), Node(id='http://www.wikidata.org/entity/Q317521', type='Person', properties={'name': 'Elon Musk', 'dateOfBirth': '1971-06-28', 'positionHeld': 'businessman'}), Node(id='http://www.wikidata.org/entity/Q28874479', type='Organization', properties={'name': 'TBC - THE BORING COMPANY'}), Node(id='http://www.wikidata.org/entity/Q2460103', type='Skill', properties={'name': 'tunnel construction'}), Node(id='http://www.wikidata.org/entity/Q29043471', type='Organization', properties={'name': 'Neuralink'}), Node(id='http://www.wikidata.org/entity/Q24007468', type='Person', properties={'name': 'Maye Musk', 'positionHeld': 'model'}), Node(id='http://www.wiki

## Query Graph

In [7]:
graph.schema

'Node properties:\nAirport {longest: INTEGER, altitude: INTEGER, pagerank: FLOAT, descr: STRING, runways: INTEGER, id: STRING, icao: STRING, city: STRING, location: POINT, iata: STRING}\nCity {name: STRING}\nRegion {name: STRING}\nCountry {code: STRING}\nContinent {name: STRING}\nPerson {name: STRING, id: STRING, dateOfBirth: STRING, positionHeld: STRING, age: STRING, academicDegree: STRING, numberOfChildren: STRING}\nOrganization {name: STRING, foundingDate: STRING, id: STRING, productType: STRING}\nSkill {id: STRING, name: STRING}\nLocation {name: STRING, id: STRING}\nAward {name: STRING, id: STRING}\nRelationship properties:\nHAS_ROUTE {distance: INTEGER}\nEMPLOYEE_OR_MEMBER_OF {evidence: STRING, isCurrent: STRING, isNotCurrent: STRING, startTime: STRING, positionHeld: STRING, endTime: STRING}\nINDUSTRY {evidence: STRING}\nFOUNDED_BY {evidence: STRING}\nPLACE_OF_BIRTH {evidence: STRING}\nPERSON_LOCATION {evidence: STRING, startTime: STRING, isNotCurrent: STRING, isCurrent: STRING}\n

In [8]:
# Init LLM Search in Graph
from langchain.chains import GraphCypherQAChain

initial_context_from_knowledge_graph = GraphCypherQAChain.from_llm(
    cypher_llm=llm, 
    qa_llm=llm, 
    graph=graph,
    validate_cypher=True, 
    verbose=True
)

In [9]:
#query = "Who are the companies that Elon Musk co-founded and their subsidieries?"
#query = "how many companies Elon Musk founded including names?"
query = "Could you provide a list of subsidiaries for the companies founded by Elon Musk?"

In [10]:
# Knowledge Graph Instructions and Query
rules = """
# Knowledge Graph Instructions:
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
{'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
{'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency, and not duplicate entities.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"), 
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.  
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial. 
"""
question_elon = f"{query}, {rules}"
print(query)
kg_context = initial_context_from_knowledge_graph.run(question_elon)
kg_context


Could you provide a list of subsidiaries for the companies founded by Elon Musk?


[1m> Entering new GraphCypherQAChain chain...[0m


  warn_deprecated(


Generated Cypher:
[32;1m[1;3mMATCH (p:Person)<-[:FOUNDED_BY]-(o:Organization)-[:SUBSIDIARY]->(s:Organization)
WHERE p.name = 'Elon Musk'
RETURN o.name AS Company, collect(s.name) AS Subsidiaries[0m
Full Context:
[32;1m[1;3m[{'Company': 'SpaceX', 'Subsidiaries': ['TBC - THE BORING COMPANY']}][0m

[1m> Finished chain.[0m


'SpaceX, a company founded by Elon Musk, has a subsidiary named TBC - The Boring Company.'

## Azure Vecotr Search

In [11]:
service_endpoint = os.getenv("AZURE_SEARCH_ENDPOINT")
index_name =os.getenv("AZURE_SEARCH_INDEX")
key = os.getenv("AZURE_SEARCH_API_KEY")#azure_search_key

def get_embeddings(text: str):
    # There are a few ways to get embeddings. This is just one example.
   

    open_ai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    open_ai_key = os.getenv("AZURE_OPENAI_API_KEY")

    client = openai.AzureOpenAI(
        azure_endpoint=open_ai_endpoint,
        api_key=open_ai_key,
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    )
    embedding = client.embeddings.create(input=[text], model=azure_embedding)
    return embedding.data[0].embedding


## Vector Search

In [12]:
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
vector_query = VectorizedQuery(vector=get_embeddings(query), k_nearest_neighbors=3, fields="text_vector")


In [13]:
results = search_client.search(
    vector_queries=[vector_query],
    select=["chunk_id","chunk"]
)

result_array = []
for result in results:
    print(result)
    result_array.append(result)
# [END single_vector_search]

{'chunk': "[Document(page_content='Elon Reeve Musk (; born June 28, 1971) is a businessman and investor known for his key roles in space company SpaceX and automotive company Tesla, Inc. Other involvements include ownership of X Corp., formerly Twitter, and his role in the founding of The Boring Company, xAI, Neuralink and OpenAI. He is one of the wealthiest people in the world; as of June 2024, Forbes estimates his net worth to be US$214 billion.\\nMusk was born in Pretoria to Maye and Errol Musk and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen\\'s University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor\\'s degrees in economics and physics. He moved to California in 1995 to attend Stanford University, but dropped out after two days and, with his brother Kimbal, co-founded online city guide soft

## Multi Search

In [14]:
search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
vector_query = VectorizedQuery(vector=get_embeddings(query), k_nearest_neighbors=3, fields="text_vector")

results = search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["chunk_id","chunk"],
)

for result in results:
    print(result)
# [END simple_hybrid_search]

{'chunk': "[Document(page_content='Elon Reeve Musk (; born June 28, 1971) is a businessman and investor known for his key roles in space company SpaceX and automotive company Tesla, Inc. Other involvements include ownership of X Corp., formerly Twitter, and his role in the founding of The Boring Company, xAI, Neuralink and OpenAI. He is one of the wealthiest people in the world; as of June 2024, Forbes estimates his net worth to be US$214 billion.\\nMusk was born in Pretoria to Maye and Errol Musk and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen\\'s University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor\\'s degrees in economics and physics. He moved to California in 1995 to attend Stanford University, but dropped out after two days and, with his brother Kimbal, co-founded online city guide soft

## LLM Answer (No data sources)

In [15]:
from langchain_core.messages import HumanMessage, SystemMessage
chat = llm

# Vector Search Answer
messages = [
    SystemMessage(
        content=f"You are a helpful assistant who generates information to questions. Please answer the question"
    ),
    HumanMessage(
        content= f"{query}"
    ),
    HumanMessage(
        content= f"write a short answer based on the information above, provide a short answer, not nore than 1 line that must answer the question."
    )
    
]
LLM_ans_colleague = chat.invoke(messages)
print(LLM_ans_colleague.content)

The information about subsidiaries for companies founded by Elon Musk was not provided above.


## LLM + RAG answer

In [16]:
chat = llm

# Vector Search Answer
messages = [
    SystemMessage(
        content=f"You are a helpful assistant who generates information grounded with facts. Please enhance Vector search answer and generate the final answer. the question was: {query}"
    ),
    HumanMessage(
        content= f"Search Results: {result_array}"
    ),
    HumanMessage(
        content= f"write a short answer based on the information above, provide a short answer, not nore than 1 line that must answer the question."
    )
    
]
vector_ans_colleague = chat.invoke(messages)
print(vector_ans_colleague.content)

Elon Musk has founded or co-founded several companies, including SpaceX, Tesla, Inc., Neuralink, The Boring Company, OpenAI, and xAI, with subsidiaries such as Tesla Energy (formerly SolarCity).


# Boom!

In [17]:
chat = llm

# Full Answer Graph and Vector Search
messages = [
    SystemMessage(
        content=f"You are a helpful assistant who generates information grounded with facts. Please enhance the original answer with complementary entity and relationship information from the knowledge graph to generate the final answer. the question was: {query}"
    ),
    HumanMessage(
        content= f"Graph Results: {kg_context}"
    ),
    HumanMessage(
        content= f"Search Results: {result_array}"
    ),
    HumanMessage(
        content= f"write a short answer based on the information above, provide a short answer, not nore than 1 line that must answer the question."
    )
    
]
final_ans_colleague = chat.invoke(messages)
print(final_ans_colleague.content)

SpaceX, founded by Elon Musk, has a subsidiary named TBC - The Boring Company.
