In [18]:
import neo4j
from neo4j_graphrag.llm import AzureOpenAILLM  
from neo4j_graphrag.embeddings.openai import AzureOpenAIEmbeddings 
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.generation.graphrag import GraphRAG 
from dotenv import load_dotenv
import os 


# Load environment variables from a .env file so the os.getenv(...) calls in
# the following cells can resolve the Neo4j and Azure OpenAI settings.
load_dotenv()

True

In [19]:
# Open a Neo4j driver; the URI and the (username, password) pair are read
# from environment variables rather than being written into the notebook.
neo4j_driver = neo4j.GraphDatabase.driver(
    os.getenv('NEO4J_URI_ONLINE'),
    auth=(
        os.getenv('NEO4J_USERNAME_ONLINE'),
        os.getenv('NEO4J_PASSWORD_ONLINE'),
    ),
)

In [20]:
# Azure OpenAI chat model wrapper; the deployment name, endpoint, API version
# and key all come from environment variables.
llm = AzureOpenAILLM(
    api_key=os.getenv('GRAPHRAG_API_KEY'),
    api_version=os.getenv('AZURE_OpenAI_API_VERSION'),
    azure_endpoint=os.getenv('AZURE_OpenAI_ENDPOINT'),
    model_name=os.getenv('AZURE_OPENAI_DEPLOYMENT_MODEL'),
)

# Smoke test: as the cell's last expression, the returned response is displayed.
llm.invoke('say something sarcastic')

LLMResponse(content="Oh sure, I'd love to help you procrastinate even more! Why get work done now when you can just put it off until later, right?")

# setup LLM

In [21]:
# Call the chat-completions client behind `llm` directly and print the reply.
# JSON mode is NOT enforced here: response_format stays commented out, so any
# JSON in the output comes from the system prompt alone.
# NOTE(review): the model is hard-coded to 'gpt-4' while `llm` takes its
# deployment from AZURE_OPENAI_DEPLOYMENT_MODEL -- confirm this is intended.
response = llm.client.chat.completions.create(
    messages=[
        dict(role='system', content='you are sarcastic batman desgined to gove response in JSON '),
        dict(role='user', content='who is joker'),
    ],
    model='gpt-4',
    # response_format={'type':'json_object'},
)
print(response.choices[0].message.content)

{
  "response": "Oh, just a guy who enjoys wearing clown makeup and creating absolute chaos in Gotham. You know, your typical neighbor next door!",
  "description": "The Joker is one of Batman's most notorious adversaries. He is known for his chaotic and nihilistic behavior, often concocting elaborate schemes to challenge Batman and undermine the order in Gotham City."
}


In [22]:
# Embedding client. It uses its own endpoint, key and API version (the "_2"
# environment variables), separate from the ones used for the chat model.
embeddings = AzureOpenAIEmbeddings(
    api_key=os.getenv('AZURE_OPENAI_API_KEY_2'),
    api_version=os.getenv('AZURE_OpenAI_API_VERSION_2'),
    azure_endpoint=os.getenv('AZURE_OPENAI_EMBEDDINGS_ENDPOINT_2'),
)

In [23]:
# --- Node labels, grouped by theme -----------------------------------------

# Core people / organisation / funding entities
basic_node_labels = [
    "Person",
    "Company",
    "Startup",
    "Investor",
    "VentureCapitalFirm",
    "FundingRound",
]

# Business events, offerings and market context
business_node_labels = [
    "Acquisition",
    "Merger",
    "Product",
    "Service",
    "Market",
    "Industry",
    "Technology",
]

# Research / academic artefacts (if applicable)
academic_node_labels = [
    "Publication",
    "Patent",
    "Conference",
    "ResearchPaper",
]

# Funding and financing instruments
finance_node_labels = [
    "IPO",
    "Equity",
    "Grant",
    "DebtFinancing",
    "AngelInvestment",
]

# Full label list, in the same group order as above
node_labels = [
    *basic_node_labels,
    *business_node_labels,
    *academic_node_labels,
    *finance_node_labels,
]

# --- Relationship types -----------------------------------------------------
rel_types = [
    "FOUNDED",
    "INVESTED_IN",
    "ACQUIRED",
    "MERGED_WITH",
    "PARTNERED_WITH",
    "EMPLOYED_AT",
    "ADVISOR_TO",
    "BOARD_MEMBER_OF",
    "OWNS",
    "FUNDED",
    "LICENSED",
    "HOLDS_PATENT",
    "PUBLISHED",
    "COFOUNDED",
    "WORKS_WITH",
    "SPONSORED",
    "ACCELERATED_BY",
    "INCUBATED_BY",
    "RAISED_FUNDS_IN",
    "PRODUCT_LAUNCHED",
    "MARKETED_BY",
    "PIVOTED_TO",
    "DEVELOPED",
]

In [24]:
# examples={
#     "entities": [
#         {
#             "label": "Elon Musk",
#             "id": "Elon Musk",
#             "description": "Visionary entrepreneur and business magnate"
#         },
#         {
#             "label": "NASA",
#             "id": "NASA",
#             "description": "National Aeronautics and Space Administration"
#         },
#         {
#             "label": "SpaceX",
#             "id": "SpaceX",
#             "description": "Founder and CEO of SpaceX"
#         },
#         {
#             "label": "OrbitMission",
#             "id": "OrbitMission",
#             "description": "First private company to send humans to orbit, 2020"
#         }
        
#     ],
#     "Relationships":[
#                 {
#                     "label": "Founded",
#                     "type": "Founded",
#                     "start_node_id": "person1",
#                     "end_node_id": "organization1",
#                     "properties":{"details":"Elon Musk founded Spacex a spaceflight services company, in 2002"} 
#                 },
#                 {
#                     "label": "Collaborated with",
#                     "type": "Collaborated with",
#                     "start_node_id": "person1",
#                     "end_node_id": "organization3",
#                     "properties":{"details":"Elon Musk collaborated with NASA for the Crew Dragon mission in 2020"} 

#                 }
    
#     ]
# }

In [25]:
# Entity/relationship extraction prompt, passed to SimpleKGPipeline through
# its prompt_template argument.
# Single-brace fields {schema}, {examples} and {text} are the placeholders;
# the doubled braces {{ }} presumably render as literal JSON braces once the
# template is formatted (str.format-style escaping) -- TODO confirm.
# The JSON shape described here (node "name", relationship "details") is what
# the retrieval query later reads via `.name` and `r.details`.
prompt_template = '''
You are a network builder tasked with extracting information from business-related documents 
and structuring it in a property graph to inform further networking and investment analysis.

Extract the entities (nodes) and specify their type from the following input text.
Also, extract the relationships between these nodes. The relationship direction goes from the start node to the end node.

Return the result as JSON using the following format:
{{
  "nodes": [ 
    {{"id": "entity_name", "label": "type_of_entity", "properties": {{"name": "name_of_entity"}} }}
  ],
  "relationships": [
    {{"type": "TYPE_OF_RELATIONSHIP", "start_node_id": "entity_name_1", "end_node_id": "entity_name_2", 
      "properties": {{"details": "Description of the relationship"}} 
    }} 
  ] 
}}

Guidelines:
- Use only the information from the input text. Do not add any additional information.  
- If the input text is empty, return an empty JSON. 
- Extract as many nodes and relationships as needed to offer a rich entrepreneurial and business context for further networking and investment analysis.
- The property graph should enable an AI knowledge assistant to understand the business context and assist in investment decisions, startup connections, and entrepreneurial insights.
- Multiple documents will be ingested from different sources, and we are using this property graph to connect information. Ensure entity types remain general and widely applicable.

Use only the following nodes and relationships (if provided):
{schema}

Assign a unique ID (string) to each node and reuse it to define relationships.
Ensure that relationships respect the source and target node types and follow the correct direction.

Return **only** the JSON in the specified format—no additional information.

Examples:
{examples}

Input text:

{text}
'''


# KG builder pipeline

In [26]:
# 1. Build the knowledge graph and store it in the Neo4j database
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

# Pipeline wiring: the LLM extracts entities/relations restricted to the label
# and relationship lists defined above, the embedder vectorises the text, and
# the driver is the Neo4j connection the pipeline is given.
# from_pdf=False: despite the "_pdf" suffix in the variable name, the
# (commented-out) ingestion cell feeds this pipeline raw text via text=...
kg_builder_pdf = SimpleKGPipeline(
    driver=neo4j_driver,
    llm=llm,
    embedder=embeddings,
    prompt_template=prompt_template,
    entities=node_labels,
    relations=rel_types,
    from_pdf=False,
    # Optional explicit chunking, currently disabled:
    # text_splitter=FixedSizeSplitter(chunk_size=1000,chunk_overlap=250),
)


In [27]:
import os 

In [28]:
os.chdir('../')
%pwd 

'd:\\pythonProjects\\KAG_Testing'

In [29]:
# Concatenate the contents of the first two files in the input folder into a
# single string (text_data) for ingestion.
path = 'input'

# os.listdir returns entries in arbitrary order; sort so the [:2] slice picks
# the same two files on every run and on every machine.
files = sorted(os.listdir(path))

file_texts = []
for file in files[:2]:
    # Join against `path` instead of repeating the 'input' literal, and decode
    # explicitly rather than relying on the platform default encoding.
    # NOTE(review): assumes the input files are UTF-8 -- confirm.
    with open(os.path.join(path, file), 'r', encoding='utf-8') as f:
        file_texts.append(f.read())

# Single join instead of repeated string += inside the loop.
text_data = ''.join(file_texts)
print(type(text_data))


<class 'str'>


In [30]:
# # path to files 
# import time 
# try:
#     print(f'Processing: {path}')
#     pdf_results=await kg_builder_pdf.run_async(text=text_data)
#     print(f'Result: {pdf_results}')
# except Exception as e:
#     print(e)
#     errordata=e.args[0] 
#     if 'rate limit' in errordata:
#         time.sleep(60)

    

In [31]:
# print(pdf_results)

# retrieve data from knowledge graph
                

# Vector retriever

ANN (approximate nearest neighbor) search: retrieve the chunks whose embeddings are closest to the embedding of the query.

In [32]:
from neo4j_graphrag.indexes import create_vector_index

# Vector index named 'text_embeddings' over the `embedding` property of
# Chunk nodes, compared with cosine similarity.
# NOTE(review): dimensions=1536 has to match the output size of the embedding
# model configured above -- confirm against the deployed model.
create_vector_index(
    neo4j_driver,
    name='text_embeddings',
    label='Chunk',
    embedding_property='embedding',
    similarity_fn='cosine',
    dimensions=1536,
)



In [33]:
from neo4j_graphrag.retrievers import VectorRetriever

# Plain similarity retriever over the 'text_embeddings' index; only the
# `text` property of each matched node is returned.
vector_retriever = VectorRetriever(
    neo4j_driver,
    embedder=embeddings,
    index_name='text_embeddings',
    return_properties=['text'],
)

In [34]:
# Ask one question through GraphRAG backed by the plain vector retriever
import json
from neo4j_graphrag.generation.graphrag import GraphRAG

rag = GraphRAG(retriever=vector_retriever, llm=llm)

# Optional: inspect the raw retriever matches instead of a generated answer
# vector_res=vector_retriever.get_search_results(query_text='give me details about the cat', top_k=3)
# for i in vector_res.records:
#     print(i)

response = rag.search('who is connected to the Amazon?')
print(response.answer)

Elon Musk is not connected to Amazon; however, Jeff Bezos, who is the founder of Amazon, is often mentioned in discussions alongside Musk due to their involvement in space exploration through Bezos's company Blue Origin and Musk's company SpaceX.


In [35]:
import json

# Print the raw text of the three matches for the query, without any LLM
# answer generation, each preceded by a "====" separator line.
vector_res = vector_retriever.get_search_results(
    query_text='how is elon related to Jeff bezos',
    top_k=3,
)

for record in vector_res.records:
    chunk_text = record.data()['node']['text']
    print("====\n" + json.dumps(chunk_text, indent=4))

====
"AICofounder of Neuralink OpenAI Zip2 and Xcom part of PayPalPresident of the Musk FoundationSpousesJustine Wilson m2000 div2008Talulah Riley m2010 div2012 m2013 div2016Children121ParentsErrol Musk fatherMaye Musk motherRelativesKimbal Musk brotherTosca Musk sisterLyndon Rive cousinAwardsFull listElon Musks voiceElon Musk speaking about India and his meeting with its prime minister Narendra Modi Recorded June 20 2023SignatureThis article is part of a series aboutElon MuskPersonalAwards and honorsViewsFamilyFilmographyLegalarticle is part of a series aboutElon MuskPersonalAwards and honorsViewsFamilyFilmographyLegal affairsWealthFoundationPolitical activitiesCompaniesZip2XcomPayPalSpaceXStarlinkTesla IncEnergycriticismlitigationOpenAINeuralinkThe Boring CompanyThudX CorpTwitter under Elon MuskTwitter IncacquisitionxAIPoliticsAmerica PACRBG PACDepartment of Government EfficiencyIn popular culture Elon Musk IsaacsonElon Musk VanceLudicrousPower PlayMembers OnlyThe Platonic Permutatio

In [36]:
from neo4j_graphrag.retrievers import VectorCypherRetriever

# Retriever that runs a vector search on 'text_embeddings' and then applies
# the Cypher in retrieval_query to the matches.
# Notes on the query:
# - `node` is presumably the variable the retriever binds to each vector match
#   (TODO confirm against the library docs); it is aliased to `chunk`.
# - The quantifier {1,3} follows 1 to 3 non-FROM_CHUNK relationships from the
#   entities linked to the chunk, even though the embedded Cypher comment
#   says "2-3 hops".
# - MATCH is not OPTIONAL, so chunks without any linked entity path produce no
#   rows and do not contribute to the returned context.
# - apoc.text.join is used, so the APOC procedures must be available on the
#   database.
# - The Python string is not raw, so each \n below is turned into a real line
#   break before the query text is sent.
# - The result is a single `info` string: chunk texts, then one
#   "start - TYPE(details) -> end" line per distinct relationship.
vc_retriever = VectorCypherRetriever(
   neo4j_driver,
   index_name="text_embeddings",
   embedder=embeddings,
   retrieval_query="""
//1) Go out 2-3 hops in the entity graph and get relationships
WITH node AS chunk
MATCH (chunk)<-[:FROM_CHUNK]-()-[relList:!FROM_CHUNK]-{1,3}()
UNWIND relList AS rel

//2) collect relationships and text chunks
WITH collect(DISTINCT chunk) AS chunks,
 collect(DISTINCT rel) AS rels

//3) format and return context
RETURN '=== text ===\n' + apoc.text.join([c in chunks | c.text], '\n---\n') + '\n\n=== kg_rels ===\n' +
 apoc.text.join([r in rels | startNode(r).name + ' - ' + type(r) + '(' + coalesce(r.details, '') + ')' +  ' -> ' + endNode(r).name ], '\n---\n') AS info
"""
)

In [37]:

from neo4j_graphrag.generation import RagTemplate


# Answer-generation prompt shared by both pipelines below. It instructs the
# model to answer only from the supplied context; {query_text} and {context}
# are the two fields declared in expected_inputs.
rag_template = RagTemplate(template='''Answer the Question using the following Context. Only respond with information mentioned in the Context. Do not inject any speculative information not mentioned.

# Question:
{query_text}

# Context:
{context}

# Answer:
''', expected_inputs=['query_text', 'context'])

# Two pipelines with the same LLM and prompt, differing only in the retriever:
# v_rag uses the plain vector retriever, vc_rag the vector + Cypher retriever.
v_rag  = GraphRAG(llm=llm, retriever=vector_retriever, prompt_template=rag_template)
vc_rag = GraphRAG(llm=llm, retriever=vc_retriever, prompt_template=rag_template)

In [38]:
# Plain vector pipeline (v_rag), retrieving with top_k=10.
q = 'what is BBC news and how is it connected in the given network?'
v_result = v_rag.search(q, retriever_config={'top_k': 10})
print(f"Vector response: {v_result.answer}")

Vector response: BBC News is a media entity where Bill Gates, the subject of the provided context, was a guest on the BBC Radio 4's program "Desert Island Discs" on January 31, 2016. During the program, Gates discussed his relationships, the start of Microsoft, and his personal habits, among other topics. Additionally, Bill Gates was the interviewee in an episode of the "Amol Rajan Interviews" series on BBC Two.


In [55]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'how is amazon related to Jeff Bezos?'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: Amazon founder Jeff Bezos surpassed Bill Gates as the richest person in the world in October 2017, according to the context provided.


In [56]:
# Same question through the plain vector pipeline (v_rag), top_k=10.
# The query string keeps its trailing space exactly as written.
q = 'how is amazon related to Jeff Bezos? '
v_result = v_rag.search(q, retriever_config={'top_k': 10})
print(f"Vector response: {v_result.answer}")

Vector response: Jeff Bezos is related to Amazon as its CEO and has been recognized as the richest person in the world again, as mentioned by Forbes.


In [41]:
# Plain vector pipeline (v_rag), retrieving with top_k=10.
q = 'how elon related to Valdimir Putin?'
v_result = v_rag.search(q, retriever_config={'top_k': 10})
print(f"Vector response: {v_result.answer}")

Vector response: Elon Musk is related to Vladimir Putin in the context of reported communications and interactions concerning geopolitical issues. It was reported that Musk allegedly spoke with Putin before proposing a peace plan during the Russian invasion of Ukraine, a claim which Musk denied. Additionally, The Wall Street Journal and other sources reported that Musk had been in contact with Putin and other high-ranking Russian officials discussing personal, business, and geopolitical matters since late 2022. However, these reports have been partially denied by Kremlin stating that Musk and Putin had spoken only once.


In [42]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'how elon related to Valdimir Putin?'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: Elon Musk is related to Vladimir Putin through various interactions and communications. In October 2022, Musk posted a Twitter poll and peace plan to resolve the Russian invasion of Ukraine, suggesting Crimea remain with Russia and Ukraine remain neutral. This proposal followed alleged discussions with Putin, which Musk denied. Reports later emerged that Musk had been in regular contact with Putin and other high-ranking Russian officials since late 2022, discussing personal, business, and geopolitical matters. However, these communications were closely held secrets within the government, given Musk's influence and technological involvement.


In [43]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'how is CNBC related in this network? give a detailed response'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: The provided context does not mention CNBC or its relationship to any network. There is no information available in the provided context to answer the question about how CNBC is related in a network.


In [45]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'how is CNBC play a role in the given context'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: CNBC reported on Elon Musk's endorsement of a carbon tax and his downplaying of concerns about methane, contributing to the public discourse on environmental policies and Musk’s views on these issues.


In [47]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'who is sam altman'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: Sam Altman is mentioned as a person related to artificial intelligence concepts and ethics, involved in organizations like OpenAI, which focuses on AI research.


In [48]:
# Vector + Cypher pipeline (vc_rag), retrieving with top_k=10.
q = 'what is musk foundation'
vc_result = vc_rag.search(q, retriever_config={'top_k': 10})
# Label corrected: this answer comes from vc_rag, not the plain vector
# pipeline, so it must not be printed as "Vector response".
print(f"VectorCypher response: {vc_result.answer}")

Vector response: The Musk Foundation, founded in 2001 by Elon Musk, is focused on renewable energy and supporting various charitable causes. The foundation's stated purpose includes providing solar-power energy systems in disaster areas, supporting research, development, and advocacy for interests such as human space exploration, pediatrics, renewable energy, safe artificial intelligence, and science and engineering educational efforts. Notable actions by the foundation include making 350 donations, many to scientific research or education nonprofits, including significant contributions to the Wikimedia Foundation, the University of Pennsylvania, and Musk's brother Kimbal’s nonprofit, Big Green.
