In [1]:
import getpass
import os

import neo4j

In [2]:
# Bolt endpoint and credentials used to open the Neo4j driver further down.
NEO4J_URI = "bolt://0.0.0.0:7687"
NEO4J_USERNAME = input("Your Neo4j username.")
# getpass does not echo what is typed, so the password never ends up in the
# rendered (and possibly committed) cell output the way it does with input().
NEO4J_PASSWORD = getpass.getpass("Your Neo4j password.")

In [3]:
# Base directory under which the input documents live. The original machine-specific
# path is kept as the default; set GRAPHRAG_ROOT_DIR to run the notebook elsewhere
# without editing this cell.
ROOT_DIR = os.environ.get("GRAPHRAG_ROOT_DIR", "/Users/m.mohammed/Downloads")

In [7]:
# Read the key without echoing it, so the secret is not stored in the cell output.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Your OPENAI_API_KEY")

In [6]:
# Read the key without echoing it, so the secret is not stored in the cell output.
os.environ["MISTRAL_API_KEY"] = getpass.getpass("your MISTRAL_API_KEY")

In [8]:
# Driver handle shared by the KG builder, the index creation and the retriever below.
neo4j_driver = neo4j.GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
)

In [9]:
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.llm import MistralAILLM

class Models:
    """String identifiers for the model providers this notebook can switch between."""

    # Selects the OpenAI-backed LLM / embedder.
    OPEN_AI = "OPEN_AI"
    # Selects the Mistral-backed LLM / embedder.
    MISTRAL_AI = "MISTRAL_AI"

def get_llm(type: str = Models.MISTRAL_AI):
    """Build the LLM client for the requested provider.

    Args:
        type: One of the ``Models`` identifiers. ``Models.MISTRAL_AI`` yields a
            ``MistralAILLM``; any other value yields an ``OpenAILLM``.

    Returns:
        The constructed LLM instance.
    """
    if type == Models.MISTRAL_AI:
        return MistralAILLM(model_name="mistral-large-latest")

    # Every non-Mistral value falls through to OpenAI.
    openai_params = {
        # use json_object formatting for best results
        "response_format": {"type": "json_object"},
        # turning temperature down for more deterministic results
        "temperature": 0,
    }
    return OpenAILLM(model_name="gpt-4o", model_params=openai_params)

In [10]:
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.embeddings.mistral import MistralAIEmbeddings


def get_embedder(type: str = Models.MISTRAL_AI):
    """Build the text embedder for the requested provider.

    Args:
        type: One of the ``Models`` identifiers. ``Models.MISTRAL_AI`` yields a
            ``MistralAIEmbeddings``; any other value yields an ``OpenAIEmbeddings``.

    Returns:
        The constructed embedder instance (created with default arguments).
    """
    embedder_cls = MistralAIEmbeddings if type == Models.MISTRAL_AI else OpenAIEmbeddings
    return embedder_cls()

In [36]:
# Text embedder shared by the KG pipeline and the vector retriever (OpenAI-backed here).
embedder = get_embedder(Models.OPEN_AI)

In [37]:
# Entity types the extractor may emit for the current domain (politics, etc.).
basic_node_labels = [
    "Person", "Position", "PoliticalParty",
    "Direction", "Value", "Goal", "Consequence",
]

# Same list object as basic_node_labels (an alias, not a copy).
node_labels = basic_node_labels

# Relationship types the extractor may emit between the nodes above.
rel_types = [
    "belongs_to",
    "had_role",
    "wanted_role",
    "has_direction",
    "has_value",
    "has_influence_to",
    "introduces_consequences",
]

In [38]:
# Extraction prompt handed to the KG pipeline. It is a str.format-style template:
# {schema}, {examples} and {text} are the placeholders, while the doubled braces
# ({{ }}) are literal braces of the JSON skeleton shown to the model.
# The wording targets the political/financial domain of this notebook; an earlier
# "medical context" instruction and several misspellings were corrected because the
# text is sent to the LLM verbatim.
prompt_template = '''
You are a political, financial, societal, geopolitical researcher tasked with extracting information from papers
and structuring it in a property graph to inform further political, societal, geopolitical, financial and research Q&A.

Extract the entities (nodes) and specify their type from the following Input text.
Also extract the relationships between these nodes. the relationship direction goes from the start node to the end node.


Return result as JSON using the following format:
{{"nodes": [ {{"id": "0", "label": "the type of entity", "properties": {{"name": "name of entity" }} }}],
  "relationships": [{{"type": "TYPE_OF_RELATIONSHIP", "start_node_id": "0", "end_node_id": "1", "properties": {{"details": "Description of the relationship"}} }}] }}

- Use only the information from the Input text. Do not add any additional information.
- If the input text is empty, return empty JSON.
- Make sure to create as many nodes and relationships as needed to offer rich political, financial, societal and geopolitical context for further research.
- An AI knowledge assistant must be able to read this graph and immediately understand the context to inform detailed research questions.
- Multiple documents will be ingested from different sources and we are using this property graph to connect information, so make sure entity types are fairly general.

Use only the following nodes and relationships (if provided):
{schema}

Assign a unique ID (string) to each node, and reuse it to define relationships.
Do respect the source and target node types for relationship and
the relationship direction.

Do not return any additional information other than the JSON in it.

Examples:
{examples}

Input text:

{text}
'''


In [39]:
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

# Knowledge-graph builder for PDF input: documents are split into chunks, entities and
# relationships are extracted by the LLM using the prompt and schema lists defined
# above, and results are written through the Neo4j driver.
kg_builder_pdf = SimpleKGPipeline(
    driver=neo4j_driver,
    llm=get_llm(type=Models.OPEN_AI),
    embedder=embedder,
    # Fixed-size chunks of 500 with an overlap of 100 between neighbours.
    text_splitter=FixedSizeSplitter(chunk_size=500, chunk_overlap=100),
    entities=node_labels,
    relations=rel_types,
    prompt_template=prompt_template,
    from_pdf=True,
)

In [40]:
pdf_file_paths = ['graph-dag-1.pdf']

for path in pdf_file_paths:
    print(f"Processing : {path}")
    pdf_result = await kg_builder_pdf.run_async(file_path=os.path.join(ROOT_DIR, "finai", "graph-rag", "data", path))
    print(f"Result: {pdf_result}")

Processing : graph-dag-1.pdf
Result: run_id='d5d5c907-5e91-4694-928c-20ea29e1bffb' result={'resolver': {'number_of_nodes_to_resolve': 243, 'number_of_created_nodes': 215}}


In [41]:
from neo4j_graphrag.indexes import create_vector_index

# Vector index over the `embedding` property of `Chunk` nodes, queried by the retriever
# under the name "text_embeddings".
# NOTE(review): dimensions=1536 presumably matches the OpenAI embedder selected above;
# confirm and adjust if the embedder is switched to another provider.
create_vector_index(
    neo4j_driver,
    name="text_embeddings",
    label="Chunk",
    embedding_property="embedding",
    dimensions=1536,
    similarity_fn="cosine",
)

In [42]:
from neo4j_graphrag.retrievers import VectorRetriever

# Retriever that embeds the query with the shared embedder and looks it up in the
# "text_embeddings" index, returning only each hit's `text` property.
vector_retriever = VectorRetriever(
    neo4j_driver,
    embedder=embedder,
    index_name="text_embeddings",
    return_properties=["text"],
)

In [43]:
from neo4j_graphrag.generation import RagTemplate
from neo4j_graphrag.generation.graphrag import GraphRAG

# Answer-generation LLM (Mistral); extraction above used the OpenAI one.
llm = get_llm(Models.MISTRAL_AI)

# Prompt for the answering step: {query_text} and {context} are filled in per search.
rag_template = RagTemplate(
    template='''Answer the Question using the following Context. Only respond with information mentioned in the Context. Do not inject any speculative information not mentioned.

# Question:
{query_text}

# Context:
{context}

# Answer:
''',
    expected_inputs=['query_text', 'context'],
)

# RAG pipeline backed by plain vector search.
v_rag = GraphRAG(
    llm=llm,
    retriever=vector_retriever,
    prompt_template=rag_template,
)
# Disabled variant; `vc_retriever` is not defined in the visible part of this notebook.
# vc_rag = GraphRAG(llm=llm, retriever=vc_retriever, prompt_template=rag_template)

In [48]:
# Question posed to the vector-backed RAG pipeline.
q = "Looking at the economic outcomes of the brexit to the United Kingdom, what kind's of outcomes would we expected if Marine le Pen did managed 2017 to be elected as president and what kind of projection could we make to the french stocks at that point of time."

# Retrieve the 5 closest chunks as context and keep only the generated answer text.
vector_answer = v_rag.search(q, retriever_config={'top_k': 5}).answer

print(f"Vector Response: \n{vector_answer}")
print("\n===========================\n")

Vector Response: 
If Marine Le Pen had been elected as president in 2017, there would be potential barriers to trade, which could lead to rising borrowing costs due to wary bond investors demanding higher yields. This situation could also result in falling stock prices, particularly in companies reliant on European trade and foreign investment. The possibility of a "Frexit" could undermine confidence in French debt and introduce threat of volatility and market uncertainty. Additionally, such protectionist or isolationist measures could potentially lead to an economic downturn, where labor shortages and trade disruptions may cause stagnation or recession.


