In [None]:
# Author Zulqar nain

# Installing dependencies

In [36]:
# Install/upgrade (quietly) the third-party packages this notebook depends on.
# NOTE(review): versions are unpinned — pin them for reproducible runs.
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough


In [None]:
# Setting up the environment

In [23]:
# Configure credentials without baking secrets into the notebook.
# Values already exported in the environment win; only missing ones are filled in.
from getpass import getpass

# Non-secret connection defaults for a local Neo4j instance.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")

# Secrets are prompted for (input is not echoed) instead of being hardcoded,
# so they never end up in the saved notebook or in version control.
for secret_name in ("OPENAI_API_KEY", "NEO4J_PASSWORD"):
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass(f"{secret_name}: ")

# Neo4jGraph() is called with no arguments, so it picks up its connection
# settings from the NEO4J_* environment variables configured above.
graph = Neo4jGraph()

# Loading Raw Data

In [7]:
from langchain_community.document_loaders import PyPDFLoader

# "your_pdf.pdf" is a placeholder path — point it at the PDF you want to ingest.
loader = PyPDFLoader("your_pdf.pdf")
# Load the PDF and split it into Document chunks in a single call.
documents = loader.load_and_split()

In [9]:
# Sanity check: how many Document chunks load_and_split() returned.
len(documents)

4

# Converting raw documents to graph documents (nodes and relationships)

In [13]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

In [25]:
# Chat model shared by the graph transformer and the entity-extraction chain
# further down; temperature is pinned to 0.0.
llm = ChatOpenAI(temperature=0.0)

In [27]:
# Wrap the chat model in an LLMGraphTransformer, used next to turn the
# loaded documents into graph documents.
graph_tranformer=LLMGraphTransformer(llm=llm)

In [29]:
# Convert the loaded documents into graph documents.
# NOTE(review): this presumably issues LLM calls for every document, so it may
# be slow and incur API cost — confirm before re-running on large inputs.
graph_doc=graph_tranformer.convert_to_graph_documents(documents)

In [71]:
# Display the extracted graph documents (the repr can be very long).
graph_doc

[GraphDocument(nodes=[Node(id='Kamran Umer', type='Person'), Node(id='Retrieval Augmented Generation (Rag)', type='Technology'), Node(id='Langchain', type='Technology'), Node(id='Openai', type='Technology'), Node(id='Fastapi', type='Technology'), Node(id='Ai Engineer', type='Job title'), Node(id='Uae Based Company', type='Company'), Node(id='Generative Ai', type='Technology'), Node(id='Natural Language Processing (Nlp)', type='Technology'), Node(id='Sinusitis', type='Medical condition'), Node(id='Pns X-Ray Images', type='Medical imaging'), Node(id='Machine Learning', type='Skill'), Node(id='Deep Learning', type='Skill'), Node(id='Computer Vision', type='Skill'), Node(id='Supervised & Unsupervised Learning', type='Skill'), Node(id='Rag Applications', type='Technology'), Node(id='Pytorch', type='Technology'), Node(id='Opencv', type='Technology'), Node(id='Numpy', type='Technology'), Node(id='Matplotlib', type='Technology'), Node(id='Keras', type='Technology'), Node(id='Pandas', type='Tec

# Adding graph documents to the graph database (Neo4j)

In [30]:
# Persist the extracted graph documents into Neo4j.
# baseEntityLabel=True: presumably what adds the `__Entity__` label that the
#   full-text index created later in this notebook targets — confirm in the docs.
# include_source=True: presumably also stores the source documents and links
#   them to entities (the later queries exclude MENTIONS relationships) — confirm.
graph.add_graph_documents(
    graph_doc,
    baseEntityLabel=True,
    include_source=True
)

# Displaying Graph of your data 

In [53]:
# directly show the graph resulting from the given Cypher query
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

def showGraph(cypher: str = default_cypher):
    """
    Run a Cypher query against Neo4j and return its result as a graph widget.

    Connection settings are read from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables.

    Args:
        cypher: Cypher query whose result graph should be displayed.
            Defaults to `default_cypher`.

    Returns:
        A GraphWidget built from the query's result graph, with node labels
        mapped to the `id` property.

    Raises:
        KeyError: if any of the NEO4J_* environment variables is missing.
    """
    # Context managers guarantee the session and the driver are closed even if
    # the query fails; previously both were left open on every call.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver:
        with driver.session() as session:
            # The result graph has to be read while the session is still open.
            widget = GraphWidget(graph=session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget

showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

# Creating embeddings for Graph RAG

In [55]:
from langchain_openai import OpenAIEmbeddings

In [57]:
# Build a Neo4j-backed vector index from nodes already in the graph.
# Targets nodes labelled `Document`, using their `text` property as the text
# to embed (with OpenAIEmbeddings) and `embedding` as the property for vectors.
# NOTE(review): search_type="hybrid" presumably combines vector and keyword
# search — confirm against the Neo4jVector documentation.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

# Developing a custom chain for Graph RAG

In [59]:
# Retriever

# Create (only if it does not exist yet) a full-text index named `entity` over
# the `id` property of `__Entity__` nodes. structured_retriever looks entities
# up through this index via db.index.fulltext.queryNodes('entity', ...).
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

# Extract entities from text
# Structured-output schema: handed to llm.with_structured_output(...) so the
# model's answer comes back as an Entities instance (callers read `.names`).
class Entities(BaseModel):
    """Identifying information about entities."""

    # Required field (`...` means no default): the entity names found in the text.
    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

# Two-message chat prompt: a system instruction describing the extraction task
# and a human message carrying the user's text in the `{question}` placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

# Pipe the prompt into the chat model constrained to the Entities schema;
# invoke with {"question": ...} to get the extracted entity names.
entity_chain = prompt | llm.with_structured_output(Entities)

In [None]:
# Sanity-check entity extraction on a sample question.
entity_chain.invoke({"question": "who is zulqarnian"})

In [67]:
def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    The input is stripped of Lucene special characters and split into words.
    Each word gets a fuzzy-match suffix (`~2`, i.e. up to 2 changed
    characters) and the words are combined with the AND operator. Useful for
    mapping entities from user questions to database values while tolerating
    some misspellings.

    Args:
        input: Free text (typically an entity name) to search for.

    Returns:
        The query string, e.g. "John~2 AND Smith~2". Returns an empty string
        when the input contains no searchable words (empty, whitespace-only,
        or only special characters) instead of raising IndexError.
    """
    # str.split() without arguments never yields empty strings, so no extra
    # filtering is needed; joining also handles the zero-word case naturally.
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question

    Entities are extracted from the question with `entity_chain`. Each entity
    is looked up in the `entity` full-text index (top 2 matches), and the
    incoming and outgoing non-MENTIONS relationships of the matched nodes are
    rendered as "source - TYPE -> target" lines (at most 50 per entity).

    Args:
        question: The user's natural-language question.

    Returns:
        All relationship lines joined by newlines, one relationship per line;
        an empty string when nothing was found.
    """
    lines = []
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        # A name with no searchable words would produce an empty/invalid
        # full-text query, so skip it instead of failing the whole retrieval.
        if not remove_lucene_chars(entity).split():
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        # Cypher string concatenation yields null when an `id` is missing;
        # drop those rows so the final join cannot fail on None.
        lines.extend(el['output'] for el in response if el['output'] is not None)
    # Join once at the end so results of different entities stay on separate
    # lines (appending per-entity joins fused the boundary lines together).
    return "\n".join(lines)

In [None]:
# Smoke-test the structured retriever with a sample question.
print(structured_retriever("Who is zulqarnain"))