# Using GraphRAG for unstructured data

In [1]:
from dotenv import load_dotenv
import os

# Load key/value pairs from a local .env file into the process environment so
# the os.getenv(...) lookups in later cells can find the API keys and the
# Neo4j connection settings.
load_dotenv()

True

In [2]:
# Import from langchain_community, the package the other cells in this notebook
# load their integrations from; `langchain.graphs` is only a legacy re-export.
from langchain_community.graphs import Neo4jGraph

# No explicit credentials are passed here — presumably the connection settings
# are picked up from the NEO4J_* environment variables (confirm against the
# installed Neo4jGraph version).
graph = Neo4jGraph()

In [3]:
from langchain_community.document_loaders import WikipediaLoader
from langchain_text_splitters import TokenTextSplitter

# Fetch the Wikipedia pages returned for the query.
wikipedia_loader = WikipediaLoader(query="Elizabeth I")
raw_documents = wikipedia_loader.load()

# Only the first five pages are chunked; each chunk holds up to 512 tokens and
# shares 24 tokens with its neighbour.
text_splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=24,
)
documents = text_splitter.split_documents(raw_documents[:5])



  lis = BeautifulSoup(html).find_all('li')


In [4]:
from langchain_community.chat_models import AzureChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# Credentials and endpoints are read from the environment, never hardcoded.
key = os.getenv('OPENAI_API_KEY')
endpoint = os.getenv('OPENAI_API_ENDPOINT')
gemini_key = os.getenv('GEMINI_KEY')

# Settings shared by both Azure OpenAI chat clients; only the deployment name
# and API version placeholders are spelled out per client below.
azure_common_settings = dict(
    openai_api_key=key,
    azure_endpoint=endpoint,
    temperature=0,
    max_tokens=2000,
    request_timeout=60,
    max_retries=3,
)

# Replace the '<name>' / "<version>" placeholders with real deployment values.
gpt3 = AzureChatOpenAI(
    deployment_name='<name>',
    openai_api_version="<version>",
    **azure_common_settings,
)

gpt4 = AzureChatOpenAI(
    deployment_name='<name>',
    openai_api_version="<version>",
    **azure_common_settings,
)

# Gemini chat client, configured with temperature 0 like the Azure clients.
gemini = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=gemini_key,
    convert_system_message_to_human=True,
    temperature=0,
    max_output_tokens=2048,
    max_retries=1,
)

  from .autonotebook import tqdm as notebook_tqdm
  warn_deprecated(


In [5]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
# Wrap the gpt4 chat model in a transformer that turns text documents into
# graph documents (nodes and relationships), used in the next cell.
llm_transformer = LLMGraphTransformer(llm=gpt4)

In [6]:
# Convert every chunk in `documents` into a graph document. This goes through
# the LLM, so it is presumably the slow/expensive step of the notebook.
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [7]:
# Inspect what was extracted from the first chunk only.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

Nodes:[Node(id='Mary', type='Person'), Node(id='Baron Burghley', type='Title'), Node(id='English Protestant church', type='Organization'), Node(id='House of Tudor', type='Family'), Node(id='Sir Francis Walsingham', type='Person'), Node(id='Edward VI', type='Person'), Node(id='William Cecil', type='Person'), Node(id='Mary, Queen of Scots', type='Person'), Node(id='Elizabeth I', type='Person'), Node(id='Henry VIII', type='Person'), Node(id='James VI of Scotland', type='Person'), Node(id='Anne Boleyn', type='Person'), Node(id='England and Ireland', type='Country')]
Relationships:[Relationship(source=Node(id='Elizabeth I', type='Person'), target=Node(id='England and Ireland', type='Country'), type='RULES'), Relationship(source=Node(id='Elizabeth I', type='Person'), target=Node(id='House of Tudor', type='Family'), type='MEMBER_OF'), Relationship(source=Node(id='Elizabeth I', type='Person'), target=Node(id='Henry VIII', type='Person'), type='CHILD_OF'), Relationship(source=Node(id='Elizabeth

In [8]:
# Neo4j connection settings, taken from the environment.
url = os.getenv('NEO4J_URI')
username = os.getenv('NEO4J_USERNAME')
password = os.getenv('NEO4J_PASSWORD')

# Rebind `graph` to a connection built from the explicit credentials.
graph = Neo4jGraph(url=url, username=username, password=password)

# Persist the extracted nodes and relationships.
# NOTE(review): baseEntityLabel / include_source presumably add a shared entity
# label and store the source chunks alongside the entities — confirm in the
# Neo4jGraph.add_graph_documents documentation.
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)

Failed to write data to connection ResolvedIPv4Address(('34.126.171.25', 7687)) (ResolvedIPv4Address(('34.126.171.25', 7687)))
Failed to write data to connection IPv4Address(('62343150.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.126.171.25', 7687)))


In [10]:
# Both integrations are imported from langchain_community; the former
# `langchain.vectorstores.neo4j_vector` path is only a legacy re-export.
from langchain_community.vectorstores import Neo4jVector
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed the stored text.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-l6-v2")

# Build a vector index over nodes already in the graph: nodes labelled
# `Document` are selected, their `text` property is embedded, and the vector is
# kept in the `embedding` property.
# No connection arguments are passed — presumably the NEO4J_* environment
# variables are used (confirm against the installed Neo4jVector version).
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [11]:
# Retrieve the stored chunks most relevant to the question (retrieval only, no LLM answer).
response = vector_index.similarity_search("Where was Elizabeth born?")

In [12]:
# Show the text of the first retrieved chunk.
print(response[0].page_content)


text: Elizabeth I (7 September 1533 – 24 March 1603) was Queen of England and Ireland from 17 November 1558 until her death in 1603. She was the last monarch of the House of Tudor.
Elizabeth was the only surviving child of Henry VIII and Anne Boleyn, his second wife, who was executed when Elizabeth was two years old. Anne's marriage to Henry was annulled, and Elizabeth was declared illegitimate. Henry restored her to the line of succession when she was 10, via the Third Succession Act 1543. After Henry's death in 1547, Elizabeth's younger half-brother Edward VI ruled until his own death in 1553, bequeathing the crown to a Protestant cousin, Lady Jane Grey, and ignoring the claims of his two half-sisters, the Catholic Mary and the younger Elizabeth, in spite of statutes to the contrary. Edward's will was set aside within weeks of his death and Mary became queen, deposing and executing Jane. During Mary's reign, Elizabeth was imprisoned for nearly a year on suspicion of supporting Prote

In [13]:
from langchain.chains import RetrievalQA

# Question-answering chain: the vector index supplies the context via its
# retriever and gpt4 writes the answer.
# NOTE(review): chain_type="stuff" presumably places all retrieved chunks into
# a single prompt — confirm in the RetrievalQA documentation.
vector_qa = RetrievalQA.from_chain_type(
    llm=gpt4,
    chain_type="stuff",
    retriever=vector_index.as_retriever()
)

In [14]:
# Chain.run is deprecated (it triggers a warn_deprecated notice); call invoke
# with the chain's "query" input and display only the "result" string, which is
# the same value run() returned.
vector_qa.invoke(
    {"query": "Where was Elizabeth born?"}
)["result"]

  warn_deprecated(


'Elizabeth I was born at Greenwich Palace in England on September 7, 1533.'

In [15]:
# Uses invoke instead of the deprecated Chain.run and displays only the
# "result" string; the question's spelling of "succession" is corrected.
vector_qa.invoke(
    {"query": "Who all were in line of succession to Henry VIII"}
)["result"]

"According to Henry VIII's will, the line of succession was as follows:\n\n1. Edward VI (his son)\n2. Mary I (his daughter from his first marriage to Catherine of Aragon)\n3. Elizabeth I (his daughter from his second marriage to Anne Boleyn)\n4. Jane Grey (his great-niece, granddaughter of his younger sister Mary Tudor)\n5. Katherine Grey (Jane Grey's sister)\n6. Mary Grey (Jane Grey's sister)\n7. Margaret Clifford (granddaughter of his elder sister Margaret Tudor)\n\nThese individuals were named as the line of succession at the time of Henry VIII's death in 1547."

In [16]:
# Uses invoke instead of the deprecated Chain.run and displays only the
# "result" string; the question now reads "How many" instead of "Who many".
vector_qa.invoke(
    {"query": "How many children did Mary Tudor have?"}
)["result"]

'Mary Tudor, the younger sister of Henry VIII, had two surviving daughters with her second husband, Charles Brandon, 1st Duke of Suffolk. Their names were Frances Grey, Duchess of Suffolk, and Eleanor Clifford, Countess of Cumberland.'

In [18]:
# Uses invoke instead of the deprecated Chain.run; indexing "result" keeps the
# displayed value a plain answer string, as before.
vector_qa.invoke(
    {"query": "What title was given to Serjeant Painter?"}
)["result"]

'The title given to Serjeant Painter was "Serjeant Painter." This was an official position held by an artist who was responsible for approving all portraits of the queen created by other artists. In the context provided, George Gower was appointed as Serjeant Painter in 1581 during Queen Elizabeth I\'s reign.'

Devops



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `Which tasks have optimization in their description?`


[0m[36;1m[1;3mThere are four tasks with optimization in their description:

1. Task name: Optimize
   Description: Optimize PaymentService
   Status: Open

2. Task name: Optimize
   Description: Optimize PaymentService
   Status: Open

3. Task name: Optimize
   Description: Optimize AuthService
   Status: Open

4. Task name: Optimize
   Description: Optimize AuthService
   Status: Open[0m[32;1m[1;3mThere are four tasks with optimization in their description:

1. Task name: Optimize
   Description: Optimize PaymentService
   Status: Open

2. Task name: Optimize
   Description: Optimize PaymentService
   Status: Open

3. Task name: Optimize
   Description: Optimize AuthService
   Status: Open

4. Task name: Optimize
   Description: Optimize AuthService
   Status: Open[0m

[1m> Finished chain.[0m
There are four tasks with optimization in the