In [25]:
import asyncio
import os
import re

from dotenv import load_dotenv
from langchain.callbacks import get_openai_callback
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_neo4j import Neo4jGraph
from langchain_ollama.llms import OllamaLLM
from langchain_openai import ChatOpenAI

In [30]:
# Load settings from a .env file (python-dotenv) before reading them below.
load_dotenv()

# Neo4j connection settings. The credentials are looked up in the process
# environment and are None when the corresponding variable is not set.
NEO4J_URI = "neo4j://127.0.0.1:7687"
NEO4J_USER, NEO4J_PASSWORD = (
    os.environ.get(name) for name in ("NEO4J_USER", "NEO4J_PASSWORD")
)

In [3]:
# Chat model shared by every graph-extraction step in this notebook;
# the sampling temperature is pinned to 0.0.
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)

In [4]:
# Default transformer: only the LLM is supplied, every other option is left
# at the library default.
graph_transformer = LLMGraphTransformer(
    llm=llm,
)

In [6]:
# Read the extracted filing text into memory.
# The encoding is passed explicitly: without it open() uses a
# platform-dependent default, and the values extracted from this filing
# further down include non-ASCII currency symbols (£, €).
# NOTE(review): assumes the file was written as UTF-8 — confirm against the
# step that produced it.
with open("documents/AA_MzA0MTYzNDYxN2FkaXF6a2N4-pages.txt", encoding="utf-8") as f:
    text = f.read()
text

"VODAFONE LIMITED\nCompany No: 1471587\nFINANCIAL STATEMENTS\nFOR THE YEAR ENDED 31 MARCH 2011\nAO1BZWBG*\nMONDAY\nA05\n01/08/2011\n55\nCOMPANIES HOUSE\n\n--- End of Page 1 ---\n\nCompany No 1471587\nVODAFONE LIMITED\nREPORT OF THE DIRECTORS\nThe directors submit their annual report and audited financial statements for the year ended\n31 March 2011\nPrincipal activity and review of business\nThe principal activity of the Company continues to be the provision of telecommunications services\nthrough its mobile network\nThe key messages for the year for Vodafone's business in the UK (Vodafone UK), of which Vodafone\nLimited Is the largest and major part, are summarised below\nTurnover increased by 7 5 % (2010 decline of 7 3%) This was driven by growth in service revenue of 6 1%\nmainly due to the increase in the customer base and increased average revenue per use (ARPU) In addition,\ndata revenue also showed strong growth due to the higher penetration of smart phones and data bundles\nThe

In [8]:
# Split the text into pages on the complete "--- End of Page N ---" marker.
# Splitting on the bare prefix "--- End of Page" leaves the rest of the marker
# (e.g. " 1 ---") at the start of the following page, which would then be sent
# to the LLM as if it were document content.
pages = re.split(r"---\s*End of Page\s*\d+\s*---", text)

# One Document per non-empty page, with surrounding whitespace removed.
documents = [Document(page_content=page.strip()) for page in pages if page.strip()]
documents = documents[:2]  # Limit to first 2 pages for testing

In [9]:
graph_documents = await graph_transformer.aconvert_to_graph_documents(documents)

In [13]:
# Quick look at what was extracted from the first document only.
for label, field in (("NODES", "nodes"), ("RELATIONSHIPS", "relationships")):
    print(label, getattr(graph_documents[0], field))

NODES [Node(id='Vodafone Limited', type='Organization', properties={}), Node(id='1471587', type='Identifier', properties={}), Node(id='31 March 2011', type='Date', properties={}), Node(id='01/08/2011', type='Date', properties={}), Node(id='Companies House', type='Organization', properties={})]
RELATIONSHIPS [Relationship(source=Node(id='Vodafone Limited', type='Organization', properties={}), target=Node(id='1471587', type='Identifier', properties={}), type='HAS_IDENTIFIER', properties={}), Relationship(source=Node(id='Vodafone Limited', type='Organization', properties={}), target=Node(id='31 March 2011', type='Date', properties={}), type='FINANCIAL_STATEMENTS_END_DATE', properties={}), Relationship(source=Node(id='Vodafone Limited', type='Organization', properties={}), target=Node(id='01/08/2011', type='Date', properties={}), type='DOCUMENT_DATE', properties={}), Relationship(source=Node(id='Vodafone Limited', type='Organization', properties={}), target=Node(id='Companies House', type=

In [14]:
# Print every node and relationship, one section per input document.
for i, graph_doc in enumerate(graph_documents):
    report_lines = [
        f"--- Document {i+1} ---",
        "NODES:",
        *(str(node) for node in graph_doc.nodes),
        "RELATIONSHIPS:",
        *(str(rel) for rel in graph_doc.relationships),
        "",  # produces the blank separator line after each document
    ]
    print("\n".join(report_lines))

--- Document 1 ---
NODES:
id='Vodafone Limited' type='Organization' properties={}
id='1471587' type='Identifier' properties={}
id='31 March 2011' type='Date' properties={}
id='01/08/2011' type='Date' properties={}
id='Companies House' type='Organization' properties={}
RELATIONSHIPS:
source=Node(id='Vodafone Limited', type='Organization', properties={}) target=Node(id='1471587', type='Identifier', properties={}) type='HAS_IDENTIFIER' properties={}
source=Node(id='Vodafone Limited', type='Organization', properties={}) target=Node(id='31 March 2011', type='Date', properties={}) type='FINANCIAL_STATEMENTS_END_DATE' properties={}
source=Node(id='Vodafone Limited', type='Organization', properties={}) target=Node(id='01/08/2011', type='Date', properties={}) type='DOCUMENT_DATE' properties={}
source=Node(id='Vodafone Limited', type='Organization', properties={}) target=Node(id='Companies House', type='Organization', properties={}) type='FILED_WITH' properties={}

--- Document 2 ---
NODES:
id='

In [22]:
from langchain.callbacks import get_openai_callback

# Prepare the document
documents = [Document(page_content=text.strip())]

with get_openai_callback() as cb:
    graph_documents = await graph_transformer.aconvert_to_graph_documents(documents)

    # After this point, you can inspect the callback
    print(f"Prompt tokens used: {cb.prompt_tokens}")
    print(f"Completion tokens used: {cb.completion_tokens}")
    print(f"Total tokens used: {cb.total_tokens}")
    print(f"Total cost (USD): ${cb.total_cost:.6f}")


Prompt tokens used: 4727
Completion tokens used: 3679
Total tokens used: 8406
Total cost (USD): $0.048607


In [23]:
# Print the nodes and relationships extracted from the full-text run.
print("Graph Documents:")
for i, graph_doc in enumerate(graph_documents):
    print(f"--- Document {i+1} ---")
    # Both sections share the same layout: a heading, then one item per line.
    for heading, items in (
        ("NODES:", graph_doc.nodes),
        ("RELATIONSHIPS:", graph_doc.relationships),
    ):
        print(heading)
        for item in items:
            print(item)
    print()

Graph Documents:
--- Document 1 ---
NODES:
id='Vodafone Limited' type='Organization' properties={}
id='1471587' type='Identifier' properties={}
id='Telecommunications Services' type='Service' properties={}
id='31 March 2011' type='Date' properties={}
id='£139.0M' type='Financial_value' properties={}
id='£13.6M' type='Financial_value' properties={}
id='£179.0M' type='Financial_value' properties={}
id='19,172,400' type='Quantity' properties={}
id='£2.0M' type='Financial_value' properties={}
id='Vodafone Foundation' type='Organization' properties={}
id='£533.5M' type='Financial_value' properties={}
id='£493.1M' type='Financial_value' properties={}
id='£40.4M' type='Financial_value' properties={}
id='£3,665.1M' type='Financial_value' properties={}
id='£2,941.3M' type='Financial_value' properties={}
id='£378.5M' type='Financial_value' properties={}
id='£2,798.0M' type='Financial_value' properties={}
id='£2,261.1M' type='Financial_value' properties={}
id='£160.9M' type='Financial_value' prop

In [33]:
# Store the extracted graph in Neo4j.
# https://python.langchain.com/docs/how_to/graph_constructing/
#
# Credentials reuse the NEO4J_USER / NEO4J_PASSWORD values read in the
# configuration cell instead of looking the environment up a second time.
# NOTE(review): the configuration cell also defines
# NEO4J_URI = "neo4j://127.0.0.1:7687", which is not used here and differs from
# this URL in scheme and host spelling — confirm which one is intended.
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    refresh_schema=False,
)
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)

In [37]:
graph2_transformer = LLMGraphTransformer(
    llm=llm,
    node_properties=True,
)
documents = [Document(page_content=text.strip())]
graph_documents_props = await graph2_transformer.aconvert_to_graph_documents(documents)


In [38]:
# Print each node (including its properties) and each relationship.
print("Graph Documents with Properties:")
for i, graph_doc in enumerate(graph_documents_props):
    section = [f"--- Document {i+1} ---", "NODES:"]
    section.extend(str(node) for node in graph_doc.nodes)
    section.append("RELATIONSHIPS:")
    section.extend(str(rel) for rel in graph_doc.relationships)
    print("\n".join(section))
    print()  # blank line between documents

Graph Documents with Properties:
--- Document 1 ---
NODES:
id='Vodafone Limited' type='Organization' properties={'companyNumber': '1471587', 'principalActivity': 'Provision of telecommunications services through its mobile network', 'turnoverIncrease': '7.5%', 'operatingProfit': '£139.0m', 'acquisitionCost': '£1790m', 'closingCustomers': '19,172,400', 'charitableDonations': '£2.0m', 'capitalExpenditure': '£533.5m', 'debtorsIncrease': '€3,665.1m', 'currentCreditors': '€2,798.0m', 'provisionsForLiabilities': '£160.9m', 'netAmountsDueToGroupUndertakings': '£5,259.2m', 'lossOnOrdinaryActivitiesAfterTaxation': '€131.2m', 'registeredOffice': 'Vodafone House, The Connection, Newbury, Berkshire RG14 2FN'}
id='Mh Bond' type='Person' properties={}
id='M Brearley' type='Person' properties={'resigned': '2010-12-17'}
id='J Campbell' type='Person' properties={}
id='D Cloke' type='Person' properties={'appointed': '2011-01-04'}
id='D Crook' type='Person' properties={}
id='S Gopalan' type='Person' prop

In [39]:
# Persist the property-enriched extraction produced by graph2_transformer.
# Passing `graph_documents` here would only re-send the property-less graph
# that an earlier cell already stored, leaving `graph_documents_props` unsaved.
graph.add_graph_documents(graph_documents_props, baseEntityLabel=True, include_source=True)