# Chatting with the SEC Knowledge Graph

### Import packages and set up Neo4j

In [None]:
from dotenv import load_dotenv
import os

import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Warning control
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the Neo4j connection settings and OpenAI credentials from the local
# .env file. override=True lets values in .env replace variables that are
# already set in the process environment.
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# 'neo4j' is used when NEO4J_DATABASE is unset or empty.
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# os.getenv returns None when OPENAI_BASE_URL is not set, and None + str raises
# TypeError, which would abort this cell before the constants below exist.
# Only build the embeddings endpoint when a base URL is actually configured.
_openai_base_url = os.getenv('OPENAI_BASE_URL')
OPENAI_ENDPOINT = (
    _openai_base_url + '/embeddings' if _openai_base_url is not None else None
)

# Global constants
VECTOR_INDEX_NAME = 'form_10k_chunks'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

In [None]:
# Connect to the Neo4j database using the settings loaded from the environment.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

### Questions
1. Determine which state has the most investment firms
2. Determine which state has the most companies
3. What are the cities in California with the most investment firms?
4. Which city in California has the most companies listed?
5. What are top investment firms in San Francisco?
6. What companies are located in Santa Clara?
7. What companies are near Santa Clara?
8. What investment firms are near Sacramento?
9. Which investment firms are near Palo Alto Networks?

In [None]:
# The graph now also contains address information: Address nodes, plus a
# `location` POINT property on Company, Manager and Address.
# POINT values are what make geospatial ("near") searches possible via
# point.distance(). Note the coordinate order: query results print them as
# POINT(longitude latitude), e.g. POINT(-73.98 40.76) for a New York address.
kg.refresh_schema()
print(textwrap.fill(kg.schema, 60))

Node properties are the following: Chunk {textEmbedding:
LIST, f10kItem: STRING, chunkSeqId: INTEGER, text: STRING,
cik: STRING, cusip6: STRING, names: LIST, formId: STRING,
source: STRING, chunkId: STRING},Form {cusip6: STRING,
names: LIST, formId: STRING, source: STRING},Company
{location: POINT, cusip: STRING, names: LIST,
companyAddress: STRING, companyName: STRING, cusip6:
STRING},Manager {location: POINT, managerName: STRING,
managerCik: STRING, managerAddress: STRING},Address
{location: POINT, country: STRING, city: STRING, state:
STRING} Relationship properties are the following: SECTION
{f10kItem: STRING},OWNS_STOCK_IN {shares: INTEGER,
reportCalendarOrQuarter: STRING, value: FLOAT} The
relationships are the following: (:Chunk)-[:NEXT]-
>(:Chunk),(:Chunk)-[:PART_OF]->(:Form),(:Form)-[:SECTION]-
>(:Chunk),(:Company)-[:FILED]->(:Form),(:Company)-
[:LOCATED_AT]->(:Address),(:Manager)-[:LOCATED_AT]-
>(:Address),(:Manager)-[:OWNS_STOCK_IN]->(:Company)


In [None]:
# Inspect one Manager together with the Address node it is LOCATED_AT.
# LIMIT 1 without ORDER BY returns an arbitrary match, not a random sample.
kg.query("""
MATCH (mgr:Manager)-[:LOCATED_AT]->(addr:Address)
RETURN mgr, addr
LIMIT 1
""")

[{'mgr': {'managerCik': '1424381',
   'managerAddress': '650 Madison Avenue, 25th Floor, New York, NY, 10022',
   'location': POINT(-73.9713457 40.7639879),
   'managerName': 'LAKEWOOD CAPITAL MANAGEMENT, LP'},
  'addr': {'country': 'United States',
   'city': 'New York',
   'location': POINT(-73.9821429 40.7584882),
   'state': 'New York'}}]

### Ground Truth Answers

Q1) Determine which state has the most investment firms

In [None]:
# Ground truth for Q1: count Manager -> Address links per state and keep the
# ten states with the highest counts.
# The path variable `p` is bound but not used by the RETURN clause.
kg.query("""
  MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)
  RETURN address.state as state, count(address.state) as numManagers
    ORDER BY numManagers DESC
    LIMIT 10
""")

[{'state': 'New York', 'numManagers': 304},
 {'state': 'California', 'numManagers': 302},
 {'state': 'Massachusetts', 'numManagers': 146},
 {'state': 'Pennsylvania', 'numManagers': 138},
 {'state': 'Texas', 'numManagers': 125},
 {'state': 'Illinois', 'numManagers': 121},
 {'state': 'Florida', 'numManagers': 115},
 {'state': 'Connecticut', 'numManagers': 77},
 {'state': 'Ohio', 'numManagers': 76},
 {'state': 'New Jersey', 'numManagers': 69}]

Q2) Determine which state has the most companies

In [None]:
# Ground truth for Q2: count Company -> Address links per state, highest first.
# There is no LIMIT, so every state with at least one company is returned.
kg.query("""
  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)
  RETURN address.state as state, count(address.state) as numCompanies
    ORDER BY numCompanies DESC
""")

[{'state': 'California', 'numCompanies': 7},
 {'state': 'Delaware', 'numCompanies': 1},
 {'state': 'New York', 'numCompanies': 1},
 {'state': 'Oregon', 'numCompanies': 1}]

Q3) What are the cities in California with the most investment firms?

In [None]:
# Ground truth for Q3: restrict to addresses whose state is 'California', then
# count managers per city and keep the ten cities with the highest counts.
kg.query("""
  MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)
         WHERE address.state = 'California'
  RETURN address.city as city, count(address.city) as numManagers
    ORDER BY numManagers DESC
    LIMIT 10
""")

[{'city': 'San Francisco', 'numManagers': 48},
 {'city': 'Los Angeles', 'numManagers': 44},
 {'city': 'San Diego', 'numManagers': 17},
 {'city': 'Pasadena', 'numManagers': 13},
 {'city': 'Menlo Park', 'numManagers': 9},
 {'city': 'Newport Beach', 'numManagers': 9},
 {'city': 'Irvine', 'numManagers': 9},
 {'city': 'Walnut Creek', 'numManagers': 8},
 {'city': 'Palo Alto', 'numManagers': 6},
 {'city': 'Lafayette', 'numManagers': 6}]

Q4) Which city in California has the most companies listed?

In [None]:
# Ground truth for Q4: same shape as the Q3 query, but counting Company nodes
# per Californian city. No LIMIT is applied, so all matching cities are listed.
kg.query("""
  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)
         WHERE address.state = 'California'
  RETURN address.city as city, count(address.city) as numCompanies
    ORDER BY numCompanies DESC
""")

[{'city': 'Santa Clara', 'numCompanies': 3},
 {'city': 'San Jose', 'numCompanies': 2},
 {'city': 'Sunnyvale', 'numCompanies': 1},
 {'city': 'Cupertino', 'numCompanies': 1}]

Q5) What are top investment firms in San Francisco?

In [None]:
# Ground truth for Q5: "top" is defined here as the largest summed `value` over
# a manager's OWNS_STOCK_IN relationships, for managers located in San Francisco.
# NOTE(review): the sum is not filtered by reportCalendarOrQuarter, so if more
# than one reporting period is loaded the periods are added together - confirm.
kg.query("""
  MATCH p=(mgr:Manager)-[:LOCATED_AT]->(address:Address),
         (mgr)-[owns:OWNS_STOCK_IN]->(:Company)
         WHERE address.city = "San Francisco"
  RETURN mgr.managerName, sum(owns.value) as totalInvestmentValue
    ORDER BY totalInvestmentValue DESC
    LIMIT 10
""")

[{'mgr.managerName': 'Dodge & Cox', 'totalInvestmentValue': 3889236092000.0},
 {'mgr.managerName': 'WELLS FARGO & COMPANY/MN',
  'totalInvestmentValue': 2177580039000.0},
 {'mgr.managerName': 'CHARLES SCHWAB INVESTMENT MANAGEMENT INC',
  'totalInvestmentValue': 1944847519000.0},
 {'mgr.managerName': 'Parallax Volatility Advisers, L.P.',
  'totalInvestmentValue': 694023723000.0},
 {'mgr.managerName': 'PARNASSUS INVESTMENTS, LLC',
  'totalInvestmentValue': 211068925000.0},
 {'mgr.managerName': 'Spyglass Capital Management LLC',
  'totalInvestmentValue': 98135259000.0},
 {'mgr.managerName': 'Valiant Capital Management, L.P.',
  'totalInvestmentValue': 52124040000.0},
 {'mgr.managerName': 'Ensemble Capital Management, LLC',
  'totalInvestmentValue': 42355370000.0},
 {'mgr.managerName': 'Woodline Partners LP',
  'totalInvestmentValue': 41497384000.0},
 {'mgr.managerName': 'Alta Park Capital, LP',
  'totalInvestmentValue': 38959909000.0}]

Q6) What companies are located in Santa Clara?

In [None]:
# Ground truth for Q6: exact match on the Address city; returns company names only.
kg.query("""
  MATCH (com:Company)-[:LOCATED_AT]->(address:Address)
         WHERE address.city = "Santa Clara"
  RETURN com.companyName
""")

[{'com.companyName': 'PALO ALTO NETWORKS INC'},
 {'com.companyName': 'SEAGATE TECHNOLOGY'},
 {'com.companyName': 'ATLASSIAN CORP PLC'}]

Q7) What companies are near Santa Clara?

In [None]:
# Ground truth for Q7: find the Address node(s) for Santa Clara, then every
# Company whose Address node lies within 10000 distance units of it.
# The comparison uses the Address nodes' `location`, not Company.location.
# NOTE(review): the threshold is presumably metres (i.e. 10 km) - the Q9 query
# divides the same distance by 1000 and calls the result distanceKm.
kg.query("""
  MATCH (sc:Address)
    WHERE sc.city = "Santa Clara"
  MATCH (com:Company)-[:LOCATED_AT]->(comAddr:Address)
    WHERE point.distance(sc.location, comAddr.location) < 10000
  RETURN com.companyName, com.companyAddress
""")

[{'com.companyName': 'PALO ALTO NETWORKS INC',
  'com.companyAddress': '3000 Tannery Way, Santa Clara, CA 95054, USA'},
 {'com.companyName': 'GSI TECHNOLOGY INC',
  'com.companyAddress': '1213 Elko Dr, Sunnyvale, CA 94089, USA'},
 {'com.companyName': 'SEAGATE TECHNOLOGY',
  'com.companyAddress': '2445 Augustine Dr, Santa Clara, CA 95054, USA'},
 {'com.companyName': 'ATLASSIAN CORP PLC',
  'com.companyAddress': '431 El Camino Real, Santa Clara, CA 95050, USA'},
 {'com.companyName': 'APPLE INC', 'com.companyAddress': 'Cupertino, CA, USA'}]

Q8) What investment firms are near Sacramento?

In [None]:
# Ground truth for Q8: same "near" pattern as the Q7 query, applied to Manager
# nodes around the Sacramento Address node(s), with the same 10000 threshold.
kg.query("""
  MATCH (address:Address)
    WHERE address.city = "Sacramento"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location,
        managerAddress.location) < 10000
  RETURN mgr.managerName, mgr.managerAddress
""")

[{'mgr.managerName': 'CALIFORNIA STATE TEACHERS RETIREMENT SYSTEM',
  'mgr.managerAddress': '100 WATERFRONT PLACE, WEST SACRAMENTO, CA, 95605'},
 {'mgr.managerName': 'CALIFORNIA PUBLIC EMPLOYEES RETIREMENT SYSTEM',
  'mgr.managerAddress': '400 Q ST, SUITE 4800, SACRAMENTO, CA, 95811'}]

Q9) Which investment firms are near Palo Alto Networks?

In [None]:
# Ground truth for Q9: resolve the company through the fullTextCompanyNames
# full-text index, then list managers whose Address node is within 10000
# distance units of the company's Address node, nearest first.
# Every node yielded by the full-text search is used; `score` is neither
# filtered nor ranked on.
# NOTE(review): the search text "Palo Aalto Networks" is misspelled - possibly
# on purpose, to show the full-text lookup tolerates it. Confirm before fixing.
kg.query("""
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames",
         "Palo Aalto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:LOCATED_AT]->(comAddress:Address),
    (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE point.distance(comAddress.location,
        mgrAddress.location) < 10000
  RETURN mgr,
    toInteger(point.distance(comAddress.location,
        mgrAddress.location) / 1000) as distanceKm
    ORDER BY distanceKm ASC
    LIMIT 10
""")

[{'mgr': {'managerCik': '1802994',
   'managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008',
   'location': POINT(-121.9342655 37.2909459),
   'managerName': 'Mine & Arao Wealth Creation & Management, LLC.'},
  'distanceKm': 6}]

### Writing Cypher with an LLM

I'll use few-shot learning to teach an LLM to write Cypher:
- The LLM is OpenAI's GPT-3.5 model
- The chain is GraphCypherQAChain (the Neo4j integration within LangChain)

In [None]:
# Prompt text for turning a natural-language question into a Cypher statement.
# It has two placeholders, {schema} and {question}, and one worked example
# (few-shot) showing the expected query style.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to
query a graph database.
Instructions:
Use only the provided relationship types and properties in the
schema. Do not use any other relationship types or properties that
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName
The question is:
{question}"""

In [None]:
# Wrap the template string in a PromptTemplate that declares its two
# placeholders, `schema` and `question`.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

In [None]:
# Build the question-answering chain over the knowledge graph.
# temperature=0 is passed to the chat model; verbose=True makes the chain print
# the generated Cypher and the query results alongside the answer.
cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True,
    graph=kg,
)

In [None]:
def prettyCypherChain(question: str) -> None:
    """Run a question through the Cypher QA chain and print the wrapped answer.

    The module-level ``cypherChain`` is looked up at call time, so rebinding
    ``cypherChain`` in a later cell changes which chain this helper uses.

    Args:
        question: Natural-language question to answer from the knowledge graph.

    Returns:
        None. The answer is printed, wrapped to 60 columns, not returned.
    """
    response = cypherChain.run(question)
    print(textwrap.fill(response, 60))

In [None]:
prettyCypherChain("What investment firms are in San Francisco?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address {city: 'San Francisco'})
MATCH (mgr)-[:OWNS_STOCK_IN]->(comp:Company)
RETURN comp.companyName[0m
Full Context:
[32;1m[1;3m[{'comp.companyName': 'WESTERN DIGITAL CORP.'}, {'comp.companyName': 'FEDEX CORP'}, {'comp.companyName': 'NETAPP INC'}, {'comp.companyName': 'FEDEX CORP'}, {'comp.companyName': 'NIKE INC'}, {'comp.companyName': 'WESTERN DIGITAL CORP.'}, {'comp.companyName': 'PALO ALTO NETWORKS INC'}, {'comp.companyName': 'NIKE INC'}, {'comp.companyName': 'NEWS CORP NEW'}, {'comp.companyName': 'NETAPP INC'}][0m

[1m> Finished chain.[0m
Some of the investment firms in San Francisco include
Western Digital Corp., FedEx Corp, NetApp Inc, Nike Inc,
Palo Alto Networks Inc, and News Corp New.


In [None]:
prettyCypherChain("What investment firms are in Menlo Park?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'Menlo Park'
RETURN mgr.managerName[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Bordeaux Wealth Advisors LLC'}, {'mgr.managerName': 'Opes Wealth Management LLC'}, {'mgr.managerName': 'Solstein Capital, LLC'}, {'mgr.managerName': 'Stamos Capital Partners, L.P.'}, {'mgr.managerName': 'TIEMANN INVESTMENT ADVISORS, LLC'}, {'mgr.managerName': 'SCGE MANAGEMENT, L.P.'}, {'mgr.managerName': 'Nelson Capital Management, LLC'}, {'mgr.managerName': 'Jasper Ridge Partners, L.P.'}, {'mgr.managerName': 'CROSSLINK CAPITAL INC'}][0m

[1m> Finished chain.[0m
Jasper Ridge Partners, L.P. and CROSSLINK CAPITAL INC are
investment firms located in Menlo Park.


In [None]:
prettyCypherChain("What companies are in Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (comp:Company)-[:LOCATED_AT]->(compAddress:Address)
    WHERE compAddress.city = 'Santa Clara'
RETURN comp.companyName[0m
Full Context:
[32;1m[1;3m[{'comp.companyName': 'PALO ALTO NETWORKS INC'}, {'comp.companyName': 'SEAGATE TECHNOLOGY'}, {'comp.companyName': 'ATLASSIAN CORP PLC'}][0m

[1m> Finished chain.[0m
PALO ALTO NETWORKS INC, SEAGATE TECHNOLOGY, and ATLASSIAN
CORP PLC are companies located in Santa Clara.


In [None]:
# The LLM fails to do nearness search
prettyCypherChain("What investment firms are near Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
WHERE mgrAddress.city = 'Santa Clara'
RETURN mgr.managerName[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer to that question.


### Expand the prompt to teach the LLM new Cypher patterns

In [None]:
# Expanded prompt: keeps the San Francisco example and adds a second few-shot
# example that shows a distance-based ("near") query using point.distance().
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
  MATCH (address:Address)
    WHERE address.city = "Santa Clara"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location,
        managerAddress.location) < 10000
  RETURN mgr.managerName, mgr.managerAddress

The question is:
{question}"""

In [None]:
# Rebuild the prompt from the expanded template, then rebuild the chain around
# it. prettyCypherChain reads the global `cypherChain` at call time, so
# rebinding the name here is enough for later calls to use the new prompt.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True,
    graph=kg,
)

In [None]:
prettyCypherChain("What investment firms are near Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (address:Address)
    WHERE address.city = "Santa Clara"
MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location, managerAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Mine & Arao Wealth Creation & Management, LLC.', 'mgr.managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008'}][0m

[1m> Finished chain.[0m
Mine & Arao Wealth Creation & Management, LLC. is an
investment firm near Santa Clara, located at 901 CAMPISI
WAY, SUITE 140, CAMPBELL, CA, 95008.


### Expand the query to retrieve information from the Form 10K chunks

In [None]:
# Prompt with a third few-shot example: look the company up through the
# fullTextCompanyNames full-text index, follow FILED and SECTION to the Form 10-K
# chunk whose section is "item1", and return that chunk's text.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
  MATCH (address:Address)
    WHERE address.city = "Santa Clara"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location,
        managerAddress.location) < 10000
  RETURN mgr.managerName, mgr.managerAddress

# What does Palo Alto Networks do?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames",
         "Palo Alto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:FILED]->(f:Form),
    (f)-[s:SECTION]->(c:Chunk)
  WHERE s.f10kItem = "item1"
RETURN c.text

The question is:
{question}"""

In [None]:
# Recreate the prompt and the chain so that the Form 10-K example in the
# current template takes effect for subsequent questions.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    verbose=True,
    graph=kg,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
)


In [None]:
prettyCypherChain("What does Palo Alto Networks do?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mCALL db.index.fulltext.queryNodes(
     "fullTextCompanyNames", 
     "Palo Alto Networks"
     ) YIELD node, score
WITH node as com
MATCH (com)-[:FILED]->(f:Form),
  (f)-[s:SECTION]->(c:Chunk)
WHERE s.f10kItem = "item1"
RETURN c.text[0m
Full Context:
[32;1m[1;3m[{'c.text': '>Item 1. Business\nGeneral\nPalo Alto Networks, Inc. is a global cybersecurity provider with a vision of a world where each day is safer and more secure than the one before. We were incorporated in 2005 and are headquartered in Santa Clara, California.\nWe empower enterprises, organizations, service providers, and government entities to protect themselves against today’s most sophisticated cyber threats. Our cybersecurity platforms and services help secure enterprise users, networks, clouds, and endpoints by delivering comprehensive cybersecurity backed by industry-leading artificial intelligence and automation. We are a leading 

### Testing

In [None]:
# Check the graph schema
kg.refresh_schema()
print(textwrap.fill(kg.schema, 60))

Node properties are the following: Chunk {textEmbedding:
LIST, f10kItem: STRING, chunkSeqId: INTEGER, text: STRING,
cik: STRING, cusip6: STRING, names: LIST, formId: STRING,
source: STRING, chunkId: STRING},Form {cusip6: STRING,
names: LIST, formId: STRING, source: STRING},Company
{location: POINT, cusip: STRING, names: LIST,
companyAddress: STRING, companyName: STRING, cusip6:
STRING},Manager {location: POINT, managerName: STRING,
managerCik: STRING, managerAddress: STRING},Address
{location: POINT, country: STRING, city: STRING, state:
STRING} Relationship properties are the following: SECTION
{f10kItem: STRING},OWNS_STOCK_IN {shares: INTEGER,
reportCalendarOrQuarter: STRING, value: FLOAT} The
relationships are the following: (:Chunk)-[:NEXT]-
>(:Chunk),(:Chunk)-[:PART_OF]->(:Form),(:Form)-[:SECTION]-
>(:Chunk),(:Company)-[:FILED]->(:Form),(:Company)-
[:LOCATED_AT]->(:Address),(:Manager)-[:LOCATED_AT]-
>(:Address),(:Manager)-[:OWNS_STOCK_IN]->(:Company)


In [None]:
# Prompt used for the test questions. It matches the previous three-example
# template except that the "near" example is about Sacramento instead of
# Santa Clara.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Sacramento?
  MATCH (address:Address)
    WHERE address.city = "Sacramento"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location,
        managerAddress.location) < 10000
  RETURN mgr.managerName, mgr.managerAddress

# What does Palo Alto Networks do?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames",
         "Palo Alto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:FILED]->(f:Form),
    (f)-[s:SECTION]->(c:Chunk)
  WHERE s.f10kItem = "item1"
RETURN c.text

The question is:
{question}"""

In [None]:
# Refresh the prompt from the current template and rebind the global chain that
# prettyCypherChain uses, so the test questions below run against this prompt.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    verbose=True,
)

In [None]:
# Q1 Determine which state has the most investment firms
prettyCypherChain("Determine which state has the most investment firms")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
WITH mgrAddress.state as state, COUNT(DISTINCT mgr.managerName) as numFirms
RETURN state, numFirms
ORDER BY numFirms DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'state': 'New York', 'numFirms': 304}][0m

[1m> Finished chain.[0m
New York has the most investment firms with a total of 304.


In [None]:
# Q2 Determine which state has the most companies
prettyCypherChain("Determine which state has the most companies")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Company)-[:LOCATED_AT]->(a:Address)
WITH a.state AS state, COUNT(c) AS numCompanies
RETURN state, numCompanies
ORDER BY numCompanies DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'state': 'California', 'numCompanies': 7}][0m

[1m> Finished chain.[0m
California has the most companies, with a total of 7.


In [None]:
# Q3 What are the cities in California with the most investment firms?
prettyCypherChain("What are the cities in California with the most investment firms?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
WHERE mgrAddress.state = 'California'
RETURN mgrAddress.city, COUNT(DISTINCT mgr) AS numInvestmentFirms
ORDER BY numInvestmentFirms DESC[0m
Full Context:
[32;1m[1;3m[{'mgrAddress.city': 'San Francisco', 'numInvestmentFirms': 48}, {'mgrAddress.city': 'Los Angeles', 'numInvestmentFirms': 44}, {'mgrAddress.city': 'San Diego', 'numInvestmentFirms': 17}, {'mgrAddress.city': 'Pasadena', 'numInvestmentFirms': 13}, {'mgrAddress.city': 'Irvine', 'numInvestmentFirms': 9}, {'mgrAddress.city': 'Menlo Park', 'numInvestmentFirms': 9}, {'mgrAddress.city': 'Newport Beach', 'numInvestmentFirms': 9}, {'mgrAddress.city': 'Walnut Creek', 'numInvestmentFirms': 8}, {'mgrAddress.city': 'Palo Alto', 'numInvestmentFirms': 6}, {'mgrAddress.city': 'Lafayette', 'numInvestmentFirms': 6}][0m

[1m> Finished chain.[0m
The cities in California with the most investment firms 

In [None]:
# Q4 Which city in California has the most companies listed?
prettyCypherChain("Which city in California has the most companies listed?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Company)-[:LOCATED_AT]->(a:Address)
WHERE a.state = 'California'
RETURN a.city, COUNT(c) AS numCompanies
ORDER BY numCompanies DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'a.city': 'Santa Clara', 'numCompanies': 3}][0m

[1m> Finished chain.[0m
Santa Clara in California has 3 companies listed.


In [None]:
# Q5 What are top investment firms in San Francisco?
prettyCypherChain("What are top investment firms in San Francisco?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName
ORDER BY mgr.managerName LIMIT 5[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'ALGERT GLOBAL LLC'}, {'mgr.managerName': 'ASPIRIANT, LLC'}, {'mgr.managerName': 'Alta Park Capital, LP'}, {'mgr.managerName': 'Avalon Global Asset Management LLC'}, {'mgr.managerName': 'CATALYST PRIVATE WEALTH, LLC'}][0m

[1m> Finished chain.[0m
Some of the top investment firms in San Francisco include
Algert Global LLC, Aspiriant LLC, Alta Park Capital LP,
Avalon Global Asset Management LLC, and Catalyst Private
Wealth LLC.


In [None]:
# Q6 What companies are located in Santa Clara?
prettyCypherChain("What companies are located in Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (company:Company)-[:LOCATED_AT]->(address:Address)
WHERE address.city = 'Santa Clara'
RETURN company.companyName[0m
Full Context:
[32;1m[1;3m[{'company.companyName': 'PALO ALTO NETWORKS INC'}, {'company.companyName': 'SEAGATE TECHNOLOGY'}, {'company.companyName': 'ATLASSIAN CORP PLC'}][0m

[1m> Finished chain.[0m
The companies located in Santa Clara are PALO ALTO NETWORKS
INC, SEAGATE TECHNOLOGY, and ATLASSIAN CORP PLC.


In [None]:
# Q7 What companies are near Santa Clara
prettyCypherChain("What companies are near Santa Clara")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (address:Address)
    WHERE address.city = "Santa Clara"
MATCH (company:Company)-[:LOCATED_AT]->(companyAddress:Address)
    WHERE point.distance(address.location, companyAddress.location) < 10000
RETURN company.companyName, company.companyAddress[0m
Full Context:
[32;1m[1;3m[{'company.companyName': 'PALO ALTO NETWORKS INC', 'company.companyAddress': '3000 Tannery Way, Santa Clara, CA 95054, USA'}, {'company.companyName': 'GSI TECHNOLOGY INC', 'company.companyAddress': '1213 Elko Dr, Sunnyvale, CA 94089, USA'}, {'company.companyName': 'SEAGATE TECHNOLOGY', 'company.companyAddress': '2445 Augustine Dr, Santa Clara, CA 95054, USA'}, {'company.companyName': 'ATLASSIAN CORP PLC', 'company.companyAddress': '431 El Camino Real, Santa Clara, CA 95050, USA'}, {'company.companyName': 'APPLE INC', 'company.companyAddress': 'Cupertino, CA, USA'}][0m

[1m> Finished chain.[0m
The companies near Santa Cla

In [None]:
# Q8 (variant): the question list asks about Sacramento, but Sacramento is
# already the worked "near" example in the prompt, so this cell asks about
# Santa Clara instead.
# NOTE(review): presumably chosen so the model is not tested on its own
# few-shot example - confirm.
prettyCypherChain("What investment firms are near Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (address:Address)
    WHERE address.city = "Santa Clara"
MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location, managerAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Mine & Arao Wealth Creation & Management, LLC.', 'mgr.managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008'}][0m

[1m> Finished chain.[0m
Mine & Arao Wealth Creation & Management, LLC. is an
investment firm near Santa Clara, located at 901 CAMPISI
WAY, SUITE 140, CAMPBELL, CA, 95008.


In [None]:
# Q9 Which investment firms are near Palo Alto Networks?
# In the recorded run the generated Cypher matches companyName exactly against
# "Palo Alto Networks" and returns no rows; the name stored in the graph is
# 'PALO ALTO NETWORKS INC'. The ground-truth query resolves the company through
# the fullTextCompanyNames index, and no example in the prompt combines that
# index with point.distance.
prettyCypherChain("Which investment firms are near Palo Alto Networks?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (company:Company {companyName: "Palo Alto Networks"})-[:LOCATED_AT]->(companyAddress:Address)
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
WHERE point.distance(companyAddress.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer.
