In [1]:
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage 
from dotenv import load_dotenv
from datetime import datetime
from os.path import exists
import tiktoken
from firecrawl_scraping import *
from utility import *
import re
import os
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.schema import StrOutputParser
import json
from neo4j import GraphDatabase
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI
from neomodel import config, StructuredNode, StringProperty, FloatProperty, BooleanProperty, ArrayProperty, RelationshipTo

    
# Populate os.environ from a local .env file (if one exists) before any credential is read.
# With its default arguments load_dotenv() leaves already-set environment variables untouched,
# so this is safe when the variables are exported by the shell instead.
load_dotenv()

# neomodel connection URL for the local bolt endpoint; the password comes from NEO4J_PASSWORD.
config.DATABASE_URL = f"bolt://neo4j:{os.getenv('NEO4J_PASSWORD')}@localhost:7687"


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
def cypher_generator(schema, query, model_name="gpt-4o"):
    """Translate a natural-language question into a Cypher statement with an OpenAI chat model.

    The prompt is built from a task description, the graph schema, a set of
    few-shot question/Cypher examples, the user's question and a short rule list.

    Args:
        schema: Text describing the graph schema; substituted into the prompt's
            ``{schema}`` placeholder.
        query: Natural-language question; substituted into the ``{query}`` placeholder.
        model_name: Chat model name forwarded to ``ChatOpenAI``.

    Returns:
        The model's reply as a string, with surrounding whitespace removed and,
        if the whole reply is wrapped in a Markdown code fence, the fence removed
        so the text can be handed directly to a Cypher executor.
    """
    system_message = """
    Your task is to generate Cypher statement to query a graph database (Neo4j), based on the input natural language query.
    The output response should contain only Cypher query in text format, with no additional commentary, explanations, or extraneous information.
    """

    few_shot_examples = """
    Examples of natural language to Cypher statement translation are given below:
    
    ## Examples
    
    ## Example 1
    - Question: What is the URL and description of the company called "360factors"?
    - Cypher: 
    MATCH (c:Company) 
    WHERE c.name = "360factors"
    RETURN c.url AS URL, c.description AS Description
    
    ## Example 2
    - Question: What products are served to company called "BDO"
    - Cypher:
    MATCH (p:Product)-[:SERVES]->(c:Company)
    WHERE c.name = 'BDO'
    RETURN DISTINCT p.name AS Product
    
    ## Example 4
    - Question: Who are the client of the company called "BDO"
    - Cypher:
    MATCH (company:Company)-[:PROVIDES]->(p:Product)-[:SERVES]->(client:Company)
    WHERE company.name = 'BDO'
    RETURN DISTINCT client.name AS Client
    
    ## Example 5
    - Question: Who are the client of client of the company called "Landytech"
    - Cypher:
    MATCH (company:Company)-[:PROVIDES]->(layer_1_p:Product)-[:SERVES]->(layer_1_client:Company)-[:PROVIDES]->(layer_2_p:Product)-[:SERVES]->(layer_2_client:Company)
    WHERE company.name = 'Landytech'
    RETURN DISTINCT layer_2_client.name AS Client
    
    ## Example 6
    - Question: What are the top 5 companies with most number of service provider? Return their names, number of clients, urls and descriptions.
    - Cypher:
    MATCH (provider:Company)-[:PROVIDES]->(product:Product)-[:SERVES]->(client:Company)
    WITH client, count(DISTINCT provider) AS providerCount
    ORDER BY providerCount DESC
    LIMIT 5
    RETURN client.name AS companyName, providerCount, client.url, client.description;
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_message),
            ("system", """Use only the provided nodes, relationship and properties from the schema provided below:
                        {schema}
                        Do not use any other relationship types or properties that are not provided.
                        """),
            ("system", few_shot_examples),
            ("human", "Here is the natural language question that you need to translate to Cypher query: {query}"),
            ("human", """
                Here are the rules that you need to adhere:
                ## Rules:
                - Make sure to answer in the standard text format.
                - DO NOT HALLUCINATE.
             """),
        ]
    )

    # temperature=0 is requested so the translation is as repeatable as the model allows.
    llm = ChatOpenAI(openai_api_key=os.getenv('OPENAI_KEY'),
                    temperature=0,
                    model_name=model_name)

    # prompt -> chat model -> plain string
    llm_chain = prompt | llm | StrOutputParser()

    response = llm_chain.invoke({'schema': schema, 'query': query})

    # Chat models sometimes wrap code in a Markdown fence (```cypher ... ```) even when
    # told to answer with the query only. The fence markers are not valid Cypher, so
    # unwrap the reply when, and only when, the whole reply is a single fenced block.
    cleaned = response.strip()
    fenced = re.match(r"^```[a-zA-Z]*[ \t]*\n(.*?)\n?```$", cleaned, re.DOTALL)
    if fenced:
        cleaned = fenced.group(1).strip()

    return cleaned


In [21]:
def augmented_generation(query, database_output, model_name="gpt-4o"):
    """Produce a natural-language answer to ``query`` from a database query result.

    Args:
        query: The user's query that the answer should address; substituted into
            the prompt's ``{query}`` placeholder.
        database_output: The database result to ground the answer on; substituted
            into the ``{database_output}`` placeholder.
        model_name: Chat model name forwarded to ``ChatOpenAI``.

    Returns:
        The model's reply as a string.
    """
    system_message = """
    Your task is to generate natural language response based on user's query, backed by database query result.
    Given input user query, you need to interpret the query and generate suitable response based on the database output.
    Use solely the information from the database with no additional commentary or extraneous information.
    """

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_message),
            # This step answers the query; it does not translate it to Cypher, so the
            # message must not ask for a Cypher translation.
            ("human", "Here is the user's query that you need to answer: {query}"),
            ("human", """Here is the database query output: {database_output}. 
                         Now you need to organise the information obtained from the database and generate response that answer the user's query."""),
            ("human", """
                Here are the rules that you need to adhere:
                ## Rules:
                - Make sure to answer in the standard text format.
                - If the database output is empty or NaN, just reply the information is not found from the database.
                - DO NOT HALLUCINATE.
             """),
        ]
    )

    # temperature=0 is requested so the answer is as repeatable as the model allows.
    llm = ChatOpenAI(openai_api_key=os.getenv('OPENAI_KEY'),
                    temperature=0,
                    model_name=model_name)

    # prompt -> chat model -> plain string
    llm_chain = prompt | llm | StrOutputParser()

    response = llm_chain.invoke({'database_output': database_output, 'query': query})

    return response


In [None]:
from neomodel import db

def query_databse(query):
    """Execute a Cypher statement via ``db.cypher_query`` and return its results.

    Args:
        query: Cypher statement passed unchanged to ``db.cypher_query``.

    Returns:
        The first element of the pair returned by ``db.cypher_query``, or
        ``None`` if anything raised while running the query (the error is
        printed to stdout instead of being propagated).
    """
    try:
        # cypher_query yields a pair; only the first item is used here.
        rows, _meta = db.cypher_query(query)
    except Exception as err:
        print(f"Failed to query the data: {err}")
        return None
    else:
        return rows

In [10]:
# LangChain Neo4j wrapper for the local bolt endpoint; the password is read from NEO4J_PASSWORD.
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password=os.getenv('NEO4J_PASSWORD'),
)

# Bare last expression so the notebook shows the schema text as this cell's output.
graph.schema

'Node properties:\nCompany {communityId: INTEGER, processed_name: STRING, name: STRING, url: STRING, cluster_name_detail: STRING, hq_location: STRING, hq_city: STRING, last_known_valuation_date: STRING, verticals: STRING, valuation: FLOAT, primary_industry_sector: STRING, primary_industry_group: STRING, last_known_valuation_deal_type: STRING, description: STRING, year_founded: STRING, total_raised: FLOAT, hq_country_territory_region: STRING}\nProduct {name: STRING, communityId: INTEGER, summary_product: BOOLEAN, name_embedding: LIST, description_embedding: LIST, description: STRING, company_url: STRING, product_key: STRING}\nRelationship properties:\n\nThe relationships:\n(:Company)-[:PROVIDES]->(:Product)\n(:Product)-[:SERVES]->(:Company)'

In [26]:
# End-to-end demo: question -> Cypher -> database rows -> natural-language answer.
# The question lives in its own variable so the answer step receives the user's
# wording rather than the generated Cypher text.
question = "What product dose michael's client servce? Return client's name and client's product name"

# Step 1: translate the question to Cypher using the live graph schema.
query = cypher_generator(graph.schema, question)
print('Step 1: Cypher query')
print(query)
print(' ')

# Step 2: run the generated Cypher; query_databse returns None if the query fails.
print('Step 2: Query database')
database_result = query_databse(query)
print(database_result)
print(' ')

# Step 3: answer the original question from the stringified database result.
print('Step 3: Generate response')
response = augmented_generation(question, str(database_result))
print(response)


Step 1: Cypher query
MATCH (michael:Company)-[:PROVIDES]->(michaelProduct:Product)-[:SERVES]->(client:Company)-[:PROVIDES]->(clientProduct:Product)
WHERE michael.name = "Michael"
RETURN DISTINCT client.name AS ClientName, clientProduct.name AS ClientProductName
 
Step 2: Query database
[]
 
Step 3: Generate response
The information is not found from the database.


"What product dose Bennie's client servce? Return client's name and client's product name"

The companies that are clients of Bennie and the products they provide are as follows:

1. **Interpretek**
   - Interpreting Services and ASL Training
   - Customized ASL Classes
   - Online Focus
   - Video Remote Interpreting (VRI) Services
   - On-site Interpreting Services

2. **Crisis Text Line**
   - Crisis Text Line

3. **Rippling**
   - Comprehensive Workforce and Benefits Management
   - Global Benefits Solution
   - Workforce Management System

4. **Hi Marley**
   - Intelligent Conversational Platform for Insurance
   - Guidewire Cloud App
   - Total Loss Assist
   - Hi Marley Service
   - Hi Marley Claims
   - Hi Marley Insurance Cloud

5. **Pliancy**
   - Tech-enabled IT solutions
   - High-touch, tech-enabled IT services

6. **Superhuman**
   - AI-powered email service
   - Superhuman AI-powered email

7. **LTSE**
   - Stock Exchange Services

8. **Bonusly**
   - Employee Recognition and Performance Enablement Solutions
   - Bonusly Appreciate
   - Bonusly Connect
   - Bon