In [None]:

%pip install --quiet langchain langchain-community langchain-experimental neo4j tiktoken transformers sentence-transformers langchain_openai
!pip install python-dotenv langchain langchain-community neo4j



# --------------------------------------
# Imports and Environment Setup
# --------------------------------------

In [None]:
from neo4j import GraphDatabase
import os
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

In [None]:
# Neo4j connection details.
# The URI and username stay inline; the password is read from the
# NEO4J_PASSWORD environment variable so the secret is never stored in the
# notebook (export it before starting the kernel, or call load_dotenv() on a
# .env file first).  The previous self-assignment raised NameError on a fresh
# kernel because the name was never defined.
NEO4J_URI = "neo4j+s://72744c43.databases.neo4j.io"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    print("Warning: NEO4J_PASSWORD is not set; Neo4j authentication is expected to fail.")

# Function to execute Cypher queries
def execute_query(driver, cypher_query, parameters=None):
    """Run one Cypher statement in its own session and wait for it to finish.

    Args:
        driver: Object whose ``session()`` returns a context manager exposing
            ``run(query, parameters)`` (in this notebook, the driver built by
            ``GraphDatabase.driver``).
        cypher_query: Cypher text to execute.
        parameters: Optional mapping of query parameters; ``None`` is passed
            through unchanged.

    Returns:
        None. The statement is executed for its side effects only.

    Any exception is caught and reported with ``print`` instead of being
    re-raised, so a single failing statement does not abort a bulk load.
    """
    try:
        with driver.session() as session:
            # Consume the result while still inside the ``try`` so the
            # statement is fully executed and any late server-side error is
            # raised (and reported) here rather than at session teardown.
            session.run(cypher_query, parameters).consume()
    except Exception as e:
        print(f"Error: {e}")


In [None]:
# Create Listing Node
def create_listing_node(driver, listing_id, room_type, price, reviews, availability):
    """Create or update the ``Listing`` node identified by ``listing_id``.

    The node is merged on ``Listing_ID`` only and the remaining properties are
    then written with ``SET``.  Merging on every property would treat a listing
    whose price or review count changed as a brand-new node, leaving several
    nodes with the same ``Listing_ID`` after a reload.

    Args:
        driver: Driver object forwarded to ``execute_query``.
        listing_id: Value stored as ``Listing_ID``; the merge key.
        room_type: Value stored as ``Room_Type``.
        price: Value stored as ``Price``.
        reviews: Value stored as ``Reviews``.
        availability: Value stored as ``Availability``.
    """
    query = """
    MERGE (l:Listing {Listing_ID: $listing_id})
    SET l.Room_Type = $room_type,
        l.Price = $price,
        l.Reviews = $reviews,
        l.Availability = $availability
    """
    params = {
        "listing_id": listing_id,
        "room_type": room_type,
        "price": price,
        "reviews": reviews,
        "availability": availability,
    }
    execute_query(driver, query, params)

# Create Host Node
def create_host_node(driver, host_name):
    """Ensure a ``Host`` node with the given name exists.

    The node is merged on ``Name`` alone, so every host sharing a name maps to
    the same node.

    Args:
        driver: Driver object forwarded to ``execute_query``.
        host_name: Value stored as (and matched on) the ``Name`` property.
    """
    merge_host_cypher = "\n    MERGE (h:Host {Name: $host_name})\n    "
    execute_query(driver, merge_host_cypher, {"host_name": host_name})

# Create Neighborhood Node
def create_neighborhood_node(driver, neighborhood):
    """Ensure a ``Neighborhood`` node with the given name exists.

    Args:
        driver: Driver object forwarded to ``execute_query``.
        neighborhood: Value stored as (and matched on) the ``Name`` property.
    """
    merge_neighborhood_cypher = (
        "\n    MERGE (n:Neighborhood {Name: $neighborhood})\n    "
    )
    execute_query(driver, merge_neighborhood_cypher, {"neighborhood": neighborhood})


# --------------------------------------
# Relationship Creation Function
# --------------------------------------

In [None]:
# Create Relationships
def create_relationships(driver, listing_id, host_name, neighborhood):
    """Link a listing to its host and to its neighborhood.

    A single statement first matches the ``Listing`` and ``Host`` nodes and
    merges ``(Host)-[:OWNS]->(Listing)``, then matches the ``Neighborhood`` and
    merges ``(Listing)-[:LOCATED_IN]->(Neighborhood)``.  Nodes are only
    matched, never created, so they must already exist.

    Args:
        driver: Driver object forwarded to ``execute_query``.
        listing_id: ``Listing_ID`` of the listing to link.
        host_name: ``Name`` of the owning host.
        neighborhood: ``Name`` of the neighborhood the listing is located in.
    """
    link_cypher = (
        "\n"
        "    MATCH (l:Listing {Listing_ID: $listing_id}), (h:Host {Name: $host_name})\n"
        "    MERGE (h)-[:OWNS]->(l)\n"
        "    WITH l\n"
        "    MATCH (n:Neighborhood {Name: $neighborhood})\n"
        "    MERGE (l)-[:LOCATED_IN]->(n)\n"
        "    "
    )
    link_params = dict(
        listing_id=listing_id,
        host_name=host_name,
        neighborhood=neighborhood,
    )
    execute_query(driver, link_cypher, link_params)



# --------------------------------------
# Upload and Parse Excel Data
# --------------------------------------

In [None]:
from google.colab import files
import pandas as pd

# Upload the Excel file
uploaded = files.upload()

# The upload result is keyed by filename; with no entries there is nothing to
# read, so stop with a clear message instead of an IndexError further down.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an Excel file.")

# Use the first uploaded file
excel_filename = next(iter(uploaded))
print(f"Uploaded file: {excel_filename}")

# Read the Excel file into a Pandas DataFrame
data = pd.read_excel(excel_filename)

# Display the first few rows to verify the upload
data.head()


Saving Book1.xlsx to Book1 (3).xlsx
Uploaded file: Book1 (3).xlsx


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,NaT,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


# --------------------------------------
# Populate Graph Database
# --------------------------------------

In [None]:
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
try:
    # Fail fast on a bad URI or bad credentials.  execute_query() only prints
    # errors, so without this check an unreachable database would produce one
    # error line per statement and still end with the success message below.
    driver.verify_connectivity()

    for _, row in data.iterrows():
        listing_id = row["id"]
        host_name = row["host_name"]
        neighborhood = row["neighbourhood"]
        room_type = row["room_type"]
        price = row["price"]
        reviews = row["number_of_reviews"]
        availability = row["availability_365"]

        # pandas reports empty cells as missing values; a missing name is not a
        # meaningful node key, so such nodes are not merged.
        # NOTE(review): confirm how rows without a host or neighbourhood should
        # be modelled.
        has_host = not pd.isna(host_name)
        has_neighborhood = not pd.isna(neighborhood)

        create_listing_node(driver, listing_id, room_type, price, reviews, availability)
        if has_host:
            create_host_node(driver, host_name)
        if has_neighborhood:
            create_neighborhood_node(driver, neighborhood)
        if has_host:
            # create_relationships() starts by matching the Host node, so
            # without a host there is nothing for it to link.
            create_relationships(driver, listing_id, host_name, neighborhood)
finally:
    # Release the driver's connections even if the load is interrupted.
    driver.close()
print("Graph populated successfully!")


Graph populated successfully!


# --------------------------------------
# OpenAI API Configuration
# --------------------------------------

In [None]:
# OpenAI API details.
# The key is read from the OPENAI_API_KEY environment variable so the secret
# is never stored in the notebook; the previous self-assignment raised
# NameError on a fresh kernel because the name was never defined.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_ENDPOINT = "https://api.openai.com/v1"

# Print confirmation only when the key was actually found
if OPENAI_API_KEY:
    print("Environment variables loaded successfully!")
else:
    print("Warning: OPENAI_API_KEY is not set; OpenAI-backed queries are expected to fail.")

Environment variables loaded successfully!


# --------------------------------------
# Knowledge Graph Interaction
# --------------------------------------

In [None]:
# Optional database name; None when the NEO4J_DATABASE variable is unset.
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")

# Graph handle used by the query cells that follow.
kg = Neo4jGraph(
    database=NEO4J_DATABASE,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [None]:
# List the indexes defined in the database.
show_indexes_query = "\n    SHOW INDEXES\n    "
result = kg.query(show_indexes_query)
print(result)

[{'id': 0, 'name': 'index_343aff4e', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'NODE', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 12, 1, 17, 42, 20, 967000000, tzinfo=<UTC>), 'readCount': 1771}, {'id': 1, 'name': 'index_f7700477', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'LOOKUP', 'entityType': 'RELATIONSHIP', 'labelsOrTypes': None, 'properties': None, 'indexProvider': 'token-lookup-1.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 12, 1, 17, 11, 22, 523000000, tzinfo=<UTC>), 'readCount': 1}, {'id': 2, 'name': 'listings_embeddings_index', 'state': 'ONLINE', 'populationPercent': 100.0, 'type': 'VECTOR', 'entityType': 'NODE', 'labelsOrTypes': ['Listing'], 'properties': ['embeddings'], 'indexProvider': 'vector-2.0', 'owningConstraint': None, 'lastRead': neo4j.time.DateTime(2024, 12, 1, 17, 12, 2, 531000000, tzinfo=<UTC>

# --------------------------------------
# Query Listings using OpenAI Embeddings
# --------------------------------------

In [None]:
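# One-time setup, kept for reference (presumably disabled so a re-run does not
# recompute the embeddings): encode each listing's Room_Type with OpenAI and
# store the vector in the `embeddings` property.  Only Room_Type is embedded,
# so listings with the same room type get identical similarity scores.
# kg.query(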
#     """
#     MATCH (l:Listing)
#     WHERE l.Room_Type IS NOT NULL
#     WITH l, genai.vector.encode(
#         l.Room_Type,
#         "OpenAI",
#         {
#           token: $openAiApiKey,
#           endpoint: $openAiEndpoint
#         }) AS vector
#     WITH l, vector
#     WHERE vector IS NOT NULL
#     CALL db.create.setNodeVectorProperty(l, "embeddings", vector)
#     """,
#     params={
#         "openAiApiKey": OPENAI_API_KEY,
#         "openAiEndpoint": OPENAI_ENDPOINT,
#     },
# )


[]

In [None]:
# result = kg.query(
#     """
#     MATCH (l:Listing)
#     RETURN l.Listing_ID, l.embeddings
#     LIMIT 5
#     """
# )

# print(result)


[{'l.Listing_ID': 2539, 'l.embeddings': [0.01404150016605854, 0.01425890065729618, 0.014437936246395111, -0.015486572869122028, -0.004021904896944761, 0.0010718153789639473, 0.0031299246475100517, -0.01188667956739664, -0.005892186891287565, 0.007429974153637886, 0.00354554271325469, 0.02713667042553425, 0.012973681092262268, -0.00916278176009655, -0.011183326132595539, -0.0003290976455900818, 0.030640650540590286, 0.003660636954009533, 0.035500187426805496, -0.003954766783863306, -0.014783218502998352, 0.010300936177372932, -0.009731858968734741, 0.010332907550036907, -0.007788045331835747, -0.03230312466621399, -0.0073660328052937984, -0.013453240506350994, -0.0011349573032930493, -0.022046945989131927, 0.02266078069806099, -0.007449156604707241, -0.01881151832640171, -0.02295491099357605, -0.016215503215789795, -0.003153902478516102, -0.015716761350631714, 0.006448475643992424, 0.021292438730597496, 0.010448001325130463, 0.008568128570914268, 0.020473990589380264, 0.0062406668439507

In [None]:
question = "Find the listings in Williamsburg."

# Cypher: embed the question with OpenAI, then fetch its nearest neighbours
# from the 'listings_embeddings_index' vector index, best score first.
similarity_query = """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'listings_embeddings_index',
        $top_k,
        question_embedding
        ) YIELD node AS listing, score
    RETURN listing.Listing_ID AS listing_id, listing.Room_Type AS room_type, listing.Price AS price, score
    ORDER BY score DESC
    LIMIT $top_k
    """

query_params = {
    "question": question,
    "top_k": 3,  # Number of top similar listings to return
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
}

result = kg.query(similarity_query, params=query_params)

print(result)


[{'listing_id': 3831, 'room_type': 'Entire home/apt', 'price': 89, 'score': 0.8589324951171875}, {'listing_id': 56467, 'room_type': 'Entire home/apt', 'price': 130, 'score': 0.8589324951171875}, {'listing_id': 2595, 'room_type': 'Entire home/apt', 'price': 225, 'score': 0.8589324951171875}]


In [None]:
question = "Find host name with Entire home/apt near Brooklyn."

# Cypher: embed the question with OpenAI, then fetch its nearest neighbours
# from the 'listings_embeddings_index' vector index, best score first.
# Only listing fields are returned; no Host node is read by this query.
similarity_query = """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'listings_embeddings_index',
        $top_k,
        question_embedding
        ) YIELD node AS listing, score
    RETURN listing.Listing_ID AS listing_id, listing.Room_Type AS room_type, listing.Price AS price, score
    ORDER BY score DESC
    LIMIT $top_k
    """

query_params = {
    "question": question,
    "top_k": 3,  # Number of top similar listings to return
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
}

result = kg.query(similarity_query, params=query_params)

print(result)


[]



# --------------------------------------
# Format Results and Summarize
# --------------------------------------

In [None]:
import openai

In [None]:
# Format retrieved results for ChatCompletion: one line per listing, with
# every field separated by a comma (the separator after the listing ID was
# previously missing).
retrieved_listings = "\n".join(
    f"Listing ID: {row['listing_id']}, "
    f"Room Type: {row['room_type']}, Price: {row['price']}, Score: {row['score']:.2f}"
    for row in result
)

# An empty result would otherwise leave a blank section in the prompt; say so
# explicitly so the model is not asked to summarize nothing.
if not retrieved_listings:
    retrieved_listings = "(no listings matched the query)"

# Prepare system and user messages for ChatCompletion
system_message = {"role": "system", "content": "You are a helpful assistant that formats listings data."}
user_message = {
    "role": "user",
    "content": (
        f"Here are the listings related to the query:\n{retrieved_listings}\n\n"
        "Please summarize the top options for me in a user-friendly way."
    ),
}


In [None]:
# Initialize OpenAI client
from openai import OpenAI

# SECURITY: the API key must never be written into the notebook.  It is read
# from the OPENAI_API_KEY environment variable; a missing variable fails here
# with a KeyError naming it.  The key that was previously hardcoded in this
# cell has been exposed and should be revoked and replaced.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Prepare messages for ChatCompletion
messages = [
    {
        "role": "system",
        "content": "You are a helpful funny assistant that formats listings data."
    },
    {
        "role": "user",
        "content": (
            f"Here are the listings related to the query:\n{retrieved_listings}\n\n"
            "Please summarize the top options for me in a user-friendly way in a passage.Dont give in points"
        )
    },
]

# Generate a response using OpenAI
chat_completion = client.chat.completions.create(
    messages=messages,
    model="gpt-3.5-turbo",
)

In [None]:
# Show the model's reply: a heading, then the text of the first returned choice
print("Generated Response:")
first_choice = chat_completion.choices[0]
print(first_choice.message.content)


Generated Response:
Sure thing! Here are the top options based on your query. 

You have three great choices: 
- Listing ID 3831: Entire home/apt for $89 with a score of 0.86
- Listing ID 56467: Entire home/apt for $130 with a score of 0.86
- Listing ID 2595: Entire home/apt for $225 with a score of 0.86

Happy browsing for your perfect stay!
