### Step 0: Package Installation & setup

In [229]:
!pip install nx-arangodb sentence-transformers

!pip install --upgrade langchain langchain-community langchain-openai langgraph google-search-results

Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: google-search-results
  Building wheel for google-search-results (setup.py) ... [?25l[?25hdone
  Created wheel for google-search-results: filename=google_search_results-2.4.2-py3-none-any.whl size=32009 sha256=56d2c56246a66d88009f7fe2549bd8cf315d748eee367d4a4f07c24ae8680c96
  Stored in directory: /root/.cache/pip/wheels/6e/42/3e/aeb691b02cb7175ec70e2da04b5658d4739d2b41e5f73cd06f
Successfully built google-search-results
Installing collected packages: google-search-results
Successfully installed google-search-results-2.4.2


In [None]:
# GPU Verification

!nvidia-smi
!nvcc --version

In [None]:
!pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com # Requires CUDA-capable GPU

In [230]:
from sentence_transformers import SentenceTransformer

import networkx as nx
import nx_arangodb as nxadb
from arango import ArangoClient

from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_openai import ChatOpenAI
from langchain_community.graphs import ArangoGraph
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_core.tools import tool
from langchain_core.prompts import PromptTemplate

from serpapi import GoogleSearch
import json
import os
from IPython.display import Markdown

In [241]:
# config.json file contains all the endpoints and credentials
with open('config.json', 'r') as file:
    config = json.load(file)

In [146]:
# Initialize Sentence Transformer model for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

In [20]:
# Connect to "_system" database as root user.
# This returns an API wrapper for "_system" database.
client = ArangoClient(hosts=config["ARANGO_ENDPOINT"])

sys_db = client.db('_system', username=config["ARANGO_USERNAME"], password=config["ARANGO_PASSWORD"])

In [21]:
# List all databases.
sys_db.databases()

['_system', 'fake-news']

In [22]:
# Create a new database named "fake-news" if it does not exist.
if not sys_db.has_database('fake-news'):
    sys_db.create_database('fake-news')

In [147]:
db = client.db("fake-news", username="root", password="Az4qWcI2li31uimRVo4j")

### Step 1: prepare dataset


In [49]:
# Create collections
users = db.create_collection("Users") #node
messages = db.create_collection("Messages") #node
sent = db.create_collection("Sent", edge=True) #edge
forwarded = db.create_collection("Forwarded", edge=True) #edge

In [50]:
# Import Users data
users_data = [
    {"_key": "1", "username": "Alice"},
    {"_key": "2", "username": "Bob"},
    {"_key": "3", "username": "Charlie"},
    {"_key": "4", "username": "David"},
    {"_key": "5", "username": "Eve"}
]
users.import_bulk(users_data)

{'error': False,
 'created': 5,
 'errors': 0,
 'empty': 0,
 'updated': 0,
 'ignored': 0,
 'details': []}

In [51]:
# Vector index for messages

def create_vector_index():
    # Use AQL to create a vector index
    aql = """
    INSERT {
      type: "vector",
      fields: ["embedding"],
      name: "vector_cosine",
      sparse: true,
      unique: false,
      inBackground: false,
      params: {
        metric: "cosine",
        dimension: 384
      }
    } INTO Messages OPTIONS { overwriteMode: "ignore" }
    """
    try:
        db.aql.execute(aql)
        print("Vector index created successfully on Messages collection.")
    except Exception as e:
        print(f"Error creating vector index: {e}")

create_vector_index()

Vector index created successfully on Messages collection.


In [52]:
# Import Messages data
def add_sample_data():
    messages = db.collection("Messages")
    sample_texts = [
        "Hey, Breaking news! Trump lost 2025 election.",
        "Did you know? Octopuses have three hearts! Two pump blood to the gills, and one pumps it to the rest of the body. But when they swim, the main heart stops beating! No wonder they prefer crawling. ",
        "I don't know if its true but trump lost the election."
    ]

    count = 1

    for text in sample_texts:
        # Check if the content already exists to avoid duplicates
        existing_doc = list(messages.find({"content": text}))
        if not existing_doc:
            embedding = model.encode(text).tolist()
            messages.insert({"_key": str(count), "content": text, "embedding": embedding})
            print(f"Inserted new document: {text}")
            count += 1
    print("Sample data added to Messages collection.")

add_sample_data()

Inserted new document: Hey, Breaking news! Trump lost 2025 election.
Inserted new document: Did you know? Octopuses have three hearts! Two pump blood to the gills, and one pumps it to the rest of the body. But when they swim, the main heart stops beating! No wonder they prefer crawling. 
Inserted new document: I don't know if its true but trump lost the election.
Sample data added to Messages collection.


In [53]:
# Import Sent edges
sent_data = [
    {"_from": "Messages/1", "_to": "Users/1", "timestamp": "2025-03-08 19:00:00"},
    {"_from": "Messages/3", "_to": "Users/2", "timestamp": "2025-03-08 19:15:00"},
    {"_from": "Messages/2", "_to": "Users/3", "timestamp": "2025-03-08 19:30:00"}
]
sent.import_bulk(sent_data)

{'error': False,
 'created': 3,
 'errors': 0,
 'empty': 0,
 'updated': 0,
 'ignored': 0,
 'details': []}

In [54]:
# Import Forwarded edges
forwarded_data = [
    {"_from": "Users/1", "_to": "Users/2", "message_id": "1", "timestamp": "2025-03-08 19:01:00"},
    {"_from": "Users/2", "_to": "Users/3", "message_id": "1", "timestamp": "2025-03-08 19:05:00"},
    {"_from": "Users/3", "_to": "Users/4", "message_id": "1", "timestamp": "2025-03-08 19:10:00"},
    {"_from": "Users/3", "_to": "Users/5", "message_id": "2", "timestamp": "2025-03-08 19:16:00"},
    {"_from": "Users/5", "_to": "Users/1", "message_id": "2", "timestamp": "2025-03-08 19:20:00"},
    {"_from": "Users/2", "_to": "Users/4", "message_id": "3", "timestamp": "2025-03-08 19:31:00"},
    {"_from": "Users/4", "_to": "Users/5", "message_id": "3", "timestamp": "2025-03-08 19:35:00"}
]
forwarded.import_bulk(forwarded_data)

{'error': False,
 'created': 7,
 'errors': 0,
 'empty': 0,
 'updated': 0,
 'ignored': 0,
 'details': []}

In [55]:
# Create the graph
graph = db.create_graph("WhatsAppForwarding") #graph

# creating nodes
graph.create_vertex_collection("Users")
graph.create_vertex_collection("Messages")

# creating edges bewteen nodes
graph.create_edge_definition(
    edge_collection="Sent",
    from_vertex_collections=["Users"],
    to_vertex_collections=["Messages"]
)
graph.create_edge_definition(
    edge_collection="Forwarded",
    from_vertex_collections=["Users"],
    to_vertex_collections=["Users"]
)

<EdgeCollection Forwarded>

In [148]:
# Test queries, 3 hops traversal

aql = """
WITH Users
FOR v, e, p IN 1..3 OUTBOUND 'Users/1' Forwarded
    FILTER e.message_id == '1'
    RETURN {
        user: v.username,
        timestamp: e.timestamp
    }
"""
result = db.aql.execute(aql)
print(list(result))

[{'user': 'Bob', 'timestamp': '2025-03-08 19:01:00'}, {'user': 'Charlie', 'timestamp': '2025-03-08 19:05:00'}, {'user': 'David', 'timestamp': '2025-03-08 19:10:00'}]


### Step 2: Build the Agentic App with LangChain & LangGraph


In [149]:
# 1. Create the ArangoGraph LangChain wrapper
arango_graph = ArangoGraph(db)

In [150]:
# 2. Define the llm object
os.environ["OPENAI_API_KEY"] = config["OPENAI_API"]
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

llm.invoke("hello!")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 9, 'total_tokens': 19, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_06737a9306', 'finish_reason': 'stop', 'logprobs': None}, id='run-1905389f-1fab-4bde-a07b-abb3f17fcc73-0', usage_metadata={'input_tokens': 9, 'output_tokens': 10, 'total_tokens': 19, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [236]:
graph_schema = arango_graph.schema

In [219]:
# 3. Define the Text to AQL Tool
@tool
def text_to_aql_to_text(query: str):
    """This tool is available to invoke the
    ArangoGraphQAChain object, translate a Natural Language Query into AQL,
    execute the query, and translate the result back into Natural Language.

    Example queries for your reference:

    1) Query to find who Alice forwarded messages to:
    FOR u IN Users
    FILTER u.username == "Alice"
    FOR v, e IN OUTBOUND u Forwarded
    RETURN {
      forwarded_to: v.username,
      message_id: e.message_id,
      timestamp: e.timestamp
    }

    2) Query to find who forwarded messages to Alice:
    FOR u IN Users
    FILTER u.username == "Alice"
    FOR v, e IN INBOUND u Forwarded
    RETURN {
      forwarded_from: v.username,
      message_id: e.message_id,
      timestamp: e.timestamp
    }

    3) query to fetch specific message
    FOR doc IN Messages
    FILTER doc.content == "Hello, how are you?"
    RETURN doc
    """

    llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

    chain = ArangoGraphQAChain.from_llm(
    	llm=llm,
    	graph=arango_graph,
    	verbose=True,
        allow_dangerous_requests=True
    )

    result = chain.invoke(query)

    return str(result["result"])

In [220]:
# 4. Define the semantic search Tool
@tool
def semantic_search(query: str, limit: int = 2):
    """This tool performs semantic search on the Messages collection
    using cosine similarity between the query and stored embeddings.

    Args:
        query: The search query in natural language.
        limit: The number of top-matching results to return.

    Returns:
        A list of dictionaries containing the content and similarity score.
    """

    query_embedding = model.encode(query).tolist()
    aql = """
    WITH Users
    LET queryEmbedding = @queryEmbedding
    FOR doc IN Messages
      LET score = COSINE_SIMILARITY(doc.embedding, queryEmbedding)
      FILTER score > 0.1
      SORT score DESC
      LIMIT @limit
      FOR user IN 1 OUTBOUND doc Sent
        RETURN {
          content: doc.content,
          original_sender: user.username,
          similarity_score: score
        }

    """
    bind_vars = {"queryEmbedding": query_embedding, "limit": limit}
    cursor = db.aql.execute(aql, bind_vars=bind_vars)

    return list(cursor)

In [231]:
# 5. Define the message verificattion web search Tool
@tool
def verify_fact(query: str):
    """Searches the web for a given query using Google Search via SerpAPI.

    Args:
        query: The statement or question to verify.

    Returns:
        A summary of the top search results.
    """
    params = {
        "q": query,
        "api_key": config["SERP_API_KEY"]
    }
    search = GoogleSearch(params)
    results = search.get_dict().get("organic_results", [])[:1]  # Top 1 result

    summaries = [
        {"title": res["title"], "snippet": res["snippet"], "url": res["link"]}
        for res in results
    ]

    return json.dumps(summaries, indent=2) if summaries else "No relevant information found."

In [232]:
# 6. Create the Agentic Application

tools = [text_to_aql_to_text, semantic_search, verify_fact]

def query_graph(query):
    llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
    app = create_react_agent(llm, tools)
    final_state = app.invoke({"messages": [{"role": "user", "content": query}]})
    return final_state["messages"][-1].content

In [239]:
query = "find who Alice forwarded messages to and that person forwarded to who?"
display(Markdown(query_graph(query)))



[1m> Entering new ArangoGraphQAChain chain...[0m


[1m> Entering new ArangoGraphQAChain chain...[0m
AQL Query (1):[32;1m[1;3m
WITH Users, Forwarded
FOR u IN Users
    FILTER u.username == 'Alice'
    FOR f IN Forwarded
        FILTER f._from == u._id
        RETURN f._to
[0m
AQL Result:
[32;1m[1;3m['Users/2'][0m
AQL Query (1):[32;1m[1;3m
WITH Users, Forwarded
FOR u IN Users
    FILTER u.username == 'Alice'
    FOR f IN Forwarded
        FILTER f._from == u._id
        LET forwardedTo = f._to
        FOR f2 IN Forwarded
            FILTER f2._from == forwardedTo
            RETURN f2._to
[0m
AQL Result:
[32;1m[1;3m['Users/3', 'Users/4'][0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


Alice forwarded messages to a user with the ID 'Users/2'. This user, in turn, received forwarded messages from two other users, identified as 'Users/3' and 'Users/4'.

In [212]:
query = "find who forwarded messages to Alice"
display(Markdown(query_graph(query)))



[1m> Entering new ArangoGraphQAChain chain...[0m
AQL Query (1):[32;1m[1;3m
WITH Users, Forwarded
FOR u IN Users
    FILTER u.username == 'Alice'
    FOR f IN Forwarded
        FILTER f._to == u._id
        RETURN f._from
[0m
AQL Result:
[32;1m[1;3m['Users/5'][0m

[1m> Finished chain.[0m


The user with the ID 'Users/5' is responsible for forwarding messages to Alice.

In [205]:
query = "find out similar messages sent by different people along with their name"
display(Markdown(query_graph(query)))

I found some similar messages sent by different people:

1. **Alice**: "Hey, Breaking news! Trump lost 2025 election." (Similarity Score: 0.14)
2. **Bob**: "I don't know if its true but trump lost the election." (Similarity Score: 0.10)

These messages share a similar theme regarding the 2025 election.

In [235]:
query = "fetch the content of message 1 and verify this message BY searching web if its true"
display(Markdown(query_graph(query)))



[1m> Entering new ArangoGraphQAChain chain...[0m
AQL Query (1):[32;1m[1;3m
WITH Messages
FOR message IN Messages
FILTER message._key == '1'
RETURN message
[0m
AQL Result:
[32;1m[1;3m[{'_key': '1', '_id': 'Messages/1', '_rev': '_jVqQwM6--_', 'content': 'Hey, Breaking news! Trump lost 2025 election.', 'embedding': [-0.007163814269006252, 0.00791835691779852, 0.1831125020980835, 0.05643795058131218, 0.02982192672789097, 0.02564813382923603, -0.11532781273126602, -0.018671628087759018, -0.0614331029355526, 0.005224666558206081, -0.05747831240296364, 0.048746827989816666, 0.02096504718065262, -0.049460116773843765, -0.07076548784971237, 0.03233611211180687, -0.10514471679925919, -0.045691899955272675, -0.09758692979812622, 0.009631344117224216, 0.09832679480314255, 0.07849396020174026, -0.01998559944331646, 0.051968250423669815, 0.023345502093434334, 0.007565595209598541, -0.016841880977153778, 0.03489099442958832, -0.027945276349782944, 0.00877468753606081, 0.030479541048407555, 0

The content of message 1 is: **"Hey, Breaking news! Trump lost 2025 election."**

However, a web search indicates that this statement is not true. According to a recent article from The New York Times, Donald J. Trump has actually won the presidency in the 2024 election. You can read more about it [here](https://www.nytimes.com/interactive/2024/11/05/us/elections/results-president.html).