In [1]:
import networkx as nx
import nx_arangodb as nxadb

from arango import ArangoClient

import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from random import randint
import re

from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_openai import ChatOpenAI
from langchain_community.graphs import ArangoGraph
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_core.tools import tool


import gradio
from graph_utils import create_d3_visualization
from dotenv import load_dotenv
import os
from agent_utils import GraphAgent



[00:00:32 -0600] [INFO]: NetworkX-cuGraph is unavailable: No module named 'nx_cugraph'.


In [2]:
# Load variables from a local .env file (python-dotenv) so the credentials
# are available through the process environment.
load_dotenv()

True

In [3]:
# Read the OpenAI key and the ArangoDB connection settings from the process
# environment in one pass. Any variable that is not set comes back as None.
openai_key, arango_host, arango_user, arango_password, arango_db = (
    os.environ.get(variable_name)
    for variable_name in (
        'OPENAI_API_KEY',
        'ARANGO_HOST',
        'ARANGO_USER',
        'ARANGO_PASSWORD',
        'ARANGO_DB',
    )
)


In [4]:
# Connect to the ArangoDB server and open the working database with the
# credentials read from the environment.
client = ArangoClient(hosts=arango_host)
db = client.db(
    arango_db,
    username=arango_user,
    password=arango_password,
)

# Connectivity check: dump the metadata of every collection in the database.
print(db.collections())

[{'id': '195136', 'name': '_analyzers', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195138', 'name': '_queues', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195137', 'name': '_aqlfunctions', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211044', 'name': 'Users', 'system': False, 'type': 'document', 'status': 'loaded'}, {'id': '195139', 'name': '_jobs', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211045', 'name': 'Games', 'system': False, 'type': 'document', 'status': 'loaded'}, {'id': '195141', 'name': '_appbundles', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195135', 'name': '_graphs', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195140', 'name': '_apps', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211046', 'name': 'plays', 'system': False, 'type': 'edge', 'status': 'loaded'}, {'id': '195142', 'name': '_frontend', 'system': True, 'type': 'docu

In [5]:

# Open the graph named "SteamGraph" through nx_arangodb, using the database
# handle created above, and print its summary line.
G_adb = nxadb.Graph(db=db, name="SteamGraph")

print(G_adb)

[00:00:36 -0600] [INFO]: Graph 'SteamGraph' exists.
[00:00:36 -0600] [INFO]: Default node type set to 'Games'


Graph named 'SteamGraph' with 14950 nodes and 70477 edges


In [6]:
# Fetch the stored definition document of SteamGraph from the `_graphs`
# collection and show it (edge definitions, orphan collections, ...).
graphs_collection = db.collection("_graphs")
graph_def = graphs_collection.get("SteamGraph")
print(graph_def)


{'_key': 'SteamGraph', '_id': '_graphs/SteamGraph', '_rev': '_jPxJOKO---', 'edgeDefinitions': [{'collection': 'plays', 'from': ['Users'], 'to': ['Games']}], 'orphanCollections': [], 'networkx': {'name': 'SteamGraph'}}


In [7]:
# Peek at the first 10 nodes and the `_id` stored in each node's attribute dict.
# islice stops after 10 items instead of copying the entire node view into a
# list only to slice off its head.
from itertools import islice

for node, data in islice(G_adb.nodes(data=True), 10):
    print(node, data.get("_id"))


Users/295626029 Users/295626029
Games/12185_tom_clancy_s_rainbow_six_siege Games/12185_tom_clancy_s_rainbow_six_siege
Users/163293998 Users/163293998
Users/56788879 Users/56788879
Games/11706_chivalry_medieval_warfare Games/11706_chivalry_medieval_warfare
Games/13715_enslaved_odyssey_to_the_west_premium_edition Games/13715_enslaved_odyssey_to_the_west_premium_edition
Users/205340007 Users/205340007
Games/11687_critical_mass Games/11687_critical_mass
Games/12066_toribash Games/12066_toribash
Users/200082927 Users/200082927


In [8]:
# Print the graph summary again (graph name, node count, edge count).
print(G_adb)

Graph named 'SteamGraph' with 14950 nodes and 70477 edges


### AQL Test

In [9]:
# List the name of every collection in the database, one per line.
print("Available collections:")
collection_names = [entry['name'] for entry in db.collections()]
for collection_name in collection_names:
    print(collection_name)

# Show which edge collection links which vertex collections in SteamGraph.
graph = db.graph('SteamGraph')
print("\nGraph collections:")
print(graph.edge_definitions())

Available collections:
_analyzers
_queues
_aqlfunctions
Users
_jobs
Games
_appbundles
_graphs
_apps
plays
_frontend

Graph collections:
[{'edge_collection': 'plays', 'from_vertex_collections': ['Users'], 'to_vertex_collections': ['Games']}]


In [10]:


# -----------------------------------------------------------
# Sample 3 random documents from the Users vertex collection and
# 3 random documents from the plays edge collection.
# SORT RAND() shuffles each collection, so the printed documents
# differ from run to run.
# -----------------------------------------------------------
users_sample_aql = """
    FOR node IN Users
        SORT RAND()
        LIMIT 3
        RETURN node
"""

plays_sample_aql = """
    FOR edge IN plays
        SORT RAND()
        LIMIT 3
        RETURN edge
"""

for heading, aql_text in (
    ("Sample Users Nodes:", users_sample_aql),
    ("Sample Plays Edges:", plays_sample_aql),
):
    # Run the query first, then print the heading, each document, and a rule.
    result_cursor = G_adb.query(aql_text)
    print(heading)
    for document in result_cursor:
        print(document)
    print('-' * 10)


Sample Users Nodes:
{'_key': '167741133', '_id': 'Users/167741133', '_rev': '_jPaYbp6--f', 'type': 'Users', 'steamid': 167741133}
{'_key': '139393793', '_id': 'Users/139393793', '_rev': '_jPaYbkm--4', 'type': 'Users', 'steamid': 139393793}
{'_key': '75548930', '_id': 'Users/75548930', '_rev': '_jPaYbpu---', 'type': 'Users', 'steamid': 75548930}
----------
Sample Plays Edges:
{'_key': '28186', '_id': 'plays/28186', '_from': 'Users/26333936', '_to': 'Games/11586_call_of_duty_black_ops', '_rev': '_jPaYgpa--Q', 'weight': 57}
{'_key': '62792', '_id': 'plays/62792', '_from': 'Users/19094181', '_to': 'Games/11929_half_life_blue_shift', '_rev': '_jPaYimC--G', 'weight': 0.8}
{'_key': '27222', '_id': 'plays/27222', '_from': 'Users/124437057', '_to': 'Games/11727_deponia_the_complete_journey', '_rev': '_jPaYgl---C', 'weight': 60}
----------


### Agent

In [11]:
# Wrap the python-arango database handle in LangChain's ArangoGraph.
arango_graph = ArangoGraph(db)


In [12]:
# Build the project-local GraphAgent (from agent_utils) from the LangChain
# graph wrapper and the nx_arangodb graph.
agent = GraphAgent(arango_graph, G_adb)


In [13]:
# Ask a natural-language question. The captured log shows the agent calling
# both a NetworkX analysis tool and an AQL query tool before composing the
# final answer.
agent.query_graph("Most influential game in the graph and its number users who played it")

1) Generating NetworkX code
Query Received for AQL translation:
Find the number of users who played the most influential game.
----------


[1m> Entering new ArangoGraphQAChain chain...[0m
AQL Query (1):[32;1m[1;3m
WITH Users, Games, plays
FOR game IN Games
    LET totalWeight = (
        FOR play IN plays
            FILTER play._to == game._id
            COLLECT AGGREGATE total = SUM(play.weight)
            RETURN total
    )
    SORT totalWeight DESC
    LIMIT 1
    FOR play IN plays
        FILTER play._to == game._id
        COLLECT WITH COUNT INTO numberOfUsers
RETURN numberOfUsers
[0m
AQL Result:
[32;1m[1;3m[4841][0m

[1m> Finished chain.[0m
Arrango Chain Result:
{'query': 'Find the number of users who played the most influential game.', 'result': 'Summary: The most influential game in the database was played by 4,841 users.', 'aql_result': [4841]}
----------
import networkx as nx

# Assuming G_adb is already defined and is a NetworkX graph object

# Step 1: Calculat

[00:00:51 -0600] [INFO]: Graph 'SteamGraph' load took 2.534729242324829s


----------
FINAL_RESULT: Dota 2
----------
3) Formulating final answer
content='Most influential game in the graph and its number users who played it' additional_kwargs={} response_metadata={} id='d60f1554-8e1c-4717-bf11-f0902bdddb8d'
content='' additional_kwargs={'tool_calls': [{'id': 'call_bK43XBWQuopXMB9Kfb6cEmUi', 'function': {'arguments': '{"__arg1": "Find the most influential game in the graph using centrality measures."}', 'name': 'NetworkX_Analysis'}, 'type': 'function'}, {'id': 'call_rBVQxfwVCDUAheYNJ2oz9XYs', 'function': {'arguments': '{"__arg1": "Find the number of users who played the most influential game."}', 'name': 'AQL_Query'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 75, 'prompt_tokens': 217, 'total_tokens': 292, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},

'The most influential game in the graph is "Dota 2," and it was played by 4,841 users.'

In [None]:
# Smoke test: create a gpt-4o chat model with temperature 0 and send it a
# single greeting.
# NOTE(review): this cell has no execution count (In [None]) — confirm it
# still runs in order under Restart & Run All.
# Presumably ChatOpenAI picks up OPENAI_API_KEY from the environment — verify.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

llm.invoke("hello!")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 9, 'total_tokens': 19, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_523b9b6e5f', 'finish_reason': 'stop', 'logprobs': None}, id='run-c81298af-457f-49cb-9484-850406da7719-0', usage_metadata={'input_tokens': 9, 'output_tokens': 10, 'total_tokens': 19, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})