In [1]:
import os
import re
from itertools import islice
from random import randint

import gradio
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import nx_arangodb as nxadb
import pandas as pd
from arango import ArangoClient
from dotenv import load_dotenv
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from langchain_community.graphs import ArangoGraph
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

from agent_utils import GraphAgent
from graph_utils import create_d3_visualization



[21:19:29 -0600] [INFO]: NetworkX-cuGraph is unavailable: No module named 'nx_cugraph'.


In [2]:
# Load key/value pairs from a local .env file into the process environment so
# the os.getenv() lookups in the next cell can find them.
load_dotenv()

True

In [3]:
# Pull the OpenAI key and the ArangoDB connection settings out of the
# environment in one pass. Any variable that is not set comes back as None.
openai_key, arango_host, arango_user, arango_password, arango_db = (
    os.environ.get(env_name)
    for env_name in (
        "OPENAI_API_KEY",
        "ARANGO_HOST",
        "ARANGO_USER",
        "ARANGO_PASSWORD",
        "ARANGO_DB",
    )
)


In [4]:
# Initialize the client for ArangoDB and open the target database.
# Host, database name and credentials all come from the environment
# variables read above; os.getenv() yields None for any that are unset.
client = ArangoClient(hosts=arango_host)
db = client.db(arango_db, username=arango_user, password=arango_password)
# Sanity check: list every collection (system collections included) to
# confirm the connection works and the expected collections are present.
print(db.collections())

[{'id': '195136', 'name': '_analyzers', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195138', 'name': '_queues', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195137', 'name': '_aqlfunctions', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211044', 'name': 'Users', 'system': False, 'type': 'document', 'status': 'loaded'}, {'id': '195139', 'name': '_jobs', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211045', 'name': 'Games', 'system': False, 'type': 'document', 'status': 'loaded'}, {'id': '195141', 'name': '_appbundles', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195135', 'name': '_graphs', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '195140', 'name': '_apps', 'system': True, 'type': 'document', 'status': 'loaded'}, {'id': '211046', 'name': 'plays', 'system': False, 'type': 'edge', 'status': 'loaded'}, {'id': '195142', 'name': '_frontend', 'system': True, 'type': 'docu

In [5]:

# Bind a NetworkX-style graph object to the ArangoDB graph named "SteamGraph"
# using the database handle opened above.
G_adb = nxadb.Graph(name="SteamGraph", db=db)

# Print the graph summary (name, node count, edge count).
print(G_adb)

[21:19:36 -0600] [INFO]: Graph 'SteamGraph' exists.
[21:19:36 -0600] [INFO]: Default node type set to 'Games'


Graph named 'SteamGraph' with 14950 nodes and 70477 edges


In [6]:
# Fetch the stored definition document for SteamGraph directly from the
# `_graphs` system collection to inspect its edge definitions.
graph_def = db.collection("_graphs").get("SteamGraph")
print(graph_def)


{'_key': 'SteamGraph', '_id': '_graphs/SteamGraph', '_rev': '_jPu1zvi---', 'edgeDefinitions': [{'collection': 'plays', 'from': ['Users'], 'to': ['Games']}], 'orphanCollections': [], 'networkx': {'name': 'SteamGraph'}}


In [7]:
# Peek at the first 10 nodes and show each node's key next to its stored
# `_id` attribute. islice stops after 10 items instead of copying the whole
# node view (14950 entries for this graph) into a list just to slice it.
for node, data in islice(G_adb.nodes(data=True), 10):
    print(node, data.get("_id"))


Games/14187_broken_sword_1_shadow_of_the_templars_director_s_cut Games/14187_broken_sword_1_shadow_of_the_templars_director_s_cut
Users/239544929 Users/239544929
Games/12747_fable_anniversary Games/12747_fable_anniversary
Games/12684_football_superstars Games/12684_football_superstars
Users/234929278 Users/234929278
Users/242547207 Users/242547207
Users/204477246 Users/204477246
Users/170489182 Users/170489182
Games/11530_cat_goes_fishing Games/11530_cat_goes_fishing
Users/61216865 Users/61216865


In [8]:
# Re-print the graph summary; the node/edge counts should match the earlier cell.
print(G_adb)

Graph named 'SteamGraph' with 14950 nodes and 70477 edges


### AQL Test

In [9]:
# Walk the database's collections (system ones included) and print each name.
print("Available collections:")
for collection_name in (entry['name'] for entry in db.collections()):
    print(collection_name)

# Open the SteamGraph handle and show which vertex collections each edge
# collection connects.
graph = db.graph('SteamGraph')
print("\nGraph collections:")
print(graph.edge_definitions())

Available collections:
_analyzers
_queues
_aqlfunctions
Users
_jobs
Games
_appbundles
_graphs
_apps
plays
_frontend

Graph collections:
[{'edge_collection': 'plays', 'from_vertex_collections': ['Users'], 'to_vertex_collections': ['Games']}]


In [10]:


# AQL that picks 3 random documents from the Users vertex collection.
USERS_SAMPLE_AQL = """
    FOR node IN Users
        SORT RAND()
        LIMIT 3
        RETURN node
"""

# AQL that picks 3 random documents from the plays edge collection.
PLAYS_SAMPLE_AQL = """
    FOR edge IN plays
        SORT RAND()
        LIMIT 3
        RETURN edge
"""

# Run each sampling query in turn: execute it, print a heading, print every
# returned document, then a separator line. SORT RAND() makes the sample
# differ from run to run.
for heading, aql_text in (
    ("Sample Users Nodes:", USERS_SAMPLE_AQL),
    ("Sample Plays Edges:", PLAYS_SAMPLE_AQL),
):
    result_cursor = G_adb.query(aql_text)
    print(heading)
    for document in result_cursor:
        print(document)
    print('-'*10)


Sample Users Nodes:
{'_key': '193196975', '_id': 'Users/193196975', '_rev': '_jPaYbkq--o', 'type': 'Users', 'steamid': 193196975}
{'_key': '211920439', '_id': 'Users/211920439', '_rev': '_jPaYblu-_J', 'type': 'Users', 'steamid': 211920439}
{'_key': '301915080', '_id': 'Users/301915080', '_rev': '_jPaYbqC--e', 'type': 'Users', 'steamid': 301915080}
----------
Sample Plays Edges:
{'_key': '70004', '_id': 'plays/70004', '_from': 'Users/116564064', '_to': 'Games/11593_rust', '_rev': '_jPaYi5u--M', 'weight': 0.6}
{'_key': '9694', '_id': 'plays/9694', '_from': 'Users/80779496', '_to': 'Games/11372_bioshock', '_rev': '_jPaYfei--h', 'weight': 1.9}
{'_key': '10483', '_id': 'plays/10483', '_from': 'Users/11403772', '_to': 'Games/12639_magic_duels', '_rev': '_jPaYffa--q', 'weight': 22}
----------


### Agent

In [11]:
# Wrap the database handle in LangChain's ArangoGraph so it can be handed to
# the agent below.
arango_graph = ArangoGraph(db)


In [12]:
# Build the project's GraphAgent (from agent_utils) with both graph handles:
# the LangChain ArangoGraph wrapper and the nx_arangodb graph.
# NOTE(review): GraphAgent's internals are not visible in this notebook;
# presumably it uses the former for AQL question answering — confirm.
agent = GraphAgent(arango_graph, G_adb)


In [15]:
# Ask the agent a natural-language question about the graph. Per the recorded
# output, the question is rephrased, translated to AQL by an
# ArangoGraphQAChain, executed, and summarised as a text answer.
agent.query_graph("which user plays the most games and how many games?")

Query Received for AQL translation:
Find users with the highest number of games played, show user and game count
----------


[1m> Entering new ArangoGraphQAChain chain...[0m
AQL Query (1):[32;1m[1;3m
WITH Users, plays, Games
FOR u IN Users
    LET game_count = LENGTH(
        FOR p IN plays
        FILTER p._from == u._id
        RETURN 1
    )
    SORT game_count DESC
    RETURN {
        user: u,
        number_of_games: game_count
    }
[0m
AQL Result:
[32;1m[1;3m[{'user': {'_key': '62990992', '_id': 'Users/62990992', '_rev': '_jPaYbkq--2', 'type': 'Users', 'steamid': 62990992}, 'number_of_games': 498}, {'user': {'_key': '11403772', '_id': 'Users/11403772', '_rev': '_jPaYbiK-_h', 'type': 'Users', 'steamid': 11403772}, 'number_of_games': 314}, {'user': {'_key': '138941587', '_id': 'Users/138941587', '_rev': '_jPaYbjK--5', 'type': 'Users', 'steamid': 138941587}, 'number_of_games': 299}, {'user': {'_key': '47457723', '_id': 'Users/47457723', '_rev': '_jPaYbqu--M', 'type': 'User

'Based on the query results, the user with Steam ID 62990992 plays the most games, with a total of 498 games in their library. This is significantly more than the second-place user who has 314 games, showing that this user is quite an avid gamer with a very extensive game collection.'

In [12]:
# Smoke-test the OpenAI chat model on its own, independent of the graph agent.
# temperature=0 requests the least random sampling.
# NOTE(review): no API key is passed explicitly; presumably ChatOpenAI reads
# OPENAI_API_KEY from the environment loaded by load_dotenv() — confirm.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

llm.invoke("hello!")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 9, 'total_tokens': 19, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_523b9b6e5f', 'finish_reason': 'stop', 'logprobs': None}, id='run-c76be148-f2cd-4658-abd9-0b481454d11c-0', usage_metadata={'input_tokens': 9, 'output_tokens': 10, 'total_tokens': 19, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})