In [25]:
import pandas as pd
import networkx as nx
from dotenv import load_dotenv
import os
import matplotlib.pyplot as plt
import re

from arango import ArangoClient
import nx_arangodb as nxadb
# Load settings from a .env file into the process environment (python-dotenv).
# Presumably this supplies the ARANGO_* values that the next cell reads with
# os.getenv -- confirm the .env file defines them.
load_dotenv()


True

In [2]:
# ArangoDB connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
arango_host, arango_user, arango_password, arango_db = (
    os.getenv(variable_name)
    for variable_name in ('ARANGO_HOST', 'ARANGO_USER', 'ARANGO_PASSWORD', 'ARANGO_DB')
)

In [3]:
# The CSV is read with header=None (no header row), so columns are labelled by hand.
df = pd.read_csv('Data/steamplayer/steam-200k.csv', header=None)
df.columns = ["steamid", "GameName", "action", "hours", "junk"]

# Filter for only 'play' interactions
play_df = df[df["action"] == 'play']

# Display the first few rows. A notebook only renders the value of a cell's
# last expression, so the preview has to come at the end of the cell.
df.head()

In [26]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from arango import ArangoClient
import nx_arangodb as nxadb
from adbnx_adapter.controller import ADBNX_Controller
from adbnx_adapter.adapter import ADBNX_Adapter

# -------------------------------
# 1. Read and Process the DataFrame
# -------------------------------
# Load the file positionally (header=None), then attach column labels.
df = pd.read_csv('Data/steamplayer/steam-200k.csv', header=None)
df.columns = ["steamid", "GameName", "action", "hours", "junk"]

# Keep only the rows whose action is 'play'.
is_play = df["action"].eq('play')
play_df = df[is_play]

# -------------------------------
# 2. Build a Heterogeneous NetworkX Graph
# -------------------------------
# Undirected graph holding two kinds of nodes (players and games). The node ID
# carries the target collection name as a "<Collection>:" prefix.
G = nx.Graph()

# Add player nodes with IDs in the format "Users:<steamid>"
for steamid in play_df["steamid"].unique():
    node_id = f"Users:{steamid}"
    G.add_node(node_id, _id=node_id, type="Users", steamid=int(steamid))

# Add game nodes with IDs in the format "Games:<GameName>"
for game in play_df["GameName"].unique():
    node_id = f"Games:{game}"
    G.add_node(node_id, _id=node_id, type="Games", GameName=game)

# Add edges between players and games with the hours played as weight.
# Iterating the three columns in lockstep avoids DataFrame.iterrows(), which
# builds a Series per row and is needlessly slow here.
# If a (player, game) pair occurs more than once, the last row's hours win,
# because add_edge overwrites the attributes of an existing edge.
for steamid, game, hours in zip(play_df["steamid"], play_df["GameName"], play_df["hours"]):
    G.add_edge(f"Users:{steamid}", f"Games:{game}", weight=float(hours))

print("Total nodes:", G.number_of_nodes())
print("Total edges:", G.number_of_edges())

# (Optional) Visualize the graph (colors based on type)
# pos = nx.spring_layout(G, iterations=15, seed=1721)
# node_colors = ["red" if data.get("type")=="Users" else "blue" for node, data in G.nodes(data=True)]
# nx.draw(G, pos=pos, node_color=node_colors, with_labels=True, node_size=50)
# plt.title("Heterogeneous Steam Graph")
# plt.show()

# -------------------------------
# 3. Define a Custom ADBNX Controller
# -------------------------------
# Helper function to sanitize keys.

def slugify(text):
    """Return a lowercase slug of ``text`` made only of a-z, 0-9 and '_'.

    The text is lowercased, every run of characters outside ``a-z0-9`` is
    collapsed into a single underscore, and leading/trailing underscores are
    removed. The result is an empty string when nothing alphanumeric remains.
    """
    return re.sub(r'[^a-z0-9]+', '_', text.lower()).strip('_')

class Custom_ADBNX_Controller(ADBNX_Controller):
    """Controller for graphs whose node IDs look like "<Collection>:<name>".

    The text before the first ':' names the ArangoDB vertex collection
    ("Users" or "Games" in this notebook); the text after it is the
    player's steamid or the game's title.
    """

    def _identify_networkx_node(self, nx_node_id, nx_node, adb_v_cols):
        """Return the vertex collection name encoded in the node ID prefix."""
        # Our node IDs are in the format "Users:<steamid>" or "Games:<GameName>"
        return str(nx_node_id).split(":", 1)[0]

    def _identify_networkx_edge(self, nx_edge, from_node_id, to_node_id, nx_map, adb_e_cols):
        """Return the edge collection for an edge, based on its endpoints.

        Only Users -> Games edges are known; they belong to "plays".

        Raises:
            ValueError: for any other combination of endpoint collections.
        """
        # Read the collection from the NetworkX node IDs themselves, whose
        # "<Collection>:" prefix is set when the graph is built.
        # NOTE(review): nx_map values look like ArangoDB ids ("Collection/key"),
        # which contain no ':' to split on -- confirm against the adapter docs.
        from_col = str(from_node_id).split(":", 1)[0]
        to_col = str(to_node_id).split(":", 1)[0]
        if from_col == "Users" and to_col == "Games":
            return "plays"
        raise ValueError(f"Unknown edge relationship: {from_node_id} -> {to_node_id}")

    def _keyify_networkx_node(self, i, nx_node_id, nx_node, col):
        """Return the document key to use for a node.

        Games get "<i>_<slugified title>"; every other collection uses the
        text after the prefix unchanged.
        """
        # Split on the FIRST ':' only: game titles may contain colons themselves
        # (e.g. "Counter-Strike: Global Offensive") and must not be truncated.
        _prefix, separator, name = str(nx_node_id).partition(":")
        key_part = name if separator else str(nx_node_id)
        if col == "Games":
            # For game nodes, produce keys in the format: <unique_number>_<slugified_name>
            return f"{i}_{slugify(key_part)}"
        # For other collections (like Users), the part after the prefix is the key.
        return str(key_part)

# -------------------------------
# 4. Define Edge Definitions for ArangoDB
# -------------------------------
# A single edge collection, "plays", pointing from Users vertices to Games vertices.
edge_definitions = [
    dict(
        edge_collection="plays",
        from_vertex_collections=["Users"],
        to_vertex_collections=["Games"],
    ),
]

# -------------------------------
# 5. Persist the Graph into ArangoDB
# -------------------------------
# Connect to ArangoDB with the settings read from the environment.
client = ArangoClient(hosts=arango_host)
# Use the configured database name and user; fall back to "Steam" / "root"
# so the cell still works when those variables are unset.
db = client.db(arango_db or "Steam", username=arango_user or "root", password=arango_password)

graph_name = "SteamGraph"

# (Optional) Delete the graph if it exists, so the cell can be re-run.
# The cleanup is best-effort, but a failure is reported instead of silently
# swallowed: it usually means a connection or permission problem.
try:
    db.delete_graph(graph_name, drop_collections=True, ignore_missing=True)
except Exception as exc:
    print(f"Warning: could not delete existing graph {graph_name!r}: {exc}")

# Instantiate the adapter with our custom controller.
custom_adapter = ADBNX_Adapter(db, Custom_ADBNX_Controller())

# Convert the NetworkX graph into an ArangoDB graph using our edge definitions.
adb_graph = custom_adapter.networkx_to_arangodb(graph_name, G, edge_definitions=edge_definitions)

print("ArangoDB Graph created:", adb_graph)


[2025/02/17 21:29:24 -0600] [234402] [INFO] - adbnx_adapter: Instantiated ADBNX_Adapter with database 'Steam'


Total nodes: 14950
Total edges: 70477


Output()

Output()

[2025/02/17 21:29:34 -0600] [234402] [INFO] - adbnx_adapter: Created ArangoDB 'SteamGraph' Graph


ArangoDB Graph created: <Graph SteamGraph>


In [23]:
# Summarise the ArangoDB graph: its vertex collections and edge definitions.
def _show_edge_definition(edge_def):
    """Print one edge definition: its edge collection and both endpoint lists."""
    print(f"\nEdge Collection: {edge_def['edge_collection']}")
    print(f"From Collections: {edge_def['from_vertex_collections']}")
    print(f"To Collections: {edge_def['to_vertex_collections']}")


print("Graph Schema:")

print("\nVertex Collections:")
for collection in adb_graph.vertex_collections():
    print(f"- {collection}")

print("\nEdge Definitions:")
for definition in adb_graph.edge_definitions():
    _show_edge_definition(definition)

Graph Schema:

Vertex Collections:
- Games
- Users

Edge Definitions:

Edge Collection: plays
From Collections: ['Users']
To Collections: ['Games']
