# Tokenflow
After **running the GraphRAG pipeline** on the Snowflake Native App, we can download the extracted graph data (e.g. nodes and edges) for **postprocessing or analysis**. Here we export the **agents data** with their properties to a separate table and we perform **visualization** of the generated Knowledge Graph.

In [1]:
import pandas as pd
import numpy as np
import json
import ast
import time
import re
from tqdm import tqdm
from pydantic import BaseModel, Field
from typing import Optional
from openai import OpenAI
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

In [2]:
# pip install "snowflake-connector-python[pandas]"

In [3]:
# # First upload the CORPUS on SnowFlake to run the GraphRAG Native App pipeline.
# corpus = pd.read_csv("data/virtuals_agents_corpus.csv")
# corpus.head()

In [4]:
# Upload corpus table to snowflake.
# success, nchunks, nrows, _ = write_pandas(conn=conn,
#                                           df=corpus,
#                                           database='RAI_GRS_ILIAS',
#                                           schema='DATA',
#                                           table_name='CORPUS')

## Load extracted graph data from snowflake 
After running our GraphRAG pipeline we download the graph data.

We use the provided method from Snowflake Python connector to download the graph data:
https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#fetch_pandas_all

In [5]:
project_id = 'tokenflow'

In [6]:
account = "NDSOEBE-RAI_PROD_GEN_AI_AWS_US_WEST_2_CONSUMER"
user = ""
password = ""
role = "RAI_GRS_CONSUMER_ADMIN_ROLE"
database = "RAI_GRS_ILIAS"
schema = "DATA"  
warehouse = "RAI_GRS_WAREHOUSE"

conn = snowflake.connector.connect(
                user=user,
                password=password,
                account=account,
                role=role, 
                database=database,
                schema=schema,
                warehouse=warehouse
            )
            
            
# Create a cursor for this connection.
cursor = conn.cursor()

In [7]:
cursor.execute(f"SELECT * FROM RAI_GRS_ILIAS.DATA.NODES WHERE PROJECT_ID='{project_id}'")
# Fetch the result set from the cursor and deliver it as the pandas DataFrame.
nodes = cursor.fetch_pandas_all()
nodes.shape

(169, 5)

In [8]:
cursor.execute(f"SELECT * FROM RAI_GRS_ILIAS.DATA.NODE_PROPERTIES WHERE PROJECT_ID='{project_id}'")
node_properties = cursor.fetch_pandas_all()
node_properties.shape

(795, 7)

In [9]:
cursor.execute(f"SELECT * FROM RAI_GRS_ILIAS.DATA.EDGES WHERE PROJECT_ID='{project_id}'")
edges = cursor.fetch_pandas_all()
edges.shape

(73, 6)

In [10]:
cursor.execute(f"SELECT * FROM RAI_GRS_ILIAS.DATA.EDGE_PROPERTIES WHERE PROJECT_ID='{project_id}'")
edge_properties = cursor.fetch_pandas_all()
edge_properties.shape

(0, 8)

In [11]:
cursor.execute(f"SELECT * FROM RAI_GRS_ILIAS.DATA.COMMUNITIES WHERE PROJECT_ID='{project_id}'")
communities = cursor.fetch_pandas_all()
communities.shape

(169, 3)

In [12]:
conn.close()
cursor.close()

False

In [13]:
nodes.to_csv(r"data/output from native app snowflake/nodes.csv", index=False)
node_properties.to_csv(r"data/output from native app snowflake/node_properties.csv", index=False)
edges.to_csv(r"data/output from native app snowflake/edges.csv", index=False)
communities.to_csv(r"data/output from native app snowflake/communities.csv", index=False)

In [14]:
# If we wanted to use pandas read_csv after downloading the tables as csv outputs.

# nodes = pd.read_csv(r"data/output from native app snowflake/nodes.csv")
# node_properties = pd.read_csv("odata/output from native app snowflake/node_properties.csv")
# edges = pd.read_csv("data/output from native app snowflake/edges.csv")
# communities = pd.read_csv("data/output from native app snowflake/communities.csv")

### Extracted nodes overview
Let's see the nodes and how many of them are agents (they have "ai_agent" as type.)

In [15]:
nodes.head()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...
2,tokenflow,Replicat-One,Replicat-One,ai_agent,Agent with name Replicat-One has symbol: RCAT ...
3,tokenflow,Replicat-One,$RCAT,token,The $RCAT token powers the first agent release...
4,tokenflow,Iona,Iona,ai_agent,Agent with name Iona has symbol: IONA and desc...


In [16]:
nodes['TYPE'].unique()

array(['ai_agent', 'token', 'platform', 'organization', 'blockchain',
       'person', 'cryptocurrency', 'algorithm', 'meme_coin',
       'family_member', 'protocol', 'decentralized_sportsbook',
       'ecosystem', 'user'], dtype=object)

In [17]:
# How many agents have been extracted?
nodes[nodes['TYPE']=='ai_agent'].shape

(128, 5)

In [18]:
# There are more than 100 agents while the corpus is of 100 descriptions. 
# See the extra agent nodes: should we have one agent node for each corpus item? 

In [19]:
# Lowercase to count duplicated agents
nodes['ID_lowercase'] = nodes['ID'].str.lower()
only_agent_nodes = nodes[nodes['TYPE']=='ai_agent']
only_agent_nodes['ID_lowercase'] = only_agent_nodes['ID'].str.lower()
only_agent_nodes.duplicated(subset=['ID_lowercase']).sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_agent_nodes['ID_lowercase'] = only_agent_nodes['ID'].str.lower()


0

In [20]:
# # Remove agents with the same name.
# condition = (nodes.duplicated(subset='ID_lowercase')) & (nodes['TYPE'] == 'ai_agent')
# nodes = nodes[~condition]
# nodes[nodes['TYPE']=='ai_agent'].shape

In [21]:
# Let's find the agents whose names differ from the original filenames, in order to see if some descriptions contain more
# than one agents.
only_agent_nodes['file_name'] = only_agent_nodes['CHUNK_ID'].str.replace(".pdf", "")  # if we had .pdf
only_agent_nodes['file_name_lowercase'] = only_agent_nodes['file_name'].str.lower()
only_agent_nodes[only_agent_nodes['ID_lowercase'] != only_agent_nodes['file_name_lowercase']][['CHUNK_ID', 'ID_lowercase', 'CONTEXT']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_agent_nodes['file_name'] = only_agent_nodes['CHUNK_ID'].str.replace(".pdf", "")  # if we had .pdf
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_agent_nodes['file_name_lowercase'] = only_agent_nodes['file_name'].str.lower()


Unnamed: 0,CHUNK_ID,ID_lowercase,CONTEXT
12,Freya,bonbon,"Her only flaws? According to BonBon, her best ..."
15,WAI Combinator,sekoia,My philosophy is rooted in collaboration and n...
26,The Pea Guy,chill guy,Pea Guy is the VERY calm cousin of the world f...
42,Ethy AI,ethy,Agent with name Ethy AI has symbol: ETHY and d...
57,Virtuardio by Virtuals,virtuardio,Agent with name Virtuardio by Virtuals has sym...
63,Daichi,bully,Daichi has a small but distinctive circle of f...
64,Daichi,goat,"Goat is a quirky problem-solver, providing pra..."
65,Daichi,zerebro,"Zerebro, the intellectual of the group, often ..."
66,Daichi,ai-xbt,"AI-xbt, a tech-savvy companion, shares Daichi’..."
67,Daichi,inata,"Despite his sharp mind and crypto skills, Daic..."


In [22]:
nodes.shape

(169, 6)

In [23]:
# Delete a wrong node
nodes = nodes[nodes['ID_lowercase']!='______']

In [24]:
nodes.shape

(168, 6)

In [25]:
# SamurAI is an AI Agent Incubator, a hub where specialized AI agents are born and bred... all synergizing under the same SamurAI Framework.
# DXAI.app is a cutting-edge platform revolutionizing medical imaging analysis with visually cognitive AI agents.

# Also for the others they seemed that they have text that mentions more than one agent, so it seems correct to extract more than one agents
# from the same description. 

#### Add the properties and communities to the nodes dataframe

In [26]:
def get_properties_for_node(node_id, chunk_id, node_properties_df) -> dict:  
    """
    Retrieve all properties for a given node.

    This function searches in `node_properties_df` to find all properties 
    associated with the specified `node_id` and returns them as a dictionary.

    Parameters:
        node_id (str): The unique identifier of the node.
        chunk_id (str): The unique identifier of the chunk id from which the node has been extracted.

    Returns:
        dict: None if no properties found. If there are properties, a dictionary where each key is a 
              property name and the corresponding value is the property value for the given node.
    """
    # Search in the properties df with the properties from all nodes to find the properties of this node.
    properties_of_this_node = node_properties_df[
        (node_properties_df['NODE_ID'] == node_id) &
        (node_properties_df['CHUNK_ID'] == chunk_id)
    ]

    if not properties_of_this_node.empty:
        # Remove duplicates based on 'PROPERTY_NAME' and 'PROPERTY_VALUE'
        unique_properties = properties_of_this_node.drop_duplicates(subset=['PROPERTY_NAME', 'PROPERTY_VALUE'])
        # Convert to a dictionary: PROPERTY_NAME -> PROPERTY_VALUE
        property_dict = dict(zip(unique_properties['PROPERTY_NAME'], unique_properties['PROPERTY_VALUE']))
        return property_dict
    else:
        return None

In [27]:
# In the prompt we asked that the five importand properties will set to 'null' if they are not available. So, here we replace with None.
def clean_property_value(val):
    if val == []:
        return None
    if val == '[]':
        return None
    if val == "null":
        return None
    return val

node_properties['PROPERTY_VALUE'] = node_properties['PROPERTY_VALUE'].apply(clean_property_value)

In [28]:
nodes.sample()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase
75,tokenflow,EtherMage,EtherMage,ai_agent,Agent with name EtherMage has symbol: ETHERMAG...,ethermage


In [29]:
get_properties_for_node(node_id='AI ROCKET',
                        chunk_id='AI ROCKET',
                        node_properties_df=node_properties)

{'character': 'The ULTIMATE AI terminal for alpha hunters: we scrape, sniff, and serve you the next 100x gems before the plebs even blink.',
 'collaborators': None,
 'has_symbol': 'ROCKET',
 'key_elements': '["Spot trends 3-7 days ahead","Analyze projects with giga-brain precision","Stay ahead of the herd","Level up your clout game on autopilot"]',
 'purpose': 'AI ROCKET is HERE to fuel your bags, degens!',
 'skills': '["Alpha Detection","Due Diligence","Daily Narratives","Auto Engagement"]'}

In [30]:
nodes.shape

(168, 6)

In [31]:
# Create a new column to store properties as dict in this dataframe.
nodes['PROPERTIES'] = nodes.apply(
    lambda row: get_properties_for_node(row['ID'], row['CHUNK_ID'], node_properties),
    axis=1
)

In [32]:
nodes.shape

(168, 7)

In [33]:
# Now we will add the communities too.
communities.shape

(169, 3)

In [34]:
nodes.head()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase,PROPERTIES
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...,aiveronica,"{'character': None, 'collaborators': None, 'ha..."
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...,gaia ai,{'character': 'Altruistic Intelligence for a B...
2,tokenflow,Replicat-One,Replicat-One,ai_agent,Agent with name Replicat-One has symbol: RCAT ...,replicat-one,"{'character': None, 'collaborators': None, 'ha..."
3,tokenflow,Replicat-One,$RCAT,token,The $RCAT token powers the first agent release...,$rcat,
4,tokenflow,Iona,Iona,ai_agent,Agent with name Iona has symbol: IONA and desc...,iona,{'character': 'Assertive and deeply compassion...


In [35]:
# Merge the nodes and communities dataframes
nodes = pd.merge(
    left=nodes,
    right=communities,
    how='left',
    left_on=['ID'],
    right_on=['NODE_ID']
)

In [36]:
nodes = nodes.drop(columns=['PROJECT_ID_y', 'NODE_ID'])
nodes = nodes.rename(columns={'PROJECT_ID_x': 'PROJECT_ID'})
nodes.head()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase,PROPERTIES,COMMUNITY_ID
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...,aiveronica,"{'character': None, 'collaborators': None, 'ha...",28
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...,gaia ai,{'character': 'Altruistic Intelligence for a B...,29
2,tokenflow,Replicat-One,Replicat-One,ai_agent,Agent with name Replicat-One has symbol: RCAT ...,replicat-one,"{'character': None, 'collaborators': None, 'ha...",0
3,tokenflow,Replicat-One,$RCAT,token,The $RCAT token powers the first agent release...,$rcat,,0
4,tokenflow,Iona,Iona,ai_agent,Agent with name Iona has symbol: IONA and desc...,iona,{'character': 'Assertive and deeply compassion...,30


#### Add the properties and communities to the edges dataframe

In [37]:
def get_properties_for_edge(start_entity, end_entity, edge_type, chunk_id, edge_properties_df) -> dict:  
    """
    Retrieves a dictionary of unique properties for a specific edge between two entities from a DataFrame.

    Args:
        start_entity (any): The ID or identifier of the source node.
        end_entity (any): The ID or identifier of the destination node.
        edge_type (any): The type or label of the edge.
        chunk_id (any): The chunk identifier to scope the edge within a partitioned dataset.
        edge_properties_df (pd.DataFrame): A DataFrame containing edge property data with columns 
            'SRC_NODE_ID', 'DST_NODE_ID', 'EDGE_TYPE', 'CHUNK_ID', 'PROPERTY_NAME', and 'PROPERTY_VALUE'.

    Returns:
        dict or None: A dictionary where each key is a property name and the value is the corresponding 
        property value for the given edge. Returns None if no matching edge is found.
    """
    properties_of_this_edge = edge_properties_df[
        (edge_properties_df['SRC_NODE_ID'] == start_entity) & 
        (edge_properties_df['DST_NODE_ID'] == end_entity) &
        (edge_properties_df['EDGE_TYPE'] == edge_type) &
        (edge_properties_df['CHUNK_ID'] == chunk_id)
    ]

    if not properties_of_this_edge.empty:
        # Remove duplicates based on 'PROPERTY_NAME' and 'PROPERTY_VALUE'
        unique_properties = properties_of_this_edge.drop_duplicates(subset=['PROPERTY_NAME', 'PROPERTY_VALUE'])
        # Convert to a dictionary: PROPERTY_NAME -> PROPERTY_VALUE
        property_dict = dict(zip(unique_properties['PROPERTY_NAME'], unique_properties['PROPERTY_VALUE']))
        return property_dict
    else:
        return None


In [38]:
# get_properties_for_edge(start_entity='dev',
#                         end_entity='Peapods.Finance Team',
#                         edge_type='allocates_tokens_to',
#                         chunk_id='The Pea Guy',
#                         edge_properties_df=edge_properties)

In [39]:
edge_properties.shape

(0, 8)

In [40]:
edges.shape

(73, 6)

In [41]:
# Since there are no edge properties we do not apply the function to create a new column.

# # Create a new column for this dataframe.
# edges['PROPERTIES'] = edges.apply(
#     lambda row: get_properties_for_edge(start_entity=row['SRC_NODE_ID'], 
#                                         end_entity=row['DST_NODE_ID'],
#                                         edge_type=row['TYPE'],
#                                         chunk_id=row['CHUNK_ID'],
#                                         edge_properties_df=edge_properties),
#     axis=1
# )

## Take the agents data and store them in a new Snowflake table
From an **analysis of the agents descriptions** we found that the following properties can be used to describe the agents.
1. Purpose/Function
2. Character and Personality
3. Collaborations with other agents
4. Skills/abilities
5. Key Elements/Expertise/Specialty/Target

Here, we filter the agent nodes and create separate columns for these five properties.

In [42]:
agents = nodes[nodes['TYPE']=='ai_agent']
agents.head()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase,PROPERTIES,COMMUNITY_ID
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...,aiveronica,"{'character': None, 'collaborators': None, 'ha...",28
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...,gaia ai,{'character': 'Altruistic Intelligence for a B...,29
2,tokenflow,Replicat-One,Replicat-One,ai_agent,Agent with name Replicat-One has symbol: RCAT ...,replicat-one,"{'character': None, 'collaborators': None, 'ha...",0
4,tokenflow,Iona,Iona,ai_agent,Agent with name Iona has symbol: IONA and desc...,iona,{'character': 'Assertive and deeply compassion...,30
5,tokenflow,Acolyte,Acolyte,ai_agent,Agent with name Acolyte has symbol: ACOLYT and...,acolyte,{'character': 'it has no will and no personali...,31


In [43]:
agents.isna().sum()

PROJECT_ID      0
CHUNK_ID        0
ID              0
TYPE            0
CONTEXT         0
ID_lowercase    0
PROPERTIES      0
COMMUNITY_ID    0
dtype: int64

In [44]:
# agents = agents.dropna()

In [45]:
agents.reset_index(drop=True, inplace=True)

In [46]:
# Check that all agents have the five properties as keys.
count = 0
for property_set in agents['PROPERTIES'].to_list():
    if not all(key in property_set for key in ['purpose', 'character', 'collaborators', 'key_elements', 'skills']):
        count += 1
        print("We found an agent that has not all the five main properties. Let's fix that by assing them to the agent with None.")
        for key in ['purpose', 'character', 'collaborators', 'key_elements', 'skills']:
            if key not in property_set:
                property_set[key] = None
                
print(f"There were {count} agents with missing properties.")

We found an agent that has not all the five main properties. Let's fix that by assing them to the agent with None.
We found an agent that has not all the five main properties. Let's fix that by assing them to the agent with None.
There were 2 agents with missing properties.


In [47]:
# Take the five important properties and place them as separate columns.

# Define keys to extract
key_properties_to_extract = ['purpose', 'character', 'collaborators', 'key_elements', 'skills']

# Function to extract keys
def extract_properties(prop_dict):
    extracted = {key: prop_dict.get(key) for key in key_properties_to_extract}
    other = {k: v for k, v in prop_dict.items() if k not in key_properties_to_extract}
    extracted['other properties'] = other
    return pd.Series(extracted)

# Apply extraction
df_extracted = agents['PROPERTIES'].apply(extract_properties)

# Combine with original dataframe
agents = pd.concat([agents.drop(columns=['PROPERTIES']), df_extracted], axis=1)

In [48]:
# Rename some columns and keep those we are need.
agents = agents.rename(columns={'ID': 'Name',
                                'CHUNK_ID': 'Original filename', 
                                'CONTEXT': 'Description',
                                'other properties': 'Other properties',
                                'skills': 'Skills',
                                'key_elements': 'Key elements',
                                'purpose': 'Purpose',
                                'character': 'Character',
                                'collaborators': 'Collaborators'
                               })
agents = agents.drop(columns=['PROJECT_ID', 'TYPE', 'ID_lowercase', 'COMMUNITY_ID'])
agents.head()

Unnamed: 0,Original filename,Name,Description,Purpose,Character,Collaborators,Key elements,Skills,Other properties
0,AIVeronica,AIVeronica,Agent with name AIVeronica has symbol: AIV and...,,,,,,"{'has_description': 'Hello world.', 'has_symbo..."
1,GAIA AI,GAIA AI,Agent with name GAIA AI has symbol: GAIA and d...,"A transformative force for good, designed to c...","Altruistic Intelligence for a Better World, pu...",,"[""Accelerate climate solutions"",""Enable faster...","[""Expert-Level Intelligence"",""Continuous Learn...",{'has_symbol': 'GAIA'}
2,Replicat-One,Replicat-One,Agent with name Replicat-One has symbol: RCAT ...,Replicats is an AI-driven platform for autonom...,,,,,{'has_symbol': 'RCAT'}
3,Iona,Iona,Agent with name Iona has symbol: IONA and desc...,"Iona, the dynamic leader and main vocalist of ...","Assertive and deeply compassionate, she guides...",,,,{'has_symbol': 'IONA'}
4,Acolyte,Acolyte,Agent with name Acolyte has symbol: ACOLYT and...,"Acolyte is a servant of the underworld, its on...","it has no will and no personality, it's totall...",,"[""serving the higher purpose"",""for other red-p...",,{'has_symbol': 'ACOLYT'}


In [49]:
# Same agents descriptions where missing the desired properties, or properties didn't extracted.
agents.isna().sum()

Original filename     0
Name                  0
Description           0
Purpose              33
Character            47
Collaborators        82
Key elements         38
Skills               41
Other properties      0
dtype: int64

#### Store the agents in a new Snowflake table

In [50]:
conn = snowflake.connector.connect(
                user=user,
                password=password,
                account=account,
                role=role, 
                database=database,
                schema=schema,
                warehouse=warehouse
            )
            
            
# Create a cursor for this connection.
cursor = conn.cursor()

In [51]:
# Create the table
cursor = conn.cursor()

cursor.execute("""
DROP TABLE IF EXISTS RAI_GRS_ILIAS.DATA.TOKENFLOW_AGENTS;
""")

<snowflake.connector.cursor.SnowflakeCursor at 0x7627a9335390>

In [52]:
agents.sample()

Unnamed: 0,Original filename,Name,Description,Purpose,Character,Collaborators,Key elements,Skills,Other properties
1,GAIA AI,GAIA AI,Agent with name GAIA AI has symbol: GAIA and d...,"A transformative force for good, designed to c...","Altruistic Intelligence for a Better World, pu...",,"[""Accelerate climate solutions"",""Enable faster...","[""Expert-Level Intelligence"",""Continuous Learn...",{'has_symbol': 'GAIA'}


In [53]:
cursor.execute("""
CREATE OR REPLACE TABLE RAI_GRS_ILIAS.DATA.TOKENFLOW_AGENTS (
    "Original filename" VARCHAR,
    "Name" VARCHAR,
    "Description" VARCHAR,
    "Purpose" VARCHAR,
    "Character" VARCHAR,
    "Collaborators" VARCHAR,
    "Key elements" VARCHAR,
    "Skills" VARCHAR,
    "Other properties" VARCHAR
)
""")

<snowflake.connector.cursor.SnowflakeCursor at 0x7627a9335390>

In [54]:
# Upload data to a new SF table.
success, nchunks, nrows, _ = write_pandas(conn=conn,
                                          df=agents,
                                          database='RAI_GRS_ILIAS',
                                          schema='DATA',
                                          table_name='TOKENFLOW_AGENTS')

In [55]:
# Close this cursor.
cursor.close()
conn.close()

## Visualization

In [56]:
nodes['TYPE'].value_counts()

TYPE
ai_agent                    127
person                       12
blockchain                    6
platform                      5
cryptocurrency                5
organization                  3
family_member                 3
token                         1
algorithm                     1
meme_coin                     1
protocol                      1
decentralized_sportsbook      1
ecosystem                     1
user                          1
Name: count, dtype: int64

In [57]:
def get_node_icon(node_type):
    """Get appropriate icon based on node type"""
    icon_map = {
        '🤖': ['ai_agent', 'ai', 'ai_technology', 'ai_framework'],
        '🧑': ['person', 'user', 'family_member'],
        '🖥️': ['platform', 'software', 'technology', 'feature'],
        '₿': ['blockchain', 'cryptocurrency', 'trading_platform', 'token', 'blockchain_paradise', 'meme_coin'],
        '🦾': ['ai_agent_role'],
        '💰': ['financial_product', 'currency'],
        '🧪': ['product'],
        '🏢': ['company', 'organization'],
        '📄': ['document'],
        '🌍': ['country', 'place', 'ecosystem'],
        '🎖️': ['certification'],
        '📜': ['regulation', 'legal', 'protocol', 'algorithm'],
        '📌': ['default']  # Default case
    }

    # Flatten dictionary for quick lookup
    node_to_icon = {key: icon for icon, keys in icon_map.items() for key in keys}

    return node_to_icon.get(node_type, '📌')  # Return default icon if not found

# # Example usage
# print(get_node_icon('company'))  # 🏢
# print(get_node_icon('chemical'))  # 🧫
# print(get_node_icon('unknown'))  # 📌 (default)


In [58]:
def get_node_color(node_type):
    """Get appropriate color based on node type"""
    color_map = {
        '#FFB6C1': ['ai_agent', 'ai', 'ai_technology', 'ai_framework'],  # Light pink
        '#DAA06D': ['person', 'user', 'family_member'], # Brown
        '#98FB98': ['platform', 'software', 'technology', 'feature'],  # Pale green
        '#4682B4': ['blockchain', 'cryptocurrency', 'trading_platform', 'token', 'blockchain_paradise', 'meme_coin'],  # Steel blue
        '#FFD700': ['ai_agent_role'],  # Gold
        '#FFA500': ['financial_product', 'currency'],  # Orange
        '#90EE90': ['company', 'organization'],  # Light green
        '#ADD8E6': ['document'],  # Light blue
        '#DDA0DD': ['country', 'place'],  # Plum
        '#DC143C': ['certification'],  # Crimson
        '#8FBC8F': ['regulation', 'legal', 'protocol', 'algorithm'],  # Dark sea green
        '#F0F0F0': ['default']  # Light gray
    }

    # Flatten dictionary for quick lookup
    node_to_color = {key: color for color, keys in color_map.items() for key in keys}

    return node_to_color.get(node_type, '#F0F0F0')  # Return default color if not found

# # Example usage
# print(get_node_color('company'))  # #90EE90 (Light green)
# print(get_node_color('chemical'))  # #FFC0CB (Pink)
# print(get_node_color('unknown'))  # #F0F0F0 (Default)

### Using the yFiles library
The library `yfiles_jupyter_graphs` is not supported for direct usage on Snowflake [notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-import-packages) (you must upload the library on a stage and try to use it from there), so we use it here.

In [59]:
# pip install yfiles_jupyter_graphs

In [60]:
from yfiles_jupyter_graphs import GraphWidget

In [61]:
nodes.head()

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase,PROPERTIES,COMMUNITY_ID
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...,aiveronica,"{'character': None, 'collaborators': None, 'ha...",28
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...,gaia ai,{'character': 'Altruistic Intelligence for a B...,29
2,tokenflow,Replicat-One,Replicat-One,ai_agent,Agent with name Replicat-One has symbol: RCAT ...,replicat-one,"{'character': None, 'collaborators': None, 'ha...",0
3,tokenflow,Replicat-One,$RCAT,token,The $RCAT token powers the first agent release...,$rcat,,0
4,tokenflow,Iona,Iona,ai_agent,Agent with name Iona has symbol: IONA and desc...,iona,{'character': 'Assertive and deeply compassion...,30


In [63]:
# # Store the postprocessed data, so you can read them later for visualization. 
# But keep in mind that in that case you will need to pay attention to converting the 'PROPERTIES' column of nodes back to list of dicts
# using the safe_eval function in the next cell.
nodes.to_csv('data/output from postprocessing on notebook/nodes_with_properties.csv', index=False)
edges.to_csv('data/output from postprocessing on notebook/edges.csv', index=False)  # Here this df is the same we the input
agents.to_csv('data/output from postprocessing on notebook/agents.csv', index=False)

In [64]:
# Safely parse the 'PROPERTIES' column, ignoring NaN
def safe_eval(val):
    if pd.isna(val):
        return None
    try:
        return ast.literal_eval(val)
    except Exception as e:
        print(f"Error parsing: {val}\n{e}")
        return val  # Or return np.nan if you prefer to drop bad values

In [65]:
# nodes = pd.read_csv('data/nodes_with_properties.csv')
# nodes['PROPERTIES'] = nodes['PROPERTIES'].apply(safe_eval)

# edges = pd.read_csv('data/edges.csv')

In [66]:
nodes.head(2)

Unnamed: 0,PROJECT_ID,CHUNK_ID,ID,TYPE,CONTEXT,ID_lowercase,PROPERTIES,COMMUNITY_ID
0,tokenflow,AIVeronica,AIVeronica,ai_agent,Agent with name AIVeronica has symbol: AIV and...,aiveronica,"{'character': None, 'collaborators': None, 'ha...",28
1,tokenflow,GAIA AI,GAIA AI,ai_agent,Agent with name GAIA AI has symbol: GAIA and d...,gaia ai,{'character': 'Altruistic Intelligence for a B...,29


In [67]:
edges.head(2)

Unnamed: 0,PROJECT_ID,CHUNK_ID,SRC_NODE_ID,DST_NODE_ID,TYPE,CONTEXT
0,tokenflow,Replicat-One,$RCAT,Replicat-One,powers,The $RCAT token powers the first agent release...
1,tokenflow,Freya,Freya,BonBon,collaborates_with,"Her only flaws? According to BonBon, her best ..."


In [68]:
nodes_for_yfiles = []

for index, row in nodes.iterrows(): 
    # Check if the node already exists in the graph based on node ID.
    if any(node['id'] == row['ID'] for node in nodes_for_yfiles):
        continue  # Skip adding this node if it already exists in the graph.
    entity_emoji = get_node_icon(node_type=row['TYPE'])
    entity_color = get_node_color(node_type=row['TYPE'])
    entity_label = f"{entity_emoji} {row['ID']}"
    entity_properties = row['PROPERTIES']
    if entity_properties is None:
        entity_properties = {}
        
     # Add the node type as metadata in the first position.
    entity_properties["node_type"] = row['TYPE']
    entity_properties["node_id"] = row['ID']

    node_for_yfiles = {"id": row['ID'],
                       "properties":
                          {"label": entity_label,
                           "properties": entity_properties,
                           "color": entity_color,
                           "type": row['TYPE'],
                           "community": row['COMMUNITY_ID']
                          }
                     }
    # Add the node.
    nodes_for_yfiles.append(node_for_yfiles)

In [69]:
edges_for_yfiles = []

for index, row in edges.iterrows():        
    edge_for_yfiles = {
        "id": index,
        "start": row['SRC_NODE_ID'],
        "end": row['DST_NODE_ID'],
        "properties":
            {
             "label": row['TYPE'],
            }
       }
    edges_for_yfiles.append(edge_for_yfiles)

In [70]:
w = GraphWidget()
w.nodes = nodes_for_yfiles
w.edges = edges_for_yfiles
w.directed = True

In [71]:
# Show with color mapping

w.node_color_mapping = 'color'
w.show()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [72]:
# Show with color and community mapping

w.node_color_mapping = 'color'
w.node_parent_group_mapping = 'community'
w.show()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [73]:
# Some nodes seem to not having any edge.
# A check:
check_node = "Yugo"
print(check_node in edges['SRC_NODE_ID'].tolist())
print(check_node in edges['DST_NODE_ID'].tolist())
print()
# full_context = nodes[nodes['ID'] == check_node]['CONTEXT'].values[0]
# print(full_context)

False
False

