In [None]:
import pandas as pd

from neo4j import GraphDatabase

import os
from dotenv import load_dotenv

from tqdm import tqdm

load_dotenv()

In [None]:
# Connection settings are read from the environment (load_dotenv() in the
# imports cell loads them from a local .env file when one is present).
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

# Never echo the secret itself: cell outputs are stored inside the notebook
# file and are easily committed or shared. Only report whether it was found.
print(uri, username, "<password set>" if password else "<password missing>")

In [3]:
# Connectivity smoke test. The driver only lives for the duration of this
# `with` block: leaving the block closes it again.
credentials = (username, password)
with GraphDatabase.driver(uri, auth=credentials) as driver:
    driver.verify_connectivity()

In [4]:
# Read the five feature tables, in the same order as the names on the left.
player_df, action_df, social_df, network_df, group_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/csv_data/(after) Player information features.csv',
        '../data/csv_data/(after) Player actions features.csv',
        '../data/csv_data/(after) Social interaction diversity features.csv',
        '../data/csv_data/(after) Network measures features.csv',
        '../data/csv_data/(after) Group activities features.csv',
    )
)

In [5]:
# Remove the 'Type' column from every feature table.
# errors='ignore' keeps this cell idempotent: because each frame is reassigned
# to its own name, re-running the cell would otherwise raise KeyError once
# 'Type' has already been dropped.
player_df = player_df.drop(columns='Type', errors='ignore')
action_df = action_df.drop(columns='Type', errors='ignore')
social_df = social_df.drop(columns='Type', errors='ignore')
group_df = group_df.drop(columns='Type', errors='ignore')
network_df = network_df.drop(columns='Type', errors='ignore')

In [None]:
# Quick look at the loaded tables.
# NOTE(review): a notebook cell only renders the value of its last expression,
# so the player_df preview below is computed but never shown; only
# action_df.head() appears in the cell output.
player_df.head()
action_df.head()
# social_df.head()
# group_df.head()
# network_df.head()

In [7]:
def create_player_nodes_batch(tx, player_data_list):
    """Create one ``Player`` node per entry of ``player_data_list``.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        player_data_list: List of property maps. The list is unwound in
            Cypher and each map becomes the complete property set of a
            newly created node.
    """
    cypher = """
        UNWIND $player_list AS playerData
        CREATE (p:Player)
        SET p = playerData
    """
    tx.run(cypher, player_list=player_data_list)

def create_action_nodes_batch(tx, action_data_list):
    """Create one ``Action`` node per entry of ``action_data_list``.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        action_data_list: List of property maps. The list is unwound in
            Cypher and each map becomes the complete property set of a
            newly created node.
    """
    cypher = """
        UNWIND $action_list AS actionData
        CREATE (a:Action)
        SET a = actionData
    """
    tx.run(cypher, action_list=action_data_list)

def create_performed_relationships_batch(tx, action_data_list):
    """Link players to their ``Action`` nodes with ``PERFORMED`` relationships.

    Both ends are looked up by the ``Actor`` id of each row. ``Action`` nodes
    carry that property because ``create_action_nodes_batch`` copies every key
    of the same row dicts onto the node.

    The previous version located the ``Action`` node by equality on all of its
    feature properties, which had three problems:

    * players whose feature values happened to be identical were linked to
      each other's ``Action`` nodes;
    * a row holding a NaN never matched, because NaN is not equal to itself;
    * ``CREATE`` added a duplicate relationship on every re-run.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        action_data_list: List of row dicts; only the ``Actor`` key is read.
    """
    query = """
        UNWIND $action_list AS actionData
        WITH DISTINCT toInteger(actionData.Actor) AS actorId
        MATCH (p:Player {Actor: actorId})
        MATCH (a:Action {Actor: actorId})
        MERGE (p)-[:PERFORMED]->(a)
    """
    # MERGE (rather than CREATE) keeps the load idempotent across re-runs.
    tx.run(query, action_list=action_data_list)

def create_social_relationships_batch(tx, social_data_list):
    """Store ``Social_diversity`` on existing ``Player`` nodes.

    Despite the name, no relationship is created: each row's ``Actor`` is
    used to find the player, whose ``Social_diversity`` property is then set.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        social_data_list: List of row dicts providing ``Actor`` and
            ``Social_diversity``.
    """
    cypher = """
        UNWIND $social_list AS socialData
        MATCH (p:Player {Actor: toInteger(socialData.Actor)})
        SET p.Social_diversity = toFloat(socialData.Social_diversity)
    """
    tx.run(cypher, social_list=social_data_list)

def create_group_relationships_batch(tx, group_data_list):
    """Store group-activity features on existing ``Player`` nodes.

    Despite the name, no relationship is created: each row's ``Actor`` is
    used to find the player, on which ``Avg_PartyTime``, ``GuildAct_count``
    and ``GuildJoin_count`` are then set.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        group_data_list: List of row dicts providing ``Actor`` and the three
            properties listed above.
    """
    cypher = """
        UNWIND $group_list AS groupData
        MATCH (p:Player {Actor: toInteger(groupData.Actor)})
        SET p.Avg_PartyTime = toFloat(groupData.Avg_PartyTime),
            p.GuildAct_count = toInteger(groupData.GuildAct_count),
            p.GuildJoin_count = toInteger(groupData.GuildJoin_count)
    """
    tx.run(cypher, group_list=group_data_list)

def create_network_properties_batch(tx, network_data_list):
    """Merge network-measure properties into existing ``Player`` nodes.

    Each row's ``Actor`` is used to find the player; every other key except
    ``A_Acc`` is added to (or overwrites) the node's properties.

    The previous version removed ``Actor`` from each dict *before* running a
    query that matched on ``networkData.Actor``. The lookup key was therefore
    always null and no player was ever updated. It also mutated the caller's
    dicts in place. The lookup key and the properties to write are now sent
    as separate fields of a freshly built payload.

    Args:
        tx: Transaction-like object; only its ``run`` method is used.
        network_data_list: List of row dicts containing ``Actor`` plus the
            properties to store. The dicts are not modified.
    """
    excluded_keys = ("Actor", "A_Acc")  # never written as network properties
    payload = [
        {
            "Actor": row.get("Actor"),
            "props": {key: value for key, value in row.items() if key not in excluded_keys},
        }
        for row in network_data_list
    ]
    query = """
        UNWIND $network_list AS networkData
        MATCH (p:Player {Actor: toInteger(networkData.Actor)})
        SET p += networkData.props
    """
    tx.run(query, network_list=payload)

In [None]:
batch_size = 1000  # Rows sent to Neo4j per write transaction; adjust as needed

# DataFrame.to_dict("records") builds the list of row dicts in a single pass
# and keeps each column's own dtype. Going through iterrows() first turns every
# row into a Series, which upcasts integer columns to float whenever the row
# also contains floats, and is considerably slower on large frames.

# Player Nodes
print("Preparing Player Data...")
player_data_list = player_df.to_dict("records")
print(player_data_list[0])

# Action Nodes and Relationships
print("Preparing Action Data...")
action_data_list = action_df.to_dict("records")
print(action_data_list[0])

# Social Relationships
print("Preparing Social Data...")
social_data_list = social_df.to_dict("records")
print(social_data_list[0])

# Group Relationships
print("Preparing Group Data...")
group_data_list = group_df.to_dict("records")
print(group_data_list[0])

# Network Properties
print("Preparing Network Data...")
network_data_list = network_df.to_dict("records")
print(network_data_list[0])


In [None]:
# -----------------------------------------------------------------------------
# Neo4j Operations with Batching and Progress Bars
# -----------------------------------------------------------------------------

def _write_in_batches(session, write_fn, rows, label, unit, chunk_size):
    """Run ``write_fn`` over ``rows`` in slices of ``chunk_size``.

    Each slice is written in its own managed write transaction, and a
    progress bar labelled ``label`` advances by the slice length.
    """
    print(f"\nCreating {label}...")
    with tqdm(total=len(rows), desc=label, unit=unit) as pbar:
        for start in range(0, len(rows), chunk_size):
            chunk = rows[start:start + chunk_size]
            session.execute_write(write_fn, chunk)
            pbar.update(len(chunk))
    print(f"{label} Created.")


# (write function, rows, label, progress-bar unit). Order matters: nodes must
# exist before the steps that MATCH them.
load_steps = [
    (create_player_nodes_batch, player_data_list, "Player Nodes", "node"),
    (create_action_nodes_batch, action_data_list, "Action Nodes", "node"),
    (create_performed_relationships_batch, action_data_list, "PERFORMED Relationships", "rel"),
    (create_social_relationships_batch, social_data_list, "Social Relationships", "rel"),
    (create_group_relationships_batch, group_data_list, "Group Relationships", "rel"),
    (create_network_properties_batch, network_data_list, "Network Properties", "property"),
]

# Open a fresh driver for the load. The `driver` left over from the
# connectivity-check cell was created inside a `with` block and is already
# closed, so it must not be reused for new sessions.
with GraphDatabase.driver(uri, auth=(username, password)) as driver:
    with driver.session() as session:
        for write_fn, rows, label, unit in load_steps:
            _write_in_batches(session, write_fn, rows, label, unit, batch_size)

print("\nKnowledge Graph Creation Complete!")

In [None]:
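# -------------------------------------------------------------------------
# NOTE: leftover cell. PERFORMED relationships are already created in the
# load cell above (right after the Action nodes); nothing needs to run here.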
