In [2]:
import os

import pandas as pd
import pyvis

In [7]:
# Parameters
# Model name; it prefixes the Neo4j CSV file names read by this notebook and
# is embedded in the HTML file name written by visualize_result.
MODEL_NAME: str = "all-mpnet-base-v2"
# NOTE(review): not referenced by any of the visible cells — confirm whether
# it is still needed.
CONTRIBUTOR: str = "Health Promotion Board"

# Content category to visualise; it selects the sub-folder under
# ../data/07_model_output. Use 'all' when running across all categories.
CONTENT_CATEGORY: str = "live-healthy-articles"

In [8]:
# Folder holding the Neo4j outputs for the selected content category.
NEO4J_FOLDER_PATH = os.path.join(
    "..", "data", "07_model_output", CONTENT_CATEGORY, "neo4j"
)

# Per-model CSV exports: node pairs that share an edge, and stand-alone nodes.
NEO4J_CLUSTERED_DATA_PATH = os.path.join(
    NEO4J_FOLDER_PATH, f"{MODEL_NAME}_neo4j_clustered_data.csv"
)
NEO4J_UNCLUSTERED_DATA_PATH = os.path.join(
    NEO4J_FOLDER_PATH, f"{MODEL_NAME}_neo4j_unclustered_data.csv"
)

In [9]:
# Function to visualize the result


def visualize_result(
    clustered_df: pd.DataFrame,
    unclustered_df: pd.DataFrame,
    output_file=None,
) -> None:
    """Build a pyvis network from the clustering output and write it as HTML.

    Every row of ``clustered_df`` contributes its two endpoint nodes (grouped
    by their predicted cluster) and the edge between them. Every row of
    ``unclustered_df`` contributes one stand-alone node without a group.

    Args:
        clustered_df: Frame with the columns ``node_1``, ``node_2`` and
            ``edge_weight`` plus, for each endpoint prefix,
            ``<prefix>_ground_truth``, ``<prefix>_pred_cluster`` and
            ``<prefix>_title``.
        unclustered_df: Frame with the columns ``node_title`` and
            ``node_ground_truth``.
        output_file: Name of the HTML file passed to ``Network.show``. When
            left as ``None`` the name ``neo4j_network_{MODEL_NAME}.html`` is
            used, built from the notebook-level ``MODEL_NAME`` constant.

    Returns:
        None. The result is the HTML file produced by ``Network.show``.
    """
    # Resolve the default lazily so the global is only needed when the caller
    # does not supply an explicit file name.
    if output_file is None:
        output_file = f"neo4j_network_{MODEL_NAME}.html"

    visual_graph = pyvis.network.Network(select_menu=True, filter_menu=True)

    # Add node-node pairs: both endpoints first, then the edge joining them.
    for _, row in clustered_df.iterrows():
        for endpoint in ("node_1", "node_2"):
            ground_truth = row[f"{endpoint}_ground_truth"]
            pred_cluster = row[f"{endpoint}_pred_cluster"]
            node_title = row[f"{endpoint}_title"]
            visual_graph.add_node(
                row[endpoint],
                label=row[endpoint],
                title=f"Ground Truth: {ground_truth}\nPredicted: {pred_cluster}\nTitle: {node_title}",
                group=pred_cluster,
            )

        visual_graph.add_edge(
            row["node_1"], row["node_2"], title=f"Edge Weight: {row['edge_weight']}"
        )

    # Add solo nodes: these carry no predicted cluster, hence no group.
    for _, row in unclustered_df.iterrows():
        visual_graph.add_node(
            row["node_title"],
            label=row["node_title"],
            title=f"Ground Truth: {row['node_ground_truth']}\nPredicted: No Community\nTitle: {row['node_title']}",
        )

    visual_graph.show(output_file, notebook=False)

In [10]:
# Read the two Neo4j exports: connected node pairs and stand-alone nodes.
clustered_df = pd.read_csv(NEO4J_CLUSTERED_DATA_PATH)
unclustered_df = pd.read_csv(NEO4J_UNCLUSTERED_DATA_PATH)

# Render both frames into a single interactive network.
visualize_result(clustered_df=clustered_df, unclustered_df=unclustered_df)

neo4j_network_all-mpnet-base-v2.html
