In [7]:
#### Focus on the project knowledge graph: build it from keyword search results and visualize it
import json
import networkx as nx
from pyvis.network import Network
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

def _get_or_add_shared_node(G, registry, prefix, name, node_type, size, color):
    """Return the node id for *name*, adding the node to *G* on first use.

    *registry* maps a display name to its node id so that every project
    referring to the same location / funding agency / keyword links to one
    shared node instead of creating duplicates.
    """
    node_id = registry.get(name)
    if node_id is None:
        node_id = registry[name] = f"{prefix}_{name}"
        G.add_node(node_id, label=name, type=node_type, size=size, color=color)
    return node_id


def build_knowledge_graph_from_projects(projects_data, search_results=None, closest_file=None, query_keyword=None):
    """Build the knowledge graph based on JSON-LD projects data.

    Args:
        projects_data: Parsed JSON-LD document; its '@graph' entry is iterated
            as a list of project dicts.
        search_results: Optional collection of project identifiers. When it is
            non-empty, only projects whose 'schema:identifier' is in it are
            added. ``None`` or an empty collection disables the filter.
        closest_file: Optional file CID; the file node with this CID is drawn
            larger and in a distinct color.
        query_keyword: Optional keyword; projects whose name contains it
            (case-insensitive) are drawn larger and in a distinct color.

    Returns:
        An undirected ``networkx.Graph`` whose nodes carry ``label``, ``type``,
        ``size`` and ``color`` attributes and whose edges carry ``label``,
        ``color`` and ``width`` attributes.

    Raises:
        KeyError: If *projects_data* has no '@graph' entry.
    """
    G = nx.Graph()

    # Registries of shared nodes (display name -> node id) so that locations,
    # keywords and funding agencies are created once and reused across projects.
    location_nodes = {}
    keyword_nodes = {}
    funding_agency_nodes = {}

    # Lower-case the query once instead of once per project.
    needle = query_keyword.lower() if query_keyword else None

    for project in projects_data['@graph']:
        project_id = project.get('schema:identifier', 'Unknown Project')

        # Only add nodes for projects matching search results (if provided).
        if search_results and project_id not in search_results:
            continue

        # The description block is optional: fall back to an empty dict so the
        # lookups below use their defaults instead of raising KeyError.
        description = project.get('schema:description') or {}
        project_name = description.get('schema:name', 'Unnamed Project')

        # Emphasize projects whose name contains the query keyword.
        is_closest_project = needle is not None and needle in project_name.lower()
        project_color = '#f1c40f' if is_closest_project else '#3498db'
        project_size = 30 if is_closest_project else 20
        G.add_node(project_id, label=project_name, type='Project', size=project_size, color=project_color)

        # Link project to its owner (linked_user).
        linked_user = project.get('schema:linked_user')
        if linked_user:
            G.add_node(linked_user, label=linked_user, type='User', size=15, color='#e74c3c')
            G.add_edge(project_id, linked_user, label='Owner', color='#2c3e50', width=2)

        # Link project to its location, if one is named.
        project_location = (description.get('schema:location') or {}).get('schema:name')
        if project_location is not None and project_location != 'Unknown Location':
            location_id = _get_or_add_shared_node(
                G, location_nodes, 'location', project_location, 'Location', 10, '#f39c12')
            G.add_edge(project_id, location_id, label='Located in', color='#f39c12', width=1.5)

        # Link project to its funding agency, if one is named. The node uses
        # '#c0392b' so it matches the 'Funded by' edge and stays visually
        # distinct from User nodes ('#e74c3c').
        funding_agency = (description.get('schema:funding') or {}).get('schema:name')
        if funding_agency is not None and funding_agency != 'Unknown Funding Agency':
            funding_id = _get_or_add_shared_node(
                G, funding_agency_nodes, 'funding', funding_agency, 'FundingAgency', 10, '#c0392b')
            G.add_edge(project_id, funding_id, label='Funded by', color='#c0392b', width=1.5)

        # Link project to keywords. A lone string is treated as one keyword
        # rather than being iterated character by character.
        keywords = description.get('schema:keywords') or []
        if isinstance(keywords, str):
            keywords = [keywords]
        for keyword in keywords:
            keyword_id = _get_or_add_shared_node(
                G, keyword_nodes, 'keyword', keyword, 'Keyword', 4, '#8e44ad')
            G.add_edge(project_id, keyword_id, label='Has Keyword', color='#8e44ad', width=1.5)

        # Link project to its files.
        for file_info in project.get('schema:files') or []:
            # NOTE: files without a 'file_cid' all share the 'Unknown File'
            # node id (likewise 'Unknown Metadata' below).
            file_cid = file_info.get('file_cid', 'Unknown File')
            file_metadata = file_info.get('metadata') or {}
            file_title = file_metadata.get('dc:title', 'Untitled File')

            # Set visual emphasis for the closest matching file.
            is_closest_file = file_cid == closest_file
            file_color = '#e67e22' if is_closest_file else '#9b59b6'
            file_size = 15 if is_closest_file else 10

            G.add_node(file_cid, label=file_title, type='File', size=file_size, color=file_color)
            G.add_edge(project_id, file_cid, label='Has File', color='#8e44ad', width=2)

            # Add metadata CID node and link it to the file.
            metadata_cid = file_info.get('metadata_cid', 'Unknown Metadata')
            G.add_node(metadata_cid, label=f"Metadata: {metadata_cid}", type='Metadata', size=8, color='#2ecc71')
            G.add_edge(file_cid, metadata_cid, label='Has Metadata', color='#27ae60', width=1.5)

    return G


def visualize_graph_with_legend(G, output_file="knowledge_graph.html"):
    """Visualize the knowledge graph using pyvis with a legend and improved styling.

    Args:
        G: NetworkX graph whose nodes/edges carry the visual attributes
            (label, size, color, width) to render.
        output_file: Path of the HTML file to write. Defaults to
            "knowledge_graph.html".

    Returns:
        Whatever ``Network.show`` returns for a notebook-mode network
        (an inline frame pointing at *output_file* in current pyvis releases).
    """
    net = Network(notebook=True, height='750px', width='100%', cdn_resources='in_line')  # Set size and use inline resources for Jupyter
    net.barnes_hut(gravity=-4000)  # Use a force-directed layout for better spacing

    # Custom options for better visual clarity
    net.set_options("""
    var options = {
      "nodes": {
        "shape": "dot",
        "font": {
          "size": 16
        }
      },
      "edges": {
        "smooth": {
          "type": "continuous"
        },
        "color": {
          "inherit": false
        },
        "width": 1.5
      },
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -4000,
          "springLength": 100,
          "springConstant": 0.01,
          "avoidOverlap": 0.1
        },
        "minVelocity": 0.75
      }
    }
    """)

    # Adding the main graph from NetworkX
    net.from_nx(G)
    
    # Adding a legend to the graph for visual clarity
    legend = """
    <div style="position:absolute; top:10px; right:10px; background-color:white; padding:10px; border-radius:5px; box-shadow:0 0 10px rgba(0,0,0,0.5);">
        <h3>Knowledge Graph Legend</h3>
        <ul style="list-style:none; padding:0;">
            <li><span style="background-color:#3498db; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Project</strong></li>
            <li><span style="background-color:#f1c40f; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Highlighted Project (Keyword Match)</strong></li>
            <li><span style="background-color:#e74c3c; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>User</strong></li>
            <li><span style="background-color:#e67e22; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Highlighted File</strong></li>
            <li><span style="background-color:#9b59b6; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>File</strong></li>
            <li><span style="background-color:#2ecc71; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Metadata</strong></li>
            <li><span style="background-color:#1abc9c; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Duration</strong></li>
            <li><span style="background-color:#f39c12; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Location</strong></li>
            <li><span style="background-color:#c0392b; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Funding Agency</strong></li>
            <li><span style="background-color:#8e44ad; padding:5px; border-radius:50%; display:inline-block; width:15px; height:15px; margin-right:5px;"></span> <strong>Keyword</strong></li>
        </ul>
    </div>
    """

    # Render first: show() generates the page, stores it on net.html and
    # writes it to disk. Editing net.html *before* this call has no effect,
    # because the HTML is regenerated here (see pyvis Network.write_html).
    frame = net.show(output_file)

    # Splice the legend in just before the final </body> and rewrite the file
    # so the page loaded by the returned frame actually contains it.
    head, closing_tag, tail = net.html.rpartition("</body>")
    if closing_tag:
        net.html = head + legend + closing_tag + tail
        with open(output_file, "w", encoding="utf-8") as fh:
            fh.write(net.html)

    return frame


def search_projects(keyword, index_dir):
    """Return the 'project_id' of up to 10 index hits for *keyword*.

    Opens the Whoosh index stored in *index_dir*, parses *keyword* against
    the "content" field and collects the stored 'project_id' of each hit.
    """
    index = open_dir(index_dir)
    with index.searcher() as searcher:
        parser = QueryParser("content", index.schema)
        hits = searcher.search(parser.parse(keyword), limit=10)
        # Stored fields must be read while the searcher is still open.
        project_ids = []
        for hit in hits:
            project_ids.append(hit['project_id'])
    return project_ids


def search_index(keyword, ix):
    """Log every document in *ix* whose "full_text" field matches *keyword*.

    Each hit is logged at INFO level with its cid, name, title, creator and
    size; a single INFO line is logged when nothing matches. Any exception
    raised while searching is logged at ERROR level and not re-raised.
    Returns None.
    """
    try:
        with ix.searcher() as searcher:
            parser = QueryParser("full_text", ix.schema)
            hits = searcher.search(parser.parse(keyword))

            # Guard clause: nothing matched, report it and stop.
            if not hits:
                logging.info(f"No results found for '{keyword}'")
                return

            for hit in hits:
                logging.info(f"CID: {hit['cid']}, Name: {hit['name']}, Title: {hit['title']}, "
                             f"Creator: {hit['creator']}, Size: {hit['size']} bytes")
    except Exception as e:
        logging.error(f"Error occurred during search: {e}")

# Function to set up the Whoosh index directory
def setup_index(schema):
    """Open the Whoosh index in "indexdir", creating it when necessary.

    A missing directory is created and a new index built in it with *schema*.
    An existing directory is opened; if it holds no index yet, a new one is
    created in place. Returns the index object.
    """
    index_dir = "indexdir"

    # Fresh start: no directory yet, so make it and build a new index.
    if not os.path.exists(index_dir):
        os.mkdir(index_dir)
        logging.info("Index directory created.")
        return create_in(index_dir, schema)

    # Directory exists: reuse the index unless the directory is empty.
    try:
        index = open_dir(index_dir)
        logging.info("Opened existing index.")
    except EmptyIndexError:
        logging.warning("Index is empty. Creating a new index.")
        index = create_in(index_dir, schema)
    return index



# -
# # Function to set up the Whoosh index directory
# def setup_index(schema):
#     """Sets up the Whoosh index directory and returns the index object."""
#     index_dir = "indexdir"
#     if not os.path.exists(index_dir):
#         os.mkdir(index_dir)
#         logging.info("Index directory created.")
#         ix = create_in(index_dir, schema)
#     else:
#         try:
#             ix = open_dir(index_dir)
#             logging.info("Opened existing index.")
#         except EmptyIndexError:
#             logging.warning("Index is empty. Creating a new index.")
#             ix = create_in(index_dir, schema)
#     return ix


# def search_index(keyword, ix):
#     """Search for a keyword in the indexed documents."""
#     try:
#         with ix.searcher() as searcher:
#             query = QueryParser("full_text", ix.schema).parse(keyword)
#             results = searcher.search(query)

#             if results:
#                 for result in results:
#                     logging.info(f"CID: {result['cid']}, Name: {result['name']}, Title: {result['title']}, "
#                                  f"Creator: {result['creator']}, Size: {result['size']} bytes")
#             else:
#                 logging.info(f"No results found for '{keyword}'")
#     except Exception as e:
#         logging.error(f"Error occurred during search: {e}")


# -

# Example usage: search the index for KEYWORD, then graph the matching projects.

PROJECTS_JSON = 'datasets/projects.json'
INDEX_DIR = 'indexdir'
KEYWORD = 'blockchain'


# Load the JSON-LD file. Decode it explicitly as UTF-8 instead of relying on
# the platform's locale encoding, which can mis-read non-ASCII text.
with open(PROJECTS_JSON, 'r', encoding='utf-8') as f:
    projects_data = json.load(f)

# Perform a keyword search against the Whoosh index stored in INDEX_DIR

search_results = search_projects(KEYWORD, INDEX_DIR)
print(search_results)

# Build the graph with the search results and visualize.
# NOTE: an empty result list is falsy, so the builder applies no filter and
# every project in the file ends up in the graph.
G = build_knowledge_graph_from_projects(projects_data, search_results=search_results, query_keyword=KEYWORD)
visualize_graph_with_legend(G)

[]
knowledge_graph.html
