# Construct Knowledge Graph

### Import required libraries

In [24]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt


### Load data

In [25]:
# Input tables, given relative to the notebook's working directory.
ENTITIES_CSV = "data/entities_extracted.csv"
RELATIONS_CSV = "data/relations.csv"

# entities_df supplies the graph nodes; relations_df supplies the edges.
entities_df = pd.read_csv(ENTITIES_CSV)
relations_df = pd.read_csv(RELATIONS_CSV)

### Initialize directed graph

In [26]:
# Directed graph that the cells below fill in: nodes are entity names and each
# edge points from a relation's source to its target.
# NOTE(review): nx.DiGraph stores at most one edge per ordered (source, target)
# pair, so re-adding an existing pair updates that edge's attributes rather than
# creating a second edge — confirm this is intended, or switch to nx.MultiDiGraph.
G = nx.DiGraph()

### Add nodes

In [27]:
# One node per entity row, keyed by the entity text. The entity type and the
# row's id are stored as the node attributes 'type' and 'doi'. Rows that repeat
# an entity update the attributes of the existing node.
node_records = entities_df[['entity', 'entity_type', 'id']].itertuples(index=False, name=None)
G.add_nodes_from(
    (entity, {'type': entity_type, 'doi': doi})
    for entity, entity_type, doi in node_records
)

### Add edges

In [28]:
# One directed edge per relation row, carrying the relation label, the
# supporting sentence and the DOI as edge attributes.
edge_columns = ['source', 'target', 'relation', 'sentence', 'doi']
for src, dst, relation, sentence, doi in relations_df[edge_columns].itertuples(index=False, name=None):
    # Endpoints that are not already in the graph are added with a placeholder
    # type (and no 'doi' attribute) before the edge is created.
    for endpoint in (src, dst):
        if endpoint not in G:
            G.add_node(endpoint, type='UNKNOWN')

    G.add_edge(src, dst, relation=relation, sentence=sentence, doi=doi)

In [29]:
# Sanity check: print the complete node and edge views with their attribute dicts.
for heading, view in (
    ("Nodes with attributes:", G.nodes(data=True)),
    ("Edges with attributes:", G.edges(data=True)),
):
    print(heading)
    print(view)

Nodes with attributes:
[('dibasic', {'type': 'CHEMICAL', 'doi': '10.1101/2025.10.16.682669'}), ('OC43', {'type': 'DISEASE', 'doi': '10.1101/2025.09.28.25336743'}), ('infection', {'type': 'DISEASE', 'doi': '10.1101/2025.09.30.679491'}), ('viral infections', {'type': 'DISEASE', 'doi': '10.1101/2025.10.16.682991'}), ('trametinib', {'type': 'CHEMICAL', 'doi': '10.1101/2025.10.15.682635'}), ('sorafenib', {'type': 'CHEMICAL', 'doi': '10.1101/2025.10.15.682635'}), ('coronavirus infections', {'type': 'DISEASE', 'doi': '10.1101/2025.10.15.682635'}), ('SARS-CoV-2 infection', {'type': 'DISEASE', 'doi': '10.1101/2025.09.30.679491'}), ('Tyrosine', {'type': 'CHEMICAL', 'doi': '10.1101/2025.10.17.683077'}), ('pan-coronaviruses infection', {'type': 'DISEASE', 'doi': '10.1101/2025.10.17.683077'}), ('HGS deficiency', {'type': 'DISEASE', 'doi': '10.1101/2025.10.17.683077'}), ('riboflavin tetrabutyrate', {'type': 'CHEMICAL', 'doi': '10.1101/2025.10.17.683077'}), ('RTB', {'type': 'CHEMICAL', 'doi': '10.110

## Visualization

In [33]:
# Render G as an interactive pyvis network, colouring nodes by their 'type'
# attribute, and write it to knowledge_graph.html.
# `Network` and `plt` come from the imports cell at the top of the notebook.


def _rgb_to_hex(rgb):
    """Convert an (r, g, b) tuple to a '#rrggbb' string.

    Each component is scaled by 255 and truncated to an int, so components are
    expected to be floats in [0, 1].
    """
    r, g, b = rgb
    return f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}"


# Unique node types in a stable order, so a given type gets the same colour on
# every run (plain set iteration order can change between kernel sessions).
# key=str keeps the sort well-defined if a type is None or NaN.
node_types = sorted({data.get('type') for _, data in G.nodes(data=True)}, key=str)

# Colour map from matplotlib's tab20 palette.
colors = plt.cm.tab20.colors  # RGB tuples

# Index the palette modulo its length: with more node types than palette
# entries, colours repeat instead of leaving types out of the map, which would
# raise KeyError in the node loop below.
type_to_color = {
    node_type: _rgb_to_hex(colors[i % len(colors)])
    for i, node_type in enumerate(node_types)
}

net = Network(height="750px", width="100%", notebook=True, directed=True)

# Add nodes with colors based on type; the hover title shows type and DOI.
for node, data in G.nodes(data=True):
    net.add_node(
        node,
        label=node,
        title=f"Type: {data.get('type')}\nDOI: {data.get('doi')}",
        color=type_to_color[data.get('type')]
    )

# Add edges labelled with the relation; the hover title shows the source
# sentence and DOI.
for source, target, data in G.edges(data=True):
    net.add_edge(
        source,
        target,
        label=data.get('relation'),
        title=f"Sentence: {data.get('sentence')}\nDOI: {data.get('doi')}",
        color="gray",
        width=2
    )

net.force_atlas_2based()
net.show_buttons(filter_=['physics'])
net.write_html("knowledge_graph.html", open_browser=True)

