In [1]:
%%writefile requirements.txt
# Core LLM and LangChain packages
langchain>=0.1.0
langchain-experimental>=0.0.45
langchain-openai>=0.1.0

# Environment variable support
python-dotenv>=1.0.0

# Graph visualization
pyvis>=0.3.2

# Web UI
streamlit>=1.32.0

Writing requirements.txt


In [2]:
!pip install -r requirements.txt

Collecting langchain-experimental>=0.0.45 (from -r requirements.txt (line 3))
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-openai>=0.1.0 (from -r requirements.txt (line 4))
  Downloading langchain_openai-0.3.18-py3-none-any.whl.metadata (2.3 kB)
Collecting python-dotenv>=1.0.0 (from -r requirements.txt (line 7))
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting pyvis>=0.3.2 (from -r requirements.txt (line 10))
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting streamlit>=1.32.0 (from -r requirements.txt (line 13))
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain-experimental>=0.0.45->-r requirements.txt (line 3))
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain>=0.1.0->-r requirements.txt (line 2))
  Downloading langchain_co

In [4]:
%%writefile app.py
# Import necessary modules
import streamlit as st
import streamlit.components.v1 as components  # For embedding custom HTML
from generate_knowledge_graph import generate_knowledge_graph

# Set up Streamlit page configuration
st.set_page_config(
    page_icon=None,
    layout="wide",  # Use wide layout for better graph display
    initial_sidebar_state="auto",
    menu_items=None
)

# Set the title of the app
st.title("Knowledge Graph From Text")

# File the rendered graph is written to before being embedded in the page
OUTPUT_FILE = "knowledge_graph.html"


def render_knowledge_graph(text):
    """
    Generate the knowledge graph for ``text`` and embed it in the page.

    Shared by both input methods so the generate/save/display steps exist
    only once.

    Args:
        text (str): Non-empty input text to convert into a knowledge graph.
    """
    with st.spinner("Generating knowledge graph..."):
        # Call the function to generate the graph from the text
        net = generate_knowledge_graph(text)

        # generate_knowledge_graph returns None when the graph could not be saved
        if net is None:
            st.error("The knowledge graph could not be generated.")
            return

        st.success("Knowledge graph generated successfully!")

        # Save the graph to an HTML file
        net.save_graph(OUTPUT_FILE)

        # Read the HTML back and display it; `with` closes the file handle
        with open(OUTPUT_FILE, "r", encoding="utf-8") as html_file:
            components.html(html_file.read(), height=1000)


# Sidebar section for user input method
st.sidebar.title("Input document")
input_method = st.sidebar.radio(
    "Choose an input method:",
    ["Upload txt", "Input text"],  # Options for uploading a file or manually inputting text
)

text = ""

# Case 1: User chooses to upload a .txt file
if input_method == "Upload txt":
    # File uploader widget in the sidebar
    uploaded_file = st.sidebar.file_uploader(label="Upload file", type=["txt"])

    if uploaded_file is not None:
        try:
            # getvalue() returns the whole buffer regardless of the read position
            text = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.sidebar.error("The uploaded file is not valid UTF-8 text.")

# Case 2: User chooses to directly input text
else:
    # Text area for manual input
    text = st.sidebar.text_area("Input text", height=300)

# Only offer generation when there is some non-blank text to process
if text.strip() and st.sidebar.button("Generate Knowledge Graph"):
    render_knowledge_graph(text)



Writing app.py


In [5]:
%%writefile generate_knowledge_graph.py
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from pyvis.network import Network

from dotenv import load_dotenv
import os
import asyncio


# Load the .env file
load_dotenv()
# Get API key from environment variable
# NOTE(review): `api_key` is not passed to ChatOpenAI below — presumably the client
# picks up OPENAI_API_KEY from the environment on its own; confirm.
api_key = os.getenv("OPENAI_API_KEY")

# Chat model shared by every request; temperature=0 is set explicitly here.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

# Module-level transformer used by extract_graph_data() below.
graph_transformer = LLMGraphTransformer(llm=llm)


# Turn raw text into graph documents via the module-level transformer
async def extract_graph_data(text):
    """
    Wrap ``text`` in a single Document and convert it with ``graph_transformer``.

    Args:
        text (str): Input text to be processed into graph format.

    Returns:
        The awaited result of ``graph_transformer.aconvert_to_graph_documents``
        for the one-document list (a list of graph documents).
    """
    source_docs = [Document(page_content=text)]
    return await graph_transformer.aconvert_to_graph_documents(source_docs)


def visualize_graph(graph_documents):
    """
    Visualizes a knowledge graph using PyVis based on the extracted graph documents.

    Nodes and relationships from every document in ``graph_documents`` are
    merged into one network. Only relationships whose source and target are
    both declared nodes are drawn, and only nodes that take part in such a
    relationship are added. An empty (or None) input yields an empty network
    instead of raising.

    Args:
        graph_documents (list): A list of GraphDocument objects with nodes and relationships.

    Returns:
        pyvis.network.Network: The visualized network graph object, or None
        if the HTML file could not be saved.
    """
    # Create network
    net = Network(height="1200px", width="100%", directed=True,
                  notebook=False, bgcolor="#222222", font_color="white",
                  filter_menu=True, cdn_resources='remote')

    # Build lookup for valid nodes (later duplicates of an id replace earlier ones)
    node_dict = {}
    relationships = []
    for graph_document in graph_documents or []:
        for node in graph_document.nodes:
            node_dict[node.id] = node
        relationships.extend(graph_document.relationships)

    # Keep only edges whose endpoints are both known nodes
    valid_edges = [
        rel for rel in relationships
        if rel.source.id in node_dict and rel.target.id in node_dict
    ]
    valid_node_ids = set()
    for rel in valid_edges:
        valid_node_ids.update([rel.source.id, rel.target.id])

    # Add valid nodes in declaration order so the output is deterministic
    for node_id, node in node_dict.items():
        if node_id not in valid_node_ids:
            continue
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception as e:
            # Best effort: one bad node must not abort the whole graph
            print(f"Skipping node {node_id!r}: {e}")

    # Add valid edges to the graph
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception as e:
            # Best effort: one bad edge must not abort the whole graph
            print(f"Skipping edge {rel.source.id!r} -> {rel.target.id!r}: {e}")

    # Configure graph layout and physics
    net.set_options("""
        {
            "physics": {
                "forceAtlas2Based": {
                    "gravitationalConstant": -100,
                    "centralGravity": 0.01,
                    "springLength": 200,
                    "springConstant": 0.08
                },
                "minVelocity": 0.75,
                "solver": "forceAtlas2Based"
            }
        }
    """)

    output_file = "knowledge_graph.html"
    try:
        net.save_graph(output_file)
        print(f"Graph saved to {os.path.abspath(output_file)}")
        return net
    except Exception as e:
        print(f"Error saving graph: {e}")
        return None


def generate_knowledge_graph(text):
    """
    Build a PyVis knowledge graph from raw text.

    The asynchronous extraction step is driven to completion with
    ``asyncio.run`` and its result is handed straight to ``visualize_graph``.

    Args:
        text (str): Input text to convert into a knowledge graph.

    Returns:
        Whatever ``visualize_graph`` returns for the extracted graph documents.
    """
    extraction = extract_graph_data(text)
    return visualize_graph(asyncio.run(extraction))

Writing generate_knowledge_graph.py


In [6]:
%%writefile knowledge_graph.html
<html>
    <head>
        <meta charset="utf-8">

            <script>function neighbourhoodHighlight(params) {
  // console.log("in nieghbourhoodhighlight");
  allNodes = nodes.get({ returnType: "Object" });
  // originalNodes = JSON.parse(JSON.stringify(allNodes));
  // if something is selected:
  if (params.nodes.length > 0) {
    highlightActive = true;
    var i, j;
    var selectedNode = params.nodes[0];
    var degrees = 2;

    // mark all nodes as hard to read.
    for (let nodeId in allNodes) {
      // nodeColors[nodeId] = allNodes[nodeId].color;
      allNodes[nodeId].color = "rgba(200,200,200,0.5)";
      if (allNodes[nodeId].hiddenLabel === undefined) {
        allNodes[nodeId].hiddenLabel = allNodes[nodeId].label;
        allNodes[nodeId].label = undefined;
      }
    }
    var connectedNodes = network.getConnectedNodes(selectedNode);
    var allConnectedNodes = [];

    // get the second degree nodes
    for (i = 1; i < degrees; i++) {
      for (j = 0; j < connectedNodes.length; j++) {
        allConnectedNodes = allConnectedNodes.concat(
          network.getConnectedNodes(connectedNodes[j])
        );
      }
    }

    // all second degree nodes get a different color and their label back
    for (i = 0; i < allConnectedNodes.length; i++) {
      // allNodes[allConnectedNodes[i]].color = "pink";
      allNodes[allConnectedNodes[i]].color = "rgba(150,150,150,0.75)";
      if (allNodes[allConnectedNodes[i]].hiddenLabel !== undefined) {
        allNodes[allConnectedNodes[i]].label =
          allNodes[allConnectedNodes[i]].hiddenLabel;
        allNodes[allConnectedNodes[i]].hiddenLabel = undefined;
      }
    }

    // all first degree nodes get their own color and their label back
    for (i = 0; i < connectedNodes.length; i++) {
      // allNodes[connectedNodes[i]].color = undefined;
      allNodes[connectedNodes[i]].color = nodeColors[connectedNodes[i]];
      if (allNodes[connectedNodes[i]].hiddenLabel !== undefined) {
        allNodes[connectedNodes[i]].label =
          allNodes[connectedNodes[i]].hiddenLabel;
        allNodes[connectedNodes[i]].hiddenLabel = undefined;
      }
    }

    // the main node gets its own color and its label back.
    // allNodes[selectedNode].color = undefined;
    allNodes[selectedNode].color = nodeColors[selectedNode];
    if (allNodes[selectedNode].hiddenLabel !== undefined) {
      allNodes[selectedNode].label = allNodes[selectedNode].hiddenLabel;
      allNodes[selectedNode].hiddenLabel = undefined;
    }
  } else if (highlightActive === true) {
    // console.log("highlightActive was true");
    // reset all nodes
    for (let nodeId in allNodes) {
      // allNodes[nodeId].color = "purple";
      allNodes[nodeId].color = nodeColors[nodeId];
      // delete allNodes[nodeId].color;
      if (allNodes[nodeId].hiddenLabel !== undefined) {
        allNodes[nodeId].label = allNodes[nodeId].hiddenLabel;
        allNodes[nodeId].hiddenLabel = undefined;
      }
    }
    highlightActive = false;
  }

  // transform the object into an array
  var updateArray = [];
  if (params.nodes.length > 0) {
    for (let nodeId in allNodes) {
      if (allNodes.hasOwnProperty(nodeId)) {
        // console.log(allNodes[nodeId]);
        updateArray.push(allNodes[nodeId]);
      }
    }
    nodes.update(updateArray);
  } else {
    // console.log("Nothing was selected");
    for (let nodeId in allNodes) {
      if (allNodes.hasOwnProperty(nodeId)) {
        // console.log(allNodes[nodeId]);
        // allNodes[nodeId].color = {};
        updateArray.push(allNodes[nodeId]);
      }
    }
    nodes.update(updateArray);
  }
}

/**
 * Hide every node except the ones listed in `params.nodes`; with an empty
 * list, undo a previously applied filter.
 *
 * Reads and writes the page-level `nodes` and `allNodes` variables.
 * `filterActive` is not declared anywhere else on this page, so it is probed
 * with `typeof` before being read; otherwise resetting before any filter was
 * applied throws a ReferenceError.
 *
 * @param {{nodes: Array}} params - `params.nodes` holds the node ids to keep visible.
 */
function filterHighlight(params) {
  allNodes = nodes.get({ returnType: "Object" });
  // if something is selected:
  if (params.nodes.length > 0) {
    filterActive = true;
    let selectedNodes = params.nodes;

    // hiding all nodes and saving the label
    for (let nodeId in allNodes) {
      allNodes[nodeId].hidden = true;
      if (allNodes[nodeId].savedLabel === undefined) {
        allNodes[nodeId].savedLabel = allNodes[nodeId].label;
        allNodes[nodeId].label = undefined;
      }
    }

    // un-hide the selected nodes and give them their label back
    for (let i = 0; i < selectedNodes.length; i++) {
      allNodes[selectedNodes[i]].hidden = false;
      if (allNodes[selectedNodes[i]].savedLabel !== undefined) {
        allNodes[selectedNodes[i]].label = allNodes[selectedNodes[i]].savedLabel;
        allNodes[selectedNodes[i]].savedLabel = undefined;
      }
    }
  } else if (typeof filterActive !== "undefined" && filterActive === true) {
    // reset all nodes
    for (let nodeId in allNodes) {
      allNodes[nodeId].hidden = false;
      if (allNodes[nodeId].savedLabel !== undefined) {
        allNodes[nodeId].label = allNodes[nodeId].savedLabel;
        allNodes[nodeId].savedLabel = undefined;
      }
    }
    filterActive = false;
  }

  // Push every node back to the DataSet (same update for both cases).
  nodes.update(Object.values(allNodes));
}

// Select the given node ids in the vis network, apply neighbourhood
// highlighting to them and return the ids unchanged.
// Note: the `nodes` parameter shadows the page-level `nodes` DataSet here.
function selectNode(nodes) {
  network.selectNodes(nodes);
  neighbourhoodHighlight({ nodes: nodes });
  return nodes;
}

// Select the given node ids in the vis network, hide every other node via
// filterHighlight() and return the ids unchanged.
// Note: the `nodes` parameter shadows the page-level `nodes` DataSet here.
function selectNodes(nodes) {
  network.selectNodes(nodes);
  filterHighlight({nodes: nodes});
  return nodes;
}

// Apply the filter chosen in the filter menu.
// `filter` is an object with `item` ('node' or 'edge'), `property` (the
// attribute name to compare) and `value` (array of accepted values, compared
// as strings). The matching node ids are collected and passed to selectNodes().
function highlightFilter(filter) {
  let selectedNodes = []
  let selectedProp = filter['property']
  if (filter['item'] === 'node') {
    let allNodes = nodes.get({ returnType: "Object" });
    for (let nodeId in allNodes) {
      // falsy property values (undefined, '', 0) never match
      if (allNodes[nodeId][selectedProp] && filter['value'].includes((allNodes[nodeId][selectedProp]).toString())) {
        selectedNodes.push(nodeId)
      }
    }
  }
  else if (filter['item'] === 'edge'){
    // NOTE(review): returnType is spelled 'object' here but "Object" elsewhere — confirm the DataSet treats both the same.
    let allEdges = edges.get({returnType: 'object'});
    // check if the selected property exists for selected edge and select the nodes connected to the edge
    for (let edge in allEdges) {
      if (allEdges[edge][selectedProp] && filter['value'].includes((allEdges[edge][selectedProp]).toString())) {
        // both endpoints are pushed, so an id can appear more than once
        selectedNodes.push(allEdges[edge]['from'])
        selectedNodes.push(allEdges[edge]['to'])
      }
    }
  }
  selectNodes(selectedNodes)
}</script>
            <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
            <script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>






                <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/tom-select/2.0.0-rc.4/css/tom-select.min.css" integrity="sha512-43fHB3GLgZfz8QXl1RPQ8O66oIgv3po9cJ5erMt1c4QISq9dYb195T3vr5ImnJPXuVroKcGBPXBFKETW8jrPNQ==" crossorigin="anonymous" referrerpolicy="no-referrer" />
                <script src="https://cdnjs.cloudflare.com/ajax/libs/tom-select/2.0.0-rc.4/js/tom-select.complete.js" integrity="sha512-jeF9CfnvzDiw9G9xiksVjxR2lib44Gnovvkv+3CgCG6NXCD4gqlA5nDAVW5WjpA+i+/zKsUWV5xNEbW1X/HH0Q==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>



<center>
<h1></h1>
</center>

<!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
<script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
        <link
          href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
          rel="stylesheet"
          integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
          crossorigin="anonymous"
        />
        <script
          src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
          integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
          crossorigin="anonymous"
        ></script>


        <center>
          <h1></h1>
        </center>
        <style type="text/css">

             #mynetwork {
                 width: 100%;
                 height: 1200px;
                 background-color: #222222;
                 border: 1px solid lightgray;
                 position: relative;
                 float: left;
             }






        </style>
    </head>


    <body>
        <div class="card" style="width: 100%">


              <div id="filter-menu" class="card-header">
                <div class="row no-gutters">
                  <div class="col-3 pb-2">
                    <select
                            class="form-select"
                            aria-label="Default select example"
                            onchange="updateFilter(value, 'item')"
                            id="select-item"
                        >
                        <option value="">Select a network item</option>
                        <option value="edge">edge</option>
                        <option value="node">node</option>
                    </select>
                  </div>
                  <div class="col-3 pb-2">
                    <select
                            class="form-select"
                            aria-label="Default select example"
                            onchange="updateFilter(value, 'property')"
                            id="select-property"
                        >
                        <option value="">Select a property...</option>
                    </select>
                  </div>
                  <div class="col-3 pb-2">
                    <select
                            class="form-select"
                            aria-label="Default select example"
                            id="select-value"
                        >
                        <option value="">Select value(s)...</option>
                    </select>
                  </div>
                  <div class="col-1 pb-2">
                    <button type="button" class="btn btn-primary btn-block" onclick="highlightFilter(filter);">Filter</button>
                  </div>
                  <div class="col-2 pb-2">
                    <button type="button" class="btn btn-primary btn-block" onclick="clearFilter(true)">Reset Selection</button>
                  </div>
                </div>
              </div>

            <div id="mynetwork" class="card-body"></div>
        </div>




        <script type="text/javascript">

              // Page-level state shared by drawGraph() and the filter/highlight helpers.
              // drawGraph() assigns nodes, edges, allNodes, allEdges, nodeColors, data and network.
              // NOTE(review): `highlightActive` and `filterActive`, which the highlight
              // helpers read, are not declared here.
              var edges;
              var nodes;
              var allNodes;
              var allEdges;
              var nodeColors;
              var originalNodes;
              var network;
              var container;
              var options, data;
              // Current filter-menu selection: item kind, property name, accepted values.
              var filter = {
                  item : '',
                  property : '',
                  value : []
              };




                  // Factory for the value dropdown's onItemAdd handler: every item
                  // the user adds is appended to filter['value'], so several values
                  // can be collected.
                  let updateValueFilter = function () {
                      return function (addedValue) {
                          filter['value'].push(addedValue);
                      };
                  };

                  // TomSelect control bound to #select-value. `maxItems: null` is set and
                  // each added item is recorded through the updateValueFilter() handler.
                  // NOTE(review): only onItemAdd is wired, so removing an item does not
                  // appear to remove it from filter['value'] — confirm.
                  let valueControl = new TomSelect("#select-value",{
                      maxItems: null,
                      valueField: 'id',
                      labelField: 'title',
                      searchField: 'title',
                      create: false,
                      sortField: {
                          field: "text",
                          direction: "asc"
                      },
                      onItemAdd: updateValueFilter()
                  });

                  // Factory for the property dropdown's onItemAdd handler: when a
                  // property is picked, the value dropdown is emptied and refilled
                  // with that property's value from every node or edge, depending on
                  // filter['item']. tom-select handles duplicates.
                  let addValues = function () {
                      return function (selectedProperty) {
                          // start from a clean value dropdown and an empty value filter
                          valueControl.clear();
                          valueControl.clearOptions();
                          filter['value'] = [];

                          let source;
                          if (filter['item'] === 'node') {
                              source = allNodes;
                          } else if (filter['item'] === 'edge') {
                              source = allEdges;
                          } else {
                              return;
                          }

                          for (let key in source) {
                              const propValue = source[key][selectedProperty];
                              valueControl.addOption({ id: propValue, title: propValue });
                          }
                      };
                  };

                  // TomSelect control bound to #select-property; picking a property
                  // runs the addValues() handler.
                  let propControl = new TomSelect("#select-property",{
                      valueField: 'id',
                      labelField: 'title',
                      searchField: 'title',
                      create: false,
                      sortField: {
                          field: "text",
                          direction: "asc"
                      },
                      onItemAdd: addValues()
                  });

                  // Factory for the item dropdown's onItemAdd handler: clears the
                  // dependent dropdowns, then lists every own property found on the
                  // edges or nodes as an option of the property dropdown.
                  // tom-select handles duplicates.
                  let addProperties = function () {
                      return function (selectedItem) {
                          clearFilter(false);

                          // bookkeeping fields the highlight helpers add to nodes
                          const internalNodeProps = ['hidden', 'savedLabel', 'hiddenLabel'];
                          let collection;
                          let skipped;
                          if (selectedItem === 'edge') {
                              collection = allEdges;
                              skipped = [];
                          } else if (selectedItem === 'node') {
                              collection = allNodes;
                              skipped = internalNodeProps;
                          } else {
                              return;
                          }

                          for (let key in collection) {
                              if (!collection.hasOwnProperty(key)) {
                                  continue;
                              }
                              for (let prop in collection[key]) {
                                  if (collection[key].hasOwnProperty(prop) && !skipped.includes(prop)) {
                                      propControl.addOption({id: prop, title: prop});
                                  }
                              }
                          }
                      };
                  };

                  // TomSelect control bound to #select-item (edge / node); picking an
                  // item runs the addProperties() handler.
                  let itemControl = new TomSelect("#select-item",{
                      create: false,
                      sortField:{
                          field: "text",
                          direction: "asc"
                      },
                      onItemAdd: addProperties()
                  });

                  function clearFilter(reset) {
                      // utility function to clear all the selected filter options
                      // if reset is set to true, the existing filter will be removed
                      // else, only the dropdown options are cleared
                      propControl.clear();
                      propControl.clearOptions();
                      valueControl.clear();
                      valueControl.clearOptions();
                      // replace the global filter with a fresh, empty one
                      filter = {
                          item : '',
                          property : '',
                          value : []
                      }
                      if (reset) {
                          itemControl.clear();
                          // an empty node list asks filterHighlight to undo any active filter
                          filterHighlight({nodes: []})
                      }
                  }

                  // Store one dropdown choice in the global `filter` object; wired to
                  // the onchange attributes of #select-item and #select-property.
                  function updateFilter(value, key) {
                      // key could be 'item' or 'property' and value is as selected in dropdown
                      filter[key] = value
                  }



              // Builds the node/edge DataSets, records each node's original color in
              // `nodeColors`, creates the vis.Network inside #mynetwork and returns it.
              // Assigns the page-level nodes, edges, allNodes, allEdges, nodeColors,
              // data and network variables.
              function drawGraph() {
                  var container = document.getElementById('mynetwork');

                  // parsing and collecting nodes and edges from the python
                  nodes = new vis.DataSet([
                      {"font": {"color": "white"}, "group": "Fictional organization", "id": "Night\u0027S Watch", "label": "Night\u0027S Watch", "shape": "dot", "title": "Fictional organization"},
                      {"font": {"color": "white"}, "group": "Person", "id": "David Benioff", "label": "David Benioff", "shape": "dot", "title": "Person"},
                      {"font": {"color": "white"}, "group": "Award", "id": "Primetime Emmy Awards", "label": "Primetime Emmy Awards", "shape": "dot", "title": "Award"},
                      {"font": {"color": "white"}, "group": "Award", "id": "Hugo Awards", "label": "Hugo Awards", "shape": "dot", "title": "Award"},
                      {"font": {"color": "white"}, "group": "Person", "id": "D. B. Weiss", "label": "D. B. Weiss", "shape": "dot", "title": "Person"},
                      {"font": {"color": "white"}, "group": "Fictional continent", "id": "Essos", "label": "Essos", "shape": "dot", "title": "Fictional continent"},
                      {"font": {"color": "white"}, "group": "Organization", "id": "Hbo", "label": "Hbo", "shape": "dot", "title": "Organization"},
                      {"font": {"color": "white"}, "group": "Person", "id": "George R. R. Martin", "label": "George R. R. Martin", "shape": "dot", "title": "Person"},
                      {"font": {"color": "white"}, "group": "Television series", "id": "A Knight Of The Seven Kingdoms", "label": "A Knight Of The Seven Kingdoms", "shape": "dot", "title": "Television series"},
                      {"font": {"color": "white"}, "group": "Book series", "id": "A Song Of Ice And Fire", "label": "A Song Of Ice And Fire", "shape": "dot", "title": "Book series"},
                      {"font": {"color": "white"}, "group": "Award", "id": "Golden Globe Award For Best Television Series \u2013 Drama", "label": "Golden Globe Award For Best Television Series \u2013 Drama", "shape": "dot", "title": "Award"},
                      {"font": {"color": "white"}, "group": "Book", "id": "A Game Of Thrones", "label": "A Game Of Thrones", "shape": "dot", "title": "Book"},
                      {"font": {"color": "white"}, "group": "Television series", "id": "Game Of Thrones", "label": "Game Of Thrones", "shape": "dot", "title": "Television series"},
                      {"font": {"color": "white"}, "group": "Fictional object", "id": "Iron Throne", "label": "Iron Throne", "shape": "dot", "title": "Fictional object"},
                      {"font": {"color": "white"}, "group": "Fictional place", "id": "Seven Kingdoms", "label": "Seven Kingdoms", "shape": "dot", "title": "Fictional place"},
                      {"font": {"color": "white"}, "group": "Television series", "id": "House Of The Dragon", "label": "House Of The Dragon", "shape": "dot", "title": "Television series"},
                      {"font": {"color": "white"}, "group": "Award", "id": "Peabody Award", "label": "Peabody Award", "shape": "dot", "title": "Award"},
                      {"font": {"color": "white"}, "group": "Fictional continent", "id": "Westeros", "label": "Westeros", "shape": "dot", "title": "Fictional continent"}
                  ]);
                  edges = new vis.DataSet([
                      {"arrows": "to", "from": "Game Of Thrones", "label": "creator", "to": "David Benioff"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "creator", "to": "D. B. Weiss"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "network", "to": "Hbo"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "adaptation", "to": "A Song Of Ice And Fire"},
                      {"arrows": "to", "from": "A Song Of Ice And Fire", "label": "author", "to": "George R. R. Martin"},
                      {"arrows": "to", "from": "A Song Of Ice And Fire", "label": "first_book", "to": "A Game Of Thrones"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "setting", "to": "Westeros"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "setting", "to": "Essos"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "focus", "to": "Iron Throne"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "focus", "to": "Seven Kingdoms"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "focus", "to": "Night\u0027S Watch"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "award", "to": "Primetime Emmy Awards"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "award", "to": "Hugo Awards"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "award", "to": "Peabody Award"},
                      {"arrows": "to", "from": "Game Of Thrones", "label": "nomination", "to": "Golden Globe Award For Best Television Series \u2013 Drama"},
                      {"arrows": "to", "from": "House Of The Dragon", "label": "network", "to": "Hbo"},
                      {"arrows": "to", "from": "A Knight Of The Seven Kingdoms", "label": "network", "to": "Hbo"}
                  ]);

                  // remember each node's original color so the highlight helpers can restore it
                  nodeColors = {};
                  allNodes = nodes.get({ returnType: "Object" });
                  for (var nodeId in allNodes) {
                    nodeColors[nodeId] = allNodes[nodeId].color;
                  }
                  allEdges = edges.get({ returnType: "Object" });
                  // adding nodes and edges to the graph
                  data = {nodes: nodes, edges: edges};

                  var options = {"physics": {"forceAtlas2Based": {"gravitationalConstant": -100, "centralGravity": 0.01, "springLength": 200, "springConstant": 0.08}, "minVelocity": 0.75, "solver": "forceAtlas2Based"}};

                  network = new vis.Network(container, data, options);

                  return network;

              }
              drawGraph();
        </script>
    </body>
</html>

Writing knowledge_graph.html


In [7]:
# Upgrade the notebook's own dependencies in the kernel environment (versions are not pinned here).
%pip install --upgrade langchain langchain-experimental langchain-openai python-dotenv pyvis



In [None]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()
# Get API key from environment variable
# (make sure the key is present in .env file in the project directory)
# NOTE(review): `api_key` is not passed to anything in the cells visible here —
# presumably the OpenAI client reads OPENAI_API_KEY from the environment itself; confirm.
api_key = os.getenv("OPENAI_API_KEY")

In [None]:

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI

# Chat model used for graph extraction (GPT-4o, temperature 0).
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
)

# Graph transformer backed by the chat model above, with no schema restrictions.
graph_transformer = LLMGraphTransformer(llm=llm)

In [None]:
# Sample input text: four biographical paragraphs about Albert Einstein.
# Bracketed reference markers such as [a], [1][5] and [note 1] are part of
# the string and are left in place.
text = """
Albert Einstein[a] (14 March 1879 – 18 April 1955) was a German-born theoretical physicist who is best known for developing the theory of relativity. Einstein also made important contributions to quantum mechanics.[1][5] His mass–energy equivalence formula E = mc2, which arises from special relativity, has been called "the world's most famous equation".[6] He received the 1921 Nobel Prize in Physics for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect.[7]

Born in the German Empire, Einstein moved to Switzerland in 1895, forsaking his German citizenship (as a subject of the Kingdom of Württemberg)[note 1] the following year. In 1897, at the age of seventeen, he enrolled in the mathematics and physics teaching diploma program at the Swiss federal polytechnic school in Zurich, graduating in 1900. He acquired Swiss citizenship a year later, which he kept for the rest of his life, and afterwards secured a permanent position at the Swiss Patent Office in Bern. In 1905, he submitted a successful PhD dissertation to the University of Zurich. In 1914, he moved to Berlin to join the Prussian Academy of Sciences and the Humboldt University of Berlin, becoming director of the Kaiser Wilhelm Institute for Physics in 1917; he also became a German citizen again, this time as a subject of the Kingdom of Prussia.[note 1] In 1933, while Einstein was visiting the United States, Adolf Hitler came to power in Germany. Horrified by the Nazi persecution of his fellow Jews,[8] he decided to remain in the US, and was granted American citizenship in 1940.[9] On the eve of World War II, he endorsed a letter to President Franklin D. Roosevelt alerting him to the potential German nuclear weapons program and recommending that the US begin similar research.

In 1905, sometimes described as his annus mirabilis (miracle year), he published four groundbreaking papers.[10] In them, he outlined a theory of the photoelectric effect, explained Brownian motion, introduced his special theory of relativity, and demonstrated that if the special theory is correct, mass and energy are equivalent to each other. In 1915, he proposed a general theory of relativity that extended his system of mechanics to incorporate gravitation. A cosmological paper that he published the following year laid out the implications of general relativity for the modeling of the structure and evolution of the universe as a whole.[11][12] In 1917, Einstein wrote a paper which introduced the concepts of spontaneous emission and stimulated emission, the latter of which is the core mechanism behind the laser and maser, and which contained a trove of information that would be beneficial to developments in physics later on, such as quantum electrodynamics and quantum optics.[13]

In the middle part of his career, Einstein made important contributions to statistical mechanics and quantum theory. Especially notable was his work on the quantum physics of radiation, in which light consists of particles, subsequently called photons. With physicist Satyendra Nath Bose, he laid the groundwork for Bose–Einstein statistics. For much of the last phase of his academic life, Einstein worked on two endeavors that ultimately proved unsuccessful. First, he advocated against quantum theory's introduction of fundamental randomness into science's picture of the world, objecting that God does not play dice.[14] Second, he attempted to devise a unified field theory by generalizing his geometric theory of gravitation to include electromagnetism. As a result, he became increasingly isolated from mainstream modern physics.
"""

In [None]:
# Wrap the sample text in a LangChain Document and extract a graph from it.
# Nothing else in the notebook defines `documents` or `graph_documents`, so
# without this step the prints below (and every later cell that uses either
# name) fail with NameError on a fresh kernel.
documents = [Document(page_content=text)]
# Synchronous API; `aconvert_to_graph_documents` is the awaitable variant.
graph_documents = graph_transformer.convert_to_graph_documents(documents)

# Show what was extracted from the first (and only) document.
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

In [None]:
from pyvis.network import Network

def visualize_graph(graph_documents, output_file="knowledge_graph.html", open_browser=True):
    """Render the first graph document as an interactive PyVis HTML file.

    Only relationships whose source and target both appear in the document's
    node list are drawn, and only nodes that take part in at least one such
    relationship are added to the network. Nodes are added in the order they
    appear in the document so the generated HTML is reproducible between runs.

    Args:
        graph_documents: Sequence of graph documents; only element 0 is
            visualized. It must expose ``nodes`` (items with ``id`` and
            ``type``) and ``relationships`` (items with ``source``,
            ``target`` and ``type``).
        output_file: Path of the HTML file to write.
        open_browser: When True, try to open the saved file in the default
            web browser.

    Returns:
        None. The graph is written to ``output_file`` and progress messages
        are printed. An empty ``graph_documents`` prints a notice and writes
        nothing.
    """
    # Imported locally so the function does not depend on imports made in
    # some other notebook cell.
    import os
    import webbrowser
    from pathlib import Path

    if not graph_documents:
        print("No graph documents to visualize")
        return

    # Create network
    net = Network(height="1200px", width="100%", directed=True,
                  notebook=False, bgcolor="#222222", font_color="white")

    nodes = graph_documents[0].nodes
    relationships = graph_documents[0].relationships

    # Build lookup for valid nodes
    node_dict = {node.id: node for node in nodes}

    # Keep only edges whose two endpoints are known nodes
    valid_edges = [
        rel for rel in relationships
        if rel.source.id in node_dict and rel.target.id in node_dict
    ]
    valid_node_ids = {
        node_id
        for rel in valid_edges
        for node_id in (rel.source.id, rel.target.id)
    }

    # Add connected nodes, walking the lookup (insertion order) rather than
    # the set so the node order is deterministic.
    for node_id, node in node_dict.items():
        if node_id not in valid_node_ids:
            continue
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception as exc:
            # Best effort: one bad node must not abort the whole rendering,
            # but report it instead of hiding the failure.
            print(f"Skipping node {node.id!r}: {exc}")

    # Add valid edges
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception as exc:
            print(f"Skipping edge {rel.source.id!r} -> {rel.target.id!r}: {exc}")

    # Configure physics
    net.set_options("""
            {
                "physics": {
                    "forceAtlas2Based": {
                        "gravitationalConstant": -100,
                        "centralGravity": 0.01,
                        "springLength": 200,
                        "springConstant": 0.08
                    },
                    "minVelocity": 0.75,
                    "solver": "forceAtlas2Based"
                }
            }
            """)

    net.save_graph(output_file)
    saved_path = os.path.abspath(output_file)
    print(f"Graph saved to {saved_path}")

    if not open_browser:
        return

    # Try to open in browser. as_uri() builds a well-formed file:// URL
    # (percent-encoding, Windows drive letters); webbrowser.open() reports
    # failure through its return value as well as through exceptions.
    try:
        opened = webbrowser.open(Path(saved_path).as_uri())
    except Exception:
        opened = False
    if not opened:
        print("Could not open browser automatically")

# Render the graph extracted without any schema restrictions.
visualize_graph(graph_documents=graph_documents)

In [None]:
allowed_nodes = ["Person", "Organization", "Location", "Award", "ResearchField"]
graph_transformer_nodes_defined = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes)
graph_documents_nodes_defined = await graph_transformer_nodes_defined.aconvert_to_graph_documents(documents)

In [None]:
# Show the nodes and relationships extracted with the restricted node types.
print(
    f"Nodes:{graph_documents_nodes_defined[0].nodes}",
    f"Relationships:{graph_documents_nodes_defined[0].relationships}",
    sep="\n",
)

In [None]:
allowed_nodes = ["Person", "Organization", "Location", "Award", "ResearchField"]
allowed_relationships = [
    ("Person", "WORKS_AT", "Organization"),
    ("Person", "SPOUSE", "Person"),
    ("Person", "AWARD", "Award"),
    ("Organization", "IN_LOCATION", "Location"),
    ("Person", "FIELD_OF_RESEARCH", "ResearchField")
]
graph_transformer_rel_defined = LLMGraphTransformer(
  llm=llm,
  allowed_nodes=allowed_nodes,
  allowed_relationships=allowed_relationships
)
graph_documents_rel_defined = await graph_transformer_rel_defined.aconvert_to_graph_documents(documents)

In [None]:
# Render the graph extracted with restricted node and relationship types.
visualize_graph(graph_documents=graph_documents_rel_defined)