In [3]:
# Install or upgrade the third-party packages used by the cells below.
# NOTE(review): no versions are pinned and --upgrade is passed, so a re-run may
# resolve to different releases than the ones listed in the captured output.
%pip install --upgrade langchain langchain_experimental langchain-openai python-dotenv pyvis

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain_experimental)
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading aiohttp-3.12.15-cp313-cp313-win_amd64.whl.metadata (7.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Using cached pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Using cached httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting numpy>=2.1.0 (from langchain-

In [2]:
from dotenv import load_dotenv
import os

# Populate the process environment from a local .env file, then read the key.
# `api_key` is None when OPENAI_API_KEY is not set; it is not referenced by the
# later cells shown in this notebook.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI

# temperature=0 requests the least random sampling, so extraction output varies
# as little as possible between runs.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)

# Unconstrained transformer: no node or relationship types are specified here.
graph_transformer = LLMGraphTransformer(llm=llm)


In [5]:
# Sample passage about LeBron James that is wrapped in a Document and passed to
# the graph transformers below. Bracketed citation markers such as [1] are part
# of the input text as written.
text = """
LeBron Raymone James Sr. (/ləˈbrɒn/[1] lə-BRON; born December 30, 1984) is an American professional basketball player for the Los Angeles Lakers of the National Basketball Association (NBA). Nicknamed "King James", he is the NBA's all-time leading scorer and has won four NBA championships from 10 NBA Finals appearances, having made eight consecutive appearances between 2011 and 2018.[2] He also won the inaugural NBA Cup in 2023 with the Lakers and has won three Olympic gold medals as a member of the U.S. national team. James is widely considered one of the greatest basketball players of all time.[3][4]

In addition to ranking fourth in NBA career assists and sixth in NBA career steals, James holds several individual honors, including four NBA MVP awards, four Finals MVP awards, the Rookie of the Year award, three All-Star Game MVP awards, the inaugural NBA Cup MVP, and the Olympics MVP in the 2024 Summer Olympics. A record 21-time All-Star and 21-time All-NBA selection (including a record 13 First Team selections), he has also made six All-Defensive Teams. The oldest active player in the NBA, he is tied with Vince Carter for the most seasons played and holds the record for the most minutes played in league history.

Born and raised in Akron, Ohio, James gained national attention at St. Vincent–St. Mary High School and was heavily touted as a future NBA superstar for his all-around scoring, passing, athleticism and playmaking abilities.[5] A prep-to-pro, James was selected by the Cleveland Cavaliers with the first overall pick of the 2003 NBA draft. He won Rookie of the Year and quickly established himself as one of the league's premier players, leading Cleveland to its first NBA Finals appearance in 2007 and winning the scoring title in 2008. After winning back-to-back MVPs in 2009 and 2010, he left the Cavaliers and joined the Miami Heat as a free agent in 2010, a controversial move announced in the nationally televised special titled The Decision.[6]
"""


In [6]:
documents = [Document(page_content=text)]
graph_documents = await graph_transformer.aconvert_to_graph_documents(documents)

In [None]:
# Only the first graph document is inspected (a single Document was submitted).
first_graph_document = graph_documents[0]
print(f"Nodes: {first_graph_document.nodes}")
print(f"Relationships: {first_graph_document.relationships}")

nodes=[Node(id='Lebron Raymone James Sr.', type='Person', properties={}), Node(id='Los Angeles Lakers', type='Organization', properties={}), Node(id='National Basketball Association', type='Organization', properties={}), Node(id='Nba', type='Organization', properties={}), Node(id='U.S. National Team', type='Organization', properties={}), Node(id='Akron, Ohio', type='Place', properties={}), Node(id='St. Vincent–St. Mary High School', type='Organization', properties={}), Node(id='Cleveland Cavaliers', type='Organization', properties={}), Node(id='Miami Heat', type='Organization', properties={}), Node(id='The Decision', type='Event', properties={})] relationships=[Relationship(source=Node(id='Lebron Raymone James Sr.', type='Person', properties={}), target=Node(id='Los Angeles Lakers', type='Organization', properties={}), type='PLAYS_FOR', properties={}), Relationship(source=Node(id='Lebron Raymone James Sr.', type='Person', properties={}), target=Node(id='Nba', type='Organization', prope

# Visualize Graph

In [18]:
from pyvis.network import Network

def visualize_graph(graph_documents, output_file="knowledge_graph.html", open_browser=True):
    """Render the first graph document as an interactive pyvis HTML network.

    Only relationships whose source and target ids both appear in the
    document's node list are drawn, and only nodes that take part in at least
    one such relationship are added, so isolated nodes are left out.
    Edge labels are the lower-cased relationship types.

    Args:
        graph_documents: Sequence whose first element exposes ``nodes``
            (objects with ``id`` and ``type``) and ``relationships`` (objects
            with ``source``, ``target`` and ``type``). Only element 0 is used.
        output_file: Name of the HTML file to write. Defaults to
            ``"knowledge_graph.html"``; pass a different name to keep the
            output of several calls side by side instead of overwriting it.
        open_browser: When true (the default), try to open the saved file in
            the system web browser.

    Returns:
        None. The graph is written to ``output_file`` and its absolute path is
        printed.

    Raises:
        IndexError: If ``graph_documents`` is an empty list.
    """
    # Local imports so the function does not depend on which notebook cells
    # have already been executed in the current kernel.
    import os
    import webbrowser
    from pathlib import Path

    net = Network(
        height="1200px",
        width="100%",
        directed=True,
        notebook=False,
        bgcolor="#222222",
        font_color="white",
    )

    first_doc = graph_documents[0]
    node_dict = {node.id: node for node in first_doc.nodes}

    # Keep only edges whose endpoints are both known nodes.
    valid_edges = [
        rel
        for rel in first_doc.relationships
        if rel.source.id in node_dict and rel.target.id in node_dict
    ]

    # dict.fromkeys de-duplicates while preserving first-seen order, so nodes
    # are inserted in a deterministic order (a set would not guarantee that).
    valid_node_ids = dict.fromkeys(
        node_id
        for rel in valid_edges
        for node_id in (rel.source.id, rel.target.id)
    )

    for node_id in valid_node_ids:
        node = node_dict[node_id]
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception as exc:
            # Best effort: report the rejected node instead of hiding the error.
            print(f"Skipping node {node_id!r}: {exc}")

    for rel in valid_edges:
        try:
            # The pyvis method is `add_edge`; the previous `add_Edge` spelling
            # raised AttributeError, which was swallowed, so no edge was drawn.
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception as exc:
            print(f"Skipping edge {rel.source.id!r} -> {rel.target.id!r}: {exc}")

    # Configure physics. The key must be spelled "gravitationalConstant" for
    # the setting to be recognised.
    net.set_options("""
        {
            "physics": {
                "forceAtlas2Based": {
                    "gravitationalConstant": -100,
                    "centralGravity": 0.01,
                    "springLength": 200,
                    "springConstant": 0.08
                },
                "minVelocity": 0.5,
                "solver": "forceAtlas2Based"
            }
        }
        """)

    net.save_graph(output_file)
    abs_path = os.path.abspath(output_file)
    print(f"Graph saved to {abs_path}")

    if open_browser:
        try:
            # as_uri() builds a well-formed file:// URL, including on Windows
            # and for paths containing spaces.
            opened = webbrowser.open(Path(abs_path).as_uri())
        except (webbrowser.Error, OSError, ValueError):
            opened = False
        if not opened:
            print("Could not open browser automatically")
         


In [19]:
# Draw the graph extracted without any node or relationship constraints; the
# location of the written HTML file is printed by the call.
visualize_graph(graph_documents)

Graph saved to c:\Users\vaish\OneDrive\Documents\GitHub\Knowledge-Graphs\knowledge_graph.html


# Extract specific types of nodes

In [20]:
allowed_nodes = ["Person", "Organization", "Place"]
graph_transformer_nodes_defined = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes)
graph_documents_nodes_defined = await graph_transformer_nodes_defined.aconvert_to_graph_documents(documents)

In [22]:
# Inspect the node-constrained result; only the first graph document is shown.
nodes_defined_document = graph_documents_nodes_defined[0]
print(f"Nodes: {nodes_defined_document.nodes}")
print(f"Relationships: {nodes_defined_document.relationships}")

Nodes: [Node(id='Lebron Raymone James Sr.', type='Person', properties={}), Node(id='Los Angeles Lakers', type='Organization', properties={}), Node(id='National Basketball Association', type='Organization', properties={}), Node(id='U.S. National Team', type='Organization', properties={}), Node(id='Akron, Ohio', type='Place', properties={}), Node(id='St. Vincent–St. Mary High School', type='Organization', properties={}), Node(id='Cleveland Cavaliers', type='Organization', properties={}), Node(id='Miami Heat', type='Organization', properties={})]
Relationships: [Relationship(source=Node(id='Lebron Raymone James Sr.', type='Person', properties={}), target=Node(id='Los Angeles Lakers', type='Organization', properties={}), type='PLAYS_FOR', properties={}), Relationship(source=Node(id='Lebron Raymone James Sr.', type='Person', properties={}), target=Node(id='National Basketball Association', type='Organization', properties={}), type='PLAYS_IN', properties={}), Relationship(source=Node(id='Leb

# Extract specific types of relationships

In [26]:
allowed_relationships = [
    ("Person", "CHAMPION", "Organization"),
    ("Person", "MVP", "Organization")
]
graph_transformer_rel_defined = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes, allowed_relationships=allowed_relationships)
graph_documents_rel_defined = await graph_transformer_rel_defined.aconvert_to_graph_documents(documents)

In [28]:
# Draw the relationship-constrained graph. NOTE: this call writes to
# knowledge_graph.html, the same file as the earlier
# visualize_graph(graph_documents) call, so that earlier output is replaced.
visualize_graph(graph_documents_rel_defined)

Graph saved to c:\Users\vaish\OneDrive\Documents\GitHub\Knowledge-Graphs\knowledge_graph.html
