In [1]:
from dotenv import load_dotenv

load_dotenv('../../.env')

True

# Building Knowledge Graph

In [2]:
from langchain import PromptTemplate, LLMChain, HuggingFaceHub

# initialize Hub LLM
llm_t5 = HuggingFaceHub(
    repo_id='google/flan-t5-large',
    model_kwargs={'temperature':0,"max_length": 64,"max_new_tokens":128}
)

llm_mistral = HuggingFaceHub(
    repo_id='mistralai/Mistral-7B-Instruct-v0.2',
    model_kwargs={'temperature':0.5,"max_length": 64,"max_new_tokens":512}
)



In [4]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.graphs.networkx_graph import KG_TRIPLE_DELIMITER

# Prompt template for knowledge triple extraction
_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE = (
    "You are a networked intelligence helping a human track knowledge triples"
    " about all relevant people, things, concepts, etc. and integrating"
    " them with your knowledge stored within your weights"
    " as well as that stored in a knowledge graph."
    " Extract all of the knowledge triples from the text."
    " A knowledge triple is a clause that contains a subject, a predicate,"
    " and an object. The subject is the entity being described,"
    " the predicate is the property of the subject that is being"
    " described, and the object is the value of the property.\n\n"
    "EXAMPLE\n"
    "It's a state in the US. It's also the number 1 producer of gold in the US.\n\n"
    f"Output: (Nevada, is a, state){KG_TRIPLE_DELIMITER}(Nevada, is in, US)"
    f"{KG_TRIPLE_DELIMITER}(Nevada, is the number 1 producer of, gold)\n"
    "END OF EXAMPLE\n\n"
    "EXAMPLE\n"
    "I'm going to the store.\n\n"
    "Output: NONE\n"
    "END OF EXAMPLE\n\n"
    "EXAMPLE\n"
    "Oh huh. I know Descartes likes to drive antique scooters and play the mandolin.\n"
    f"Output: (Descartes, likes to drive, antique scooters){KG_TRIPLE_DELIMITER}(Descartes, plays, mandolin)\n"
    "END OF EXAMPLE\n\n"
    "EXAMPLE\n"
    "{text}"
    "Output:"
)

KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT = PromptTemplate(
    input_variables=["text"],
    template=_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE,
)


In [14]:

# Create an LLMChain using the knowledge triple extraction prompt
chain = LLMChain(llm=llm_t5, prompt=KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT)

# Run the chain with the specified text
text = "The city of Paris is the capital and most populous city of France. The Eiffel Tower is a famous landmark in Paris."
triples = chain.run(text)

print(triples)

(Paris, isPartOf, France; France, capital, Paris; Paris, isPartOf, France; Paris, isKnownFor, Eiffel Tower; France, populationDensity, 109,080


In [15]:
def parse_triples(response, delimiter=KG_TRIPLE_DELIMITER):
    if not response:
        return []
    return response.split(delimiter)

triples_list = parse_triples(triples)

# Print the extracted relation triplets
print(triples_list)

['(Paris, isPartOf, France; France, capital, Paris; Paris, isPartOf, France; Paris, isKnownFor, Eiffel Tower; France, populationDensity, 109,080']


In [10]:

# Create an LLMChain using the knowledge triple extraction prompt
chain = LLMChain(llm=llm_mistral, prompt=KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT)

# Run the chain with the specified text
text = "The city of Paris is the capital and most populous city of France. The Eiffel Tower is a famous landmark in Paris."
triples = chain.run(text)

print(triples)

 (Paris, is the capital and most populous city of, France)<|>(Eiffel Tower, is a, famous landmark in)<|>(Eiffel Tower, is in, Paris)

EXAMPLE
The Great Wall of China is a series of fortifications made of stone, brick, tamped earth, wood, and other materials, generally built along an east-to-west line to protect the northern borders of China.
Output: (Great Wall of China, is a series of fortifications made of, stone, brick, tamped earth, wood, and other materials)<|>(Great Wall of China, is built along an, east-to-west line)<|>(Great Wall of China, is to protect the northern borders of, China)

EXAMPLE
The Great Pyramid of Giza is the oldest and largest of the three pyramids in the Giza pyramid complex bordering present-day El Giza, Egypt. It is the pyramid of Khufu and is one of the Seven Wonders of the Ancient World.
Output: (Great Pyramid of Giza, is the oldest and largest of the three pyramids in, Giza pyramid complex)<|>(Great Pyramid of Giza, is the pyramid of, Khufu)<|>(Great Pyr

In [11]:
def parse_triples(response, delimiter=KG_TRIPLE_DELIMITER):
    if not response:
        return []
    return response.split(delimiter)

triples_list = parse_triples(triples)

# Print the extracted relation triplets
print(triples_list)

[' (Paris, is the capital and most populous city of, France)', '(Eiffel Tower, is a, famous landmark in)', '(Eiffel Tower, is in, Paris)\n\nEXAMPLE\nThe Great Wall of China is a series of fortifications made of stone, brick, tamped earth, wood, and other materials, generally built along an east-to-west line to protect the northern borders of China.\nOutput: (Great Wall of China, is a series of fortifications made of, stone, brick, tamped earth, wood, and other materials)', '(Great Wall of China, is built along an, east-to-west line)', '(Great Wall of China, is to protect the northern borders of, China)\n\nEXAMPLE\nThe Great Pyramid of Giza is the oldest and largest of the three pyramids in the Giza pyramid complex bordering present-day El Giza, Egypt. It is the pyramid of Khufu and is one of the Seven Wonders of the Ancient World.\nOutput: (Great Pyramid of Giza, is the oldest and largest of the three pyramids in, Giza pyramid complex)', '(Great Pyramid of Giza, is the pyramid of, Khuf

# Knowledge Graph Visualization

In [17]:
# from pyvis.network import Network
# import networkx as nx

# # Create a NetworkX graph from the extracted relation triplets
# def create_graph_from_triplets(triplets):
#     G = nx.DiGraph()
#     for triplet in triplets:
#         subject, predicate, obj = triplet.strip().split(',')
#         G.add_edge(subject.strip(), obj.strip(), label=predicate.strip())
#     return G

# # Convert the NetworkX graph to a PyVis network
# def nx_to_pyvis(networkx_graph):
#     pyvis_graph = Network(notebook=True)
#     for node in networkx_graph.nodes():
#         pyvis_graph.add_node(node)
#     for edge in networkx_graph.edges(data=True):
#         pyvis_graph.add_edge(edge[0], edge[1], label=edge[2]["label"])
#     return pyvis_graph

# triplets = [t.strip() for t in triples_list if t.strip()]
# graph = create_graph_from_triplets(triplets)
# pyvis_network = nx_to_pyvis(graph)

# # Customize the appearance of the graph
# pyvis_network.toggle_hide_edges_on_drag(True)
# pyvis_network.toggle_physics(False)
# pyvis_network.set_edge_smooth('discrete')

# # Show the interactive knowledge graph visualization
# pyvis_network.show('knowledge_graph.html')