**Text Loader**

In [1]:
def load_text(file_path):
    """Return the entire contents of the file at ``file_path``, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

# Read the sample corpus and show its first 500 characters as a quick sanity check.
text = load_text(file_path="dummy_data.txt")
print(text[0:500])

In 2016, Dr. Amelia Reyes, an analytical and empathetic neuroscientist, joined MindWorks Research Institute as its lead data scientist. She worked closely with Professor Liam Chen, a methodical and calm researcher specializing in cognitive computing. Together, they developed a neural mapping algorithm that later became the foundation for the startup NeuroPath Analytics.

Marcus Tan, a decisive and visionary entrepreneur, founded GreenGrid Technologies in 2018 to promote sustainable energy soluti


**Text Splitter**

In [31]:
from nltk.tokenize import sent_tokenize
import nltk
# Fetch the Punkt tokenizer resources before splitting the text.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# Split the loaded corpus into sentences and preview the first five.
sentences = sent_tokenize(text)
print("Number of sentences: {}".format(len(sentences)))
print(sentences[0:5])


Number of sentences: 14
['In 2016, Dr. Amelia Reyes, an analytical and empathetic neuroscientist, joined MindWorks Research Institute as its lead data scientist.', 'She worked closely with Professor Liam Chen, a methodical and calm researcher specializing in cognitive computing.', 'Together, they developed a neural mapping algorithm that later became the foundation for the startup NeuroPath Analytics.', 'Marcus Tan, a decisive and visionary entrepreneur, founded GreenGrid Technologies in 2018 to promote sustainable energy solutions.', 'Under his leadership, the company partnered with EcoWave Labs, led by Dr. Sophia Mendoza, a pragmatic and detail-oriented environmental engineer.']


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


**Entity Extraction Chain**

In [6]:
!pip install -U langchain langchain-community openai tiktoken

Collecting langchain
  Downloading langchain-1.0.2-py3-none-any.whl.metadata (4.7 kB)
Collecting langchain-community
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
Collecting openai
  Downloading openai-2.6.0-py3-none-any.whl.metadata (29 kB)
Collecting langchain-core<2.0.0,>=1.0.0 (from langchain)
  Downloading langchain_core-1.0.0-py3-none-any.whl.metadata (3.4 kB)
Collecting langgraph<1.1.0,>=1.0.0 (from langchain)
  Downloading langgraph-1.0.1-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->l

In [14]:
!pip install -qU langchain langchain-community langchain-core langchain-openai openai tiktoken


In [25]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser


In [26]:
import os
from getpass import getpass

# Do not store the key in the notebook. Reuse a key that is already present in
# the environment; otherwise prompt for it without echoing it into the output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [27]:
# Chat model used by the extraction chains; temperature=0 asks for the least
# random sampling the model offers.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

# Parser that turns the model's JSON reply into Python objects.
parser = JsonOutputParser()


In [33]:
# Prompt text for entity extraction. Doubled braces are literal JSON braces in
# the rendered prompt; {text} is the only input variable.
ENTITY_PROMPT_TEMPLATE = """
You are an expert information extractor for knowledge graphs.
Extract all named entities from the following text and categorize them as one of these types:
Person, Organization, Year.

Return your answer in valid JSON format as:
{{
  "entities": [
    {{"name": "Entity Name", "type": "Person/Organization/Year"}}
  ]
}}

Text:
\"\"\"{text}\"\"\"
"""

prompt = ChatPromptTemplate.from_template(ENTITY_PROMPT_TEMPLATE)


In [34]:
# Entity-extraction pipeline: the rendered prompt is piped into the chat model,
# and the model's reply is piped into the JSON parser.
entity_chain = prompt | llm | parser

In [35]:
# Run entity extraction sentence by sentence, keeping one parsed reply per sentence.
results = []

for number, sentence in enumerate(sentences[:5], start=1):  # limit for testing
    extracted = entity_chain.invoke({"text": sentence})
    print(f"\nSentence {number}:\n", extracted)
    results.append(extracted)



Sentence 1:
 {'entities': [{'name': 'Amelia Reyes', 'type': 'Person'}, {'name': 'MindWorks Research Institute', 'type': 'Organization'}, {'name': '2016', 'type': 'Year'}]}

Sentence 2:
 {'entities': [{'name': 'Liam Chen', 'type': 'Person'}]}

Sentence 3:
 {'entities': [{'name': 'NeuroPath Analytics', 'type': 'Organization'}]}

Sentence 4:
 {'entities': [{'name': 'Marcus Tan', 'type': 'Person'}, {'name': 'GreenGrid Technologies', 'type': 'Organization'}, {'name': '2018', 'type': 'Year'}]}

Sentence 5:
 {'entities': [{'name': 'EcoWave Labs', 'type': 'Organization'}, {'name': 'Dr. Sophia Mendoza', 'type': 'Person'}]}


**Relationship Extraction Chain**

In [39]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI

# Prompt text for relationship extraction. Doubled braces are literal JSON
# braces in the rendered prompt; {text} and {entities} are the input variables.
RELATIONSHIP_PROMPT_TEMPLATE = """
You are an expert in semantic relationship extraction for knowledge graphs.

Given the following text, and the entities already detected, extract all meaningful relationships between them.
Possible relationship types include:
- works_at
- collaborated_with
- founded
- developed
- leads
- mentors

Return only valid JSON in this format:
{{
  "relationships": [
    {{"subject": "Entity1", "relation": "relation_type", "object": "Entity2"}}
  ]
}}

Text:
\"\"\"{text}\"\"\"

Detected entities:
{entities}
"""

# Chat model and JSON parser used by the chains defined from here on.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)
parser = JsonOutputParser()

rel_prompt = ChatPromptTemplate.from_template(RELATIONSHIP_PROMPT_TEMPLATE)

In [40]:
# Relationship-extraction pipeline: prompt -> chat model -> JSON parser.
rel_chain = rel_prompt | llm | parser


In [41]:
# Try the relationship chain on one hand-written sentence with a hand-written entity list.
sentence = "In 2016, Dr. Amelia Reyes joined MindWorks Research Institute as its lead data scientist."
entities = [
    {'name': 'Amelia Reyes', 'type': 'Person'},
    {'name': 'MindWorks Research Institute', 'type': 'Organization'},
    {'name': '2016', 'type': 'Year'},
]

result = rel_chain.invoke(dict(text=sentence, entities=entities))
print(result)


{'relationships': [{'subject': 'Amelia Reyes', 'relation': 'works_at', 'object': 'MindWorks Research Institute'}]}


In [44]:
# Extract relationships for the same sentences that went through entity
# extraction, passing each sentence's stored entities along with its text.
relationship_results = []

for number, sentence in enumerate(sentences[:5], start=1):
    known_entities = results[number - 1]['entities']  # entities stored for this sentence
    extracted = rel_chain.invoke({"text": sentence, "entities": known_entities})
    relationship_results.append(extracted)
    print(f"\nSentence {number}:\n", extracted)



Sentence 1:
 {'relationships': [{'subject': 'Amelia Reyes', 'relation': 'works_at', 'object': 'MindWorks Research Institute'}, {'subject': 'Amelia Reyes', 'relation': 'leads', 'object': 'MindWorks Research Institute'}]}

Sentence 2:
 {'relationships': [{'subject': 'She', 'relation': 'collaborated_with', 'object': 'Liam Chen'}]}

Sentence 3:
 {'relationships': [{'subject': 'they', 'relation': 'developed', 'object': 'NeuroPath Analytics'}]}

Sentence 4:
 {'relationships': [{'subject': 'Marcus Tan', 'relation': 'founded', 'object': 'GreenGrid Technologies'}]}

Sentence 5:
 {'relationships': [{'subject': 'EcoWave Labs', 'relation': 'collaborated_with', 'object': 'Dr. Sophia Mendoza'}]}


**Personality Extraction Chain**

In [48]:
# Prompt text for personality-trait extraction. Doubled braces are literal JSON
# braces in the rendered prompt; {text} is the only input variable.
TRAIT_PROMPT_TEMPLATE = """
You are a psychologist AI trained to infer personality traits from text.
Extract the personality traits explicitly or implicitly described for any person in the given text.

Return only JSON in this format:
{{
  "personality_traits": [
    {{"person": "Person Name", "traits": ["trait1", "trait2", "..."]}}
  ]
}}

Text:
\"\"\"{text}\"\"\"
"""

trait_prompt = ChatPromptTemplate.from_template(TRAIT_PROMPT_TEMPLATE)

In [49]:
# Personality-trait pipeline: prompt -> chat model -> JSON parser.
trait_chain = trait_prompt | llm | parser

In [50]:
# Choose the input explicitly instead of reading a `sentence` variable left
# behind by an earlier loop, whose value depends on which cell ran last.
# This is the last of the (up to five) sentences the extraction loops process.
trait_sentence = sentences[:5][-1]

traits_result = trait_chain.invoke({"text": trait_sentence})
print(traits_result)


{'personality_traits': [{'person': 'Dr. Sophia Mendoza', 'traits': ['pragmatic', 'detail-oriented']}]}


**Graph Assembly**

In [52]:
import networkx as nx

# A MultiDiGraph keeps one edge per extracted relationship. A plain DiGraph holds
# at most one edge per (subject, object) pair, so a second relation between the
# same two entities would overwrite the first one's `relation` attribute.
G = nx.MultiDiGraph()

# Add entities as nodes, tagged with their extracted type.
for ent_list in results:
    for e in ent_list['entities']:
        G.add_node(e['name'], type=e['type'])

# Add relationships as directed edges. Using the relation name as the edge key
# means an exact repeat of (subject, relation, object) updates the existing edge
# instead of adding a duplicate parallel edge.
# NOTE: add_edge also creates any endpoint that is not yet a node; such nodes
# have no `type` attribute.
for rel_list in relationship_results:
    for r in rel_list['relationships']:
        G.add_edge(r['subject'], r['object'], key=r['relation'], relation=r['relation'])


In [45]:
!pip install pyvis

Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting jedi>=0.16 (from ipython>=5.3.0->pyvis)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pyvis-0.3.2-py3-none-any.whl (756 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi, pyvis
Successfully installed jedi-0.19.2 pyvis-0.3.2


In [53]:
from pyvis.network import Network

# Build an interactive, directed pyvis view of the knowledge graph.
net = Network(notebook=True, directed=True)

for n, data in G.nodes(data=True):
    # Nodes created implicitly by add_edge carry no `type`; show only the name
    # for those rather than a dangling "()" suffix.
    node_type = data.get('type')
    label = f"{n}\n({node_type})" if node_type else str(n)
    net.add_node(n, label=label)

# Each edge is labelled with its relation name (empty when none was recorded).
for u, v, data in G.edges(data=True):
    net.add_edge(u, v, label=data.get('relation', ''))

# Writes the HTML file and returns the notebook display object for it.
net.show("knowledge_graph.html")

knowledge_graph.html
