In [2]:
import os
import ast
from langchain_community.llms import OpenAI
from langchain_google_genai import GoogleGenerativeAI
from langchain.prompts import PromptTemplate    
from langchain.chains import LLMChain
from langchain.schema import StrOutputParser
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading the key.
load_dotenv()

# Stop immediately when the Gemini credential is missing or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("GOOGLE_API_KEY is not set in the environment variables.")

In [3]:
# Gemini client used by the triplet-extraction chain built further down.
llm = GoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-1.5-flash")



In [9]:
# Prompt asking the model for (subject, predicate, object) triplets.
# `{note}` is the only template variable; the doubled braces are literal braces
# in the example output shown to the model.
prompt = PromptTemplate.from_template(
    """
Extract a list of semantic triplets (Subject, Predicate, Object) from the following note:
Note: "{note}"
Return them as a Python list of dictionaries like this only:
[{{"subject": "A", "predicate": "B", "object": "C"}}]
"""
)

# Pipeline: fill the prompt, call the model, then parse the reply to a string.
chain = (
    prompt
    | llm
    | StrOutputParser()
)

In [10]:
# Sample note run through the extraction chain; the raw model reply is printed as-is.
note = "The cat sat on the mat. The dog chased the cat."
result = chain.invoke(dict(note=note))
print(result)

```python
semantic_triplets = [
    {"subject": "cat", "predicate": "sat", "object": "mat"},
    {"subject": "dog", "predicate": "chased", "object": "cat"}
]
```


In [11]:
import re


def _extract_triplets(text):
    """Return the first Python list literal that can be parsed out of ``text``.

    The model reply may wrap the list in prose, a variable assignment or a
    Markdown code fence, so the list is located by its brackets rather than
    by parsing the whole reply.

    Args:
        text: Raw string returned by the model.

    Returns:
        The parsed list.

    Raises:
        ValueError: If ``text`` contains no ``[`` or no bracketed span
            starting at the first ``[`` evaluates to a list.
    """
    opening = re.search(r"\[", text)
    if opening is None:
        raise ValueError(f"No list literal found in model output: {text!r}")
    start = opening.start()

    # Candidate end offsets: just past every ']' that follows the first '['.
    ends = [m.end() for m in re.finditer(r"\]", text) if m.start() > start]

    # Try the widest span first so a ']' inside a value or a nested list does
    # not cut the literal short; fall back to narrower spans if it fails.
    for end in reversed(ends):
        try:
            parsed = ast.literal_eval(text[start:end])
        except (ValueError, SyntaxError, TypeError):
            continue
        if isinstance(parsed, list):
            return parsed

    raise ValueError(f"Could not parse a list of triplets from model output: {text!r}")


# Convert the model's textual reply into a list of dictionaries and display it.
triplets = _extract_triplets(result)
print("Extracted Triplets:", triplets)
for triplet in triplets:
    print(f"Subject: {triplet['subject']}, Predicate: {triplet['predicate']}, Object: {triplet['object']}")


Extracted Triplets: [{'subject': 'cat', 'predicate': 'sat', 'object': 'mat'}, {'subject': 'dog', 'predicate': 'chased', 'object': 'cat'}]
Subject: cat, Predicate: sat, Object: mat
Subject: dog, Predicate: chased, Object: cat


Using graph transformer

In [2]:
pip install langchain langchain_experimental langchain-groq langchain-google-genai wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting beautifulsoup4 (from wikipedia)
  Obtaining dependency information for beautifulsoup4 from https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl.metadata
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->wikipedia)
  Obtaining dependency information for soupsieve>1.2 from https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl.metadata
  Downloading


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Import the loader from langchain_community (already used elsewhere in this
# notebook); the `langchain.document_loaders` path is the deprecated location.
from langchain_community.document_loaders import WikipediaLoader

# Load the Wikipedia documents matching the query and display them.
query = "Marie Curie"
pages = WikipediaLoader(query=query).load()
pages



  lis = BeautifulSoup(html).find_all('li')


[Document(metadata={'title': 'Marie Curie', 'summary': 'Maria Salomea Skłodowska-Curie (Polish: [ˈmarja salɔˈmɛa skwɔˈdɔfska kʲiˈri] ; née Skłodowska; 7 November 1867 – 4 July 1934), known simply as Marie Curie ( KURE-ee; French: [maʁi kyʁi]), was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity. \nShe was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields. Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes. She was, in 1906, the first woman to become a professor at the University of Paris.\nShe was born in Warsaw, in what was then the Kingdom of Poland, part of the Russian Empire. She studied at Warsaw\'s clandestine Flying University and began her practical scientific training in Warsaw. In 1891, a

In [9]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq

llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-flash")  # or use Gemini, etc.
llm_transformer = LLMGraphTransformer(llm=llm)

from langchain_core.documents import Document

text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity...
"""
doc = pages[0]
graph_doc = await llm_transformer.aconvert_to_graph_documents([doc])
graph_doc


[GraphDocument(nodes=[Node(id='Maria Salomea Skłodowska-Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='Henri Becquerel', type='Person', properties={}), Node(id='Poland', type='Country', properties={}), Node(id='France', type='Country', properties={}), Node(id='Warsaw', type='City', properties={}), Node(id='University Of Paris', type='University', properties={}), Node(id='Curie Institute', type='Institution', properties={}), Node(id='Paris Panthéon', type='Building', properties={}), Node(id='Russian Empire', type='Country', properties={}), Node(id='Polonium', type='Chemicalelement', properties={}), Node(id='Radium', type='Chemicalelement', properties={}), Node(id='Nobel Prize', type='Award', properties={}), Node(id='Physics', type='Fieldofstudy', properties={}), Node(id='Chemistry', type='Fieldofstudy', properties={}), Node(id='Radioactivity', type='Concept', properties={}), Node(id='World War I', type='Event', properties={}), Node

In [10]:
# Show the nodes and the relationships of the first graph document, each
# under its own label (both lines were previously labelled "Notes:").
print("Nodes:", graph_doc[0].nodes)
print("Relationships:", graph_doc[0].relationships)

Notes: [Node(id='Maria Salomea Skłodowska-Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='Henri Becquerel', type='Person', properties={}), Node(id='Poland', type='Country', properties={}), Node(id='France', type='Country', properties={}), Node(id='Warsaw', type='City', properties={}), Node(id='University Of Paris', type='University', properties={}), Node(id='Curie Institute', type='Institution', properties={}), Node(id='Paris Panthéon', type='Building', properties={}), Node(id='Russian Empire', type='Country', properties={}), Node(id='Polonium', type='Chemicalelement', properties={}), Node(id='Radium', type='Chemicalelement', properties={}), Node(id='Nobel Prize', type='Award', properties={}), Node(id='Physics', type='Fieldofstudy', properties={}), Node(id='Chemistry', type='Fieldofstudy', properties={}), Node(id='Radioactivity', type='Concept', properties={}), Node(id='World War I', type='Event', properties={}), Node(id='Aplastic 

{'nodes': [Node(id='Maria Salomea Skłodowska-Curie', type='Person', properties={}),
  Node(id='Pierre Curie', type='Person', properties={}),
  Node(id='Henri Becquerel', type='Person', properties={}),
  Node(id='Poland', type='Country', properties={}),
  Node(id='France', type='Country', properties={}),
  Node(id='Warsaw', type='City', properties={}),
  Node(id='University Of Paris', type='University', properties={}),
  Node(id='Curie Institute', type='Institution', properties={}),
  Node(id='Paris Panthéon', type='Building', properties={}),
  Node(id='Russian Empire', type='Country', properties={}),
  Node(id='Polonium', type='Chemicalelement', properties={}),
  Node(id='Radium', type='Chemicalelement', properties={}),
  Node(id='Nobel Prize', type='Award', properties={}),
  Node(id='Physics', type='Fieldofstudy', properties={}),
  Node(id='Chemistry', type='Fieldofstudy', properties={}),
  Node(id='Radioactivity', type='Concept', properties={}),
  Node(id='World War I', type='Event',