In [1]:
from langchain.llms import Ollama

In [2]:
# Configuration
# Model tag handed to the Ollama wrapper when the LLM is initialised.
LOCAL_MODEL = "llama3:latest"
# Path of the plain-text input file whose content is embedded in the prompt.
# NOTE(review): this is a machine-specific absolute path; it must be edited
# before the notebook can run anywhere else.
RDF_FILEPATH = r"D:\New folder\RDF_Extractor_Llama3\Barack-Obama.txt"  # Update this with your text file path

In [3]:
# Create the LLM client that talks to the Ollama server on localhost,
# using the model selected in the configuration cell.
llm = Ollama(
    base_url="http://localhost:11434",
    model=LOCAL_MODEL,
    verbose=True,
)

In [5]:
# Read the input text file.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default, which can fail or garble non-ASCII characters.
# Assumes the file is saved as UTF-8 -- adjust if it uses another encoding.
with open(RDF_FILEPATH, 'r', encoding='utf-8') as file:
    rdf_data = file.read()

# Prompt Construction: the file content is embedded verbatim, followed by the
# extraction instructions and two worked examples of the expected output format.
prompt = f"""
Here is the RDF data:

{rdf_data}

Please parse and extract RDF-like triplets from the text. For each identified triplet, provide the output as a list of tuples.

Each tuple should follow the structure:
("Subject", "Predicate", "Object")

Guidelines:
1. Ensure that each triplet has exactly three components: a subject, a predicate, and an object.
2. If the text mentions a fact but lacks one of these components, reformulate it or omit it to ensure the triplet is complete.
3. Handle various types of information, including names, locations, dates, and numerical values, in a similar format.

Examples:
Text: "Obama was born in Honolulu, Hawaii. He graduated from Columbia University in 1983."

Output:
extracted_triplets = [
    ("Barack Hussein Obama II", "was born in", "Honolulu, Hawaii"),
    ("Barack Hussein Obama II", "graduated from", "Columbia University"),
    ("Barack Hussein Obama II", "graduated in", "1983"),
]

Text: "Obama was the first African-American president in U.S. history."

Output:
extracted_triplets = [
    ("Barack Hussein Obama II", "was the first African-American president in", "U.S. history"),
]

Ensure that each extracted triplet is fully formed and logically coherent.
"""

print(f"Prompt:\n{prompt}")


Prompt:

Here is the RDF data:

Barack Hussein Obama II[a] (born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. As a member of the Democratic Party, he was the first African-American president in U.S. history. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004.
Obama was born in Honolulu, Hawaii. He graduated from Columbia University in 1983 with a Bachelor of Arts degree in political science and later worked as a community organizer in Chicago. In 1988, Obama enrolled in Harvard Law School, where he was the first black president of the Harvard Law Review. He became a civil rights attorney and an academic, teaching constitutional law at the University of Chicago Law School from 1992 to 2004. He also went into elective politics; Obama represented the 13th district in the Illinois Senate from 1997 until 2004, when he successfully ran for th

In [None]:
# Generate output: the prompt is sent to the model as a one-element batch.
# NOTE(review): `max_length` is passed through as an extra keyword argument;
# confirm the Ollama wrapper actually honours it. Ollama's own option for
# limiting generated tokens is `num_predict` -- TODO verify and switch if needed.
response = llm.generate([prompt], max_length=1600)

In [7]:
# Take the text of the first generation for the first (and only) prompt;
# use a placeholder message when the model returned nothing.
has_generations = response and response.generations
generated_text = (
    response.generations[0][0].text if has_generations else "No text generated."
)

In [8]:
# Print the raw model response (expected to contain the extracted triplets)
print(f"Generated Summary:\n{generated_text}")

Generated Summary:
Here are the extracted triplets:

```
extracted_triplets = [
    ("Barack Hussein Obama II", "was born on", "August 4, 1961"),
    ("Barack Hussein Obama II", "served as", "44th president of the United States"),
    ("Barack Hussein Obama II", "belonged to", "Democratic Party"),
    ("Barack Hussein Obama II", "was the first", "African-American president in U.S. history"),
    ("Barack Hussein Obama II", "previously served as", "U.S. senator representing Illinois"),
    ("Barack Hussein Obama II", "previously served as", "Illinois state senator"),
    ("Barack Hussein Obama II", "graduated from", "Columbia University"),
    ("Barack Hussein Obama II", "graduated in", "1983"),
    ("Barack Hussein Obama II", "worked as", "community organizer in Chicago"),
    ("Barack Hussein Obama II", "enrolled in", "Harvard Law School"),
    ("Barack Hussein Obama II", "was the first black president of", "Harvard Law Review"),
    ("Barack Hussein Obama II", "became", "civil rights

In [24]:
# Extracts the list of triplets from the model-generated output (text) using a regular
# expression and parses it safely as a Python literal (no arbitrary code execution).
import ast
import re

def extract_triplets(text):
    """Return the list assigned to ``extracted_triplets`` in ``text``.

    The model output is untrusted, so the list is parsed with
    ``ast.literal_eval`` (literals only) instead of ``eval``.

    Args:
        text: Model-generated text expected to contain a block of the form
            ``extracted_triplets = [ (...), (...), ... ]``.

    Returns:
        The parsed list (normally a list of 3-tuples of strings), or ``None``
        when ``text`` is empty/``None``, contains no such assignment, or the
        list cannot be parsed as a plain Python literal (for example because
        the generation was cut off before the closing bracket).
    """
    if not text:
        return None

    # Locate the opening bracket of the first `extracted_triplets = [` assignment.
    header = re.search(r'extracted_triplets\s*=\s*\[', text)
    if header is None:
        return None
    start = header.end() - 1

    # A ']' may also occur inside a quoted string (e.g. "Obama II[a]"), so the
    # first ']' is not necessarily the end of the list. Try each candidate
    # closing bracket in turn and accept the first slice that is a valid literal.
    closing = text.find(']', start)
    while closing != -1:
        candidate = text[start:closing + 1]
        try:
            return ast.literal_eval(candidate)
        except (ValueError, SyntaxError, TypeError):
            closing = text.find(']', closing + 1)

    return None



In [25]:
# Extract the triplets
Model_Generated_triplets = extract_triplets(generated_text)
print(Model_Generated_triplets)

[('Barack Hussein Obama II', 'was born on', 'August 4, 1961'), ('Barack Hussein Obama II', 'served as', '44th president of the United States'), ('Barack Hussein Obama II', 'belonged to', 'Democratic Party'), ('Barack Hussein Obama II', 'was the first', 'African-American president in U.S. history'), ('Barack Hussein Obama II', 'previously served as', 'U.S. senator representing Illinois'), ('Barack Hussein Obama II', 'previously served as', 'Illinois state senator'), ('Barack Hussein Obama II', 'graduated from', 'Columbia University'), ('Barack Hussein Obama II', 'graduated in', '1983'), ('Barack Hussein Obama II', 'worked as', 'community organizer in Chicago'), ('Barack Hussein Obama II', 'enrolled in', 'Harvard Law School'), ('Barack Hussein Obama II', 'was the first black president of', 'Harvard Law Review'), ('Barack Hussein Obama II', 'became', 'civil rights attorney and an academic'), ('Barack Hussein Obama II', 'taught constitutional law at', 'University of Chicago Law School'), (

In [26]:
import requests
import json

In [27]:
 # Sends a POST request to the Falcon 2.0 API with the provided text and handles any request errors.
def falcon_request(text, timeout=30):
    """Send ``text`` to the Falcon 2.0 API and return the decoded JSON reply.

    Args:
        text: The phrase to send in the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The parsed JSON response, or ``None`` when ``text`` is empty/blank,
        the request fails, the server answers with an HTTP error status, or
        the body is not valid JSON. Failures are reported with ``print``.
    """
    # Nothing to look up for an empty component; skip the network round trip.
    if text is None or not str(text).strip():
        return None

    url = "https://labs.tib.eu/falcon/falcon2/api?mode=long"
    try:
        # `json=` serialises the payload and sets the JSON Content-Type header.
        response = requests.post(url, json={"text": text}, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad HTTP status codes
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    except ValueError as e:
        # Older versions of requests raise a plain ValueError for a non-JSON body.
        print(f"An error occurred: {e}")
        return None


In [28]:
# Extracts the URI from the response of Falcon 2.0 API
def extract_uri(response):
    """Pick the first URI out of a Falcon 2.0 API response.

    The 'entities_wikidata' list is consulted first and 'relations_wikidata'
    second; the 'URI' of the first item in the first non-empty list wins.

    Returns:
        The URI value, or None when the response is empty or neither list
        holds any item.
    """
    if not response:
        return None

    # Keys are listed in priority order: entities before relations.
    for key in ('entities_wikidata', 'relations_wikidata'):
        if key in response and response[key]:
            return response[key][0]['URI']

    return None

In [29]:
# Fetches and extracts URIs for the subject, predicate, and object of the given triplet.
def process_triplet(triplet):
    """Look up a URI for each component of a (subject, predicate, object) triplet.

    All three components are first sent to ``falcon_request`` (subject,
    predicate, object, in that order); the URIs are then pulled out of the
    three responses with ``extract_uri``.

    Returns:
        A dict with the keys "subject", "predicate" and "object", each mapped
        to the extracted URI or None when none was found.
    """
    subject_text, predicate_text, object_text = triplet
    component_texts = (subject_text, predicate_text, object_text)

    # Perform every lookup before extracting anything from the responses.
    responses = [falcon_request(component) for component in component_texts]
    uris = [extract_uri(reply) for reply in responses]

    return dict(zip(("subject", "predicate", "object"), uris))

In [30]:
# Resolve every extracted triplet to URIs and collect the results.
triplet_uris = []

# Iterate over the list produced by the extraction cell (the previous name
# `triplets` is not defined anywhere in this notebook). `extract_triplets`
# can return None, so fall back to an empty list in that case.
for i, triplet in enumerate(Model_Generated_triplets or [], start=1):
    uris = process_triplet(triplet)
    triplet_uris.append(uris)

    # Print the results in the desired format
    print(f"Triplet {i}: {triplet}")
    print(f"Subject: {uris['subject']}")
    print(f"Predicate: {uris['predicate']}")
    print(f"Object: {uris['object']}")
    print()

Triplet 1: ('Barack Hussein Obama II', 'was born on', 'August 4, 1961')
Subject: http://www.wikidata.org/entity/Q76
Predicate: http://www.wikidata.org/entity/P1464
Object: http://www.wikidata.org/entity/Q69285218

Triplet 2: ('Barack Hussein Obama II', 'served as', '44th president of the United States')
Subject: http://www.wikidata.org/entity/Q76
Predicate: http://www.wikidata.org/entity/P179
Object: http://www.wikidata.org/entity/Q17595309

Triplet 3: ('Barack Hussein Obama II', 'belonged to', 'Democratic Party')
Subject: http://www.wikidata.org/entity/Q76
Predicate: http://www.wikidata.org/entity/P1001
Object: http://www.wikidata.org/entity/Q47729

Triplet 4: ('Barack Hussein Obama II', 'was the first', 'African-American president in U.S. history')
Subject: http://www.wikidata.org/entity/Q76
Predicate: None
Object: http://www.wikidata.org/entity/Q429311

Triplet 5: ('Barack Hussein Obama II', 'previously served as', 'U.S. senator representing Illinois')
Subject: http://www.wikidata.o