# This is only for mac (ARM64)

## Import llama3-8b

In [1]:
from mlx_lm import load, generate
model, tokenizer = load("mlx-community/Meta-Llama-3-8B-Instruct-4bit")

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

## Example

In [2]:
SYSTEM_MSG = ("You are an assistant that detects [(subject)(predicate)(object)] and their relationships in user's questions, for example:"
              "user question: where is china?"
              "your answer: [(china)(located in)(?)]"
              "user question: where is USA?"
              "Your answer: [(USA)(located in)(?)]"
              "user question: where is UK's capital?"
              "your answer: [(UK's capital)(located in)(?)]"
              "user question: how is china?"
              "your answer: [(china)(?)(?)]"
              "user question: what kind of data do you have?"
              "your answer: [(?)(?)(?)]"
              "Also, you have to complete the sentence must only base on given information, for example:"
              "user question: [(china)(located in)(Asia)]"
              "your answer: China is located in Asia."
              "user question: [(?)(?)(?)]"
              "your answer: cannot find related data in database.")

def generate_entity_response(promptStr, maxTokens=150):
    messages = [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": promptStr},
    ]
    # print(messages)
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    prompt = tokenizer.decode(input_ids)

    # generate response
    response = generate(model, tokenizer, prompt=prompt, max_tokens=maxTokens)

    return response


# example
user_question = "how is USA?"
response = generate_entity_response(user_question)
print(response)

# other question
user_question = "where is japan's capital"
response = generate_entity_response(user_question)
print(response)

user_question = "[(I)(am)(tommy)]"
response = generate_entity_response(user_question)
print(response)

[(USA)(?)(?)]
[(japan's capital)(located in)(?)]
I am Tommy.


## Train a llama which can learn rdf (version 2)
#### first, initial database and get all rdf data

In [3]:
from jena.fuseki_client import JenaClient
from mongoDB.mongoDB_client import init_db, MongoDBInterface
db, fs = init_db(
    "mongodb://localhost:27017")
db_interface = MongoDBInterface(db, fs)

jena_client = JenaClient(jena_url='http://127.0.0.1:3030', dataset='test')  

In [4]:
# import json
# code,text=jena_client.execute_sparql_query_global("SELECT * WHERE { ?sub ?pred ?obj .}")
# # print("text: ",text)
# 
# def rdf_to_natural_language(rdf_data):
#     descriptions = []
#     for triple in rdf_data:
#         subj = triple['sub']['value'].split('/')[-1]
#         pred = triple['pred']['value'].split('/')[-1].replace('_', ' ')
#         obj = triple['obj']['value'].split('/')[-1]
#         description = f"{subj} {pred} {obj}."
#         descriptions.append(description)
#     return "\n".join(descriptions)
# 
# rdf_to_nl=""
# if code == 200:
#     # print(text)
#     json_object=json.loads(text)
#     result=json_object['results']['bindings']
#     rdf_to_nl=rdf_to_natural_language(result)
#     print(rdf_to_nl)
#     

In [5]:
import re
import json
def kgqa(entity_string):
    matches = re.findall(r'\((.*?)\)', entity_string)
    if len(matches) == 3:
        matches = [item.replace('?', '') for item in matches]
        filter_query =''
        if matches[0]!='':
            filter_query+=f'REGEX(STR(?subject), "{matches[0]}", "i")'
        if matches[1]!='':
            filter_query+=f'|| REGEX(STR(?object), "{matches[1]}", "i")'
        if matches[2]!='':
            filter_query+=f'|| REGEX(STR(?predicate), "{matches[2]}", "i")'
            
        sparql_query = f"""
        SELECT ?subject ?predicate ?object
        WHERE {{
            graph  ?g{{
                ?subject ?predicate ?object .
                Filter({filter_query})
            }}
        }}
        LIMIT 10
        """
        
        response = jena_client.execute_simple_query(sparql_query)
        if response is not None:
            data = json.loads(response)
            # print(data['results']['bindings'])
            if not data['results']['bindings']:
                return '[(?)(?)(?)]'
            extracted_data=''
            # extracted_data = "\n".join([f"{item['subject']['value']} {item['predicate']['value']} {item['object']['value']}" for item in data['results']['bindings']])
            for item in data['results']['bindings']:
                extracted_data+=f"[({item['subject']['value'].split('/')[-1]})({item['predicate']['value'].split('/')[-1]})({item['object']['value'].split('/')[-1]})]\n"
                # print("data:",extracted_data)
            return extracted_data
        else:
            return '[(?)(?)(?)]'
    else:
        return '[(?)(?)(?)]'

print(kgqa("[(country1)(located in)(?)]"))

[(country1)(has_border_with)(country2)]
[(country1)(located_in)(part1)]



In [6]:
user_question = "how is Africa?"
entities = generate_entity_response(user_question)
print(entities)
answer_entity=kgqa(entities)
response=generate_entity_response(answer_entity)
print(response)

[(Africa)(?)(?)]
Cannot find related data in database.


In [7]:
user_question = "who are you?"
entities = generate_entity_response(user_question)
answer_entity=kgqa(entities)
response=generate_entity_response(answer_entity)
print(response)

Cannot find related data in database.


In [8]:
user_question = "where is country1"
entities = generate_entity_response(user_question)
print(entities)
answer_entity=kgqa(entities)
print(answer_entity)
response=generate_entity_response(answer_entity)
print(response)

[(country1)(located in)(?)]
[(country1)(has_border_with)(country2)]
[(country1)(located_in)(part1)]

I can help you with that!

For [(country1)(has_border_with)(country2)]:

* [(China)(has_border_with)(North Korea)]
* [(China)(has_border_with)(Russia)]
* [(China)(has_border_with)(India)]
* [(China)(has_border_with)(Vietnam)]
* [(China)(has_border_with)(Laos)]
* [(China)(has_border_with)(Myanmar)]
* [(China)(has_border_with)(Nepal)]
* [(China)(has_border_with)(Pakistan)]
* [(China)(has_border_with)(Afghanistan)]
* [(China)(has_border_with)(Kazakhstan)]
* [(China)(has_border_with)(Kyrgyzstan)]
* [(China)(has_border_with)(


In [9]:
user_question = "which country is located in part 2?"
entities = generate_entity_response(user_question)
answer_entity=kgqa(entities)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

question: which country is located in part 2?
Llama response: Cannot find related data in database.



# Response Time Test

In [10]:
def kgqa_local(entity_string,graph):
    matches = re.findall(r'\((.*?)\)', entity_string)
    if len(matches) == 3:
        matches = [item.replace('?', '') for item in matches]
        filter_query =''
        if matches[0]!='':
            filter_query+=f'REGEX(STR(?subject), "{matches[0]}", "i")'
        if matches[1]!='':
            filter_query+=f'|| REGEX(STR(?object), "{matches[1]}", "i")'
        if matches[2]!='':
            filter_query+=f'|| REGEX(STR(?predicate), "{matches[2]}", "i")'
            
        sparql_query = f"""
        SELECT ?subject ?predicate ?object
        WHERE {{
            ?subject ?predicate ?object .
            Filter({filter_query})
        }}
        LIMIT 10
        """
        extracted_data=''
        result=graph.query(sparql_query)
        if result is not None:
            for s, o, p in result:
                extracted_data+=f"[({s.split('/')[-1]})({o.split('/')[-1]})({p.split('/')[-1]})]\n"
            return extracted_data
        return '[(?)(?)(?)]'

In [11]:
from rdflib import Graph


# read data
def transfer_rdf_to_graph(rdfData_path):
    g = Graph()
    g.parse(rdfData_path, format="ttl")
    return g

In [12]:
import time

user_question = "where is Redfish Lake?"
start_time = time.time()
g_10=transfer_rdf_to_graph("rdf_data/test_dataset/segment_1.ttl")
entities = generate_entity_response(user_question)
print(f"Question Entity: {entities}")
answer_entity=kgqa_local(entities,g_10)
print(f"Answer Found: {answer_entity}")
response=generate_entity_response(answer_entity)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Response Time: {elapsed_time:.4f} Second")
print(f"question: {user_question}\nLlama response: {response}\n")

Question Entity: [(Redfish Lake)(located in)(?)]
Answer Found: 
Response Time: 2.6750 Second
question: where is Redfish Lake?
Llama response: I'm ready to help! Please go ahead and ask your question. I'll detect the subject, predicate, and object, and provide a response based on the given information.



In [13]:
user_question = "where is Chamisso Wilderness?"
start_time = time.time()
g_50=transfer_rdf_to_graph("rdf_data/test_dataset/segment_2.ttl")
entities = generate_entity_response(user_question)
print(f"Question Entity: {entities}")
answer_entity=kgqa_local(entities,g_50)
print(f"Answer Found: {answer_entity}")
response=generate_entity_response(answer_entity)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Response Time: {elapsed_time:.4f} Second")
print(f"question: {user_question}\nLlama response: {response}\n")

Question Entity: [(Chamisso Wilderness)(located in)(?)]
Answer Found: 
Response Time: 2.6387 Second
question: where is Chamisso Wilderness?
Llama response: I'm ready to help! Please go ahead and ask your question. I'll detect the subject, predicate, and object, and provide a response based on the given information.



In [14]:
user_question = "what is the Utto??"
start_time = time.time()
g_100=transfer_rdf_to_graph("rdf_data/test_dataset/segment_3.ttl")
entities = generate_entity_response(user_question)
print(f"Question Entity: {entities}")
answer_entity=kgqa_local(entities,g_100)
print(f"Answer Found: {answer_entity}")
response=generate_entity_response(answer_entity)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Response Time: {elapsed_time:.4f} Second")
print(f"question: {user_question}\nLlama response: {response}\n")

Question Entity: [(Utto)(?)(?)]
Answer Found: [(Utto)(rdf-schema#comment)(The Blessed Utto was the first abbot of the Bavarian Metten Abbey of the Benedictine Order. His feast is celebrated on October 3.)]

Response Time: 3.0405 Second
question: what is the Utto??
Llama response: Utto is the subject, rdf-schema#comment is the predicate, and The Blessed Utto was the first abbot of the Bavarian Metten Abbey of the Benedictine Order. His feast is celebrated on October 3 is the object.



## Generalisation Test

In [15]:
g_10=transfer_rdf_to_graph("rdf_data/test_dataset/segment_1.ttl")

user_question = "what is the Gonzalez Spur?"
entities = generate_entity_response(user_question)
answer_entity=kgqa_local(entities,g_10)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

user_question = "what is the Beyond the Sixth Seal?"
entities = generate_entity_response(user_question)
answer_entity=kgqa_local(entities,g_10)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

user_question = "what is the Knockgraffon?"
entities = generate_entity_response(user_question)
answer_entity=kgqa_local(entities,g_10)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

user_question = "who is Nicholas J. Sinnott?"
entities = generate_entity_response(user_question)
answer_entity=kgqa_local(entities,g_10)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

user_question = "where is the Redfish Lake?"
entities = generate_entity_response(user_question)
answer_entity=kgqa_local(entities,g_10)
response=generate_entity_response(answer_entity)
print(f"question: {user_question}\nLlama response: {response}\n")

question: what is the Gonzalez Spur?
Llama response: I'm ready to help! Please go ahead and ask your question. I'll detect the subject, predicate, and object, and provide a response based on the given information.

question: what is the Beyond the Sixth Seal?
Llama response: I'm ready to help! Please go ahead and ask your question. I'll detect the subject, predicate, and object, and provide a response based on the given information.

question: what is the Knockgraffon?
Llama response: I detect the following relationships:

* [(Knockgraffon)(rdf-schema#comment)(Knockgraffon (Irish: Cnoc Rafann or also Cnoc Rath Fionn meaning "Hill of the fort of Fionn") is a townland in the civil parish of same name in County Tipperary, Ireland The civil parish lies in the barony of Middle Third. It is also part of the ecclesiastical parish of New Inn & Knockgraffon in the Roman Catholic Archdiocese of Cashel and Emly. Interesting features include a fine Motte, a church and a castle.)]
	+ [(Knockgraffon