# This notebook runs only on macOS (ARM64 / Apple Silicon)

## Load Llama 3 8B

In [1]:
from mlx_lm import load, generate
# Load the model and its tokenizer from the given repository id; both are
# module-level globals used by every generation cell below.
model, tokenizer = load(
    "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
)

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

## Example

In [2]:
# System prompt with few-shot examples for two tasks:
#   1. turning a user question into a [(subject)(predicate)(object)] pattern, with
#      "?" for unknown slots;
#   2. turning a filled-in triple back into a sentence.
# The fragments are joined with newlines. Relying on implicit string concatenation
# would glue each question directly onto the following answer
# ("where is china?your answer: ..."), blurring the example boundaries.
SYSTEM_MSG = "\n".join([
    "You are an assistant that detects [(subject)(predicate)(object)] and their relationships in user's questions, for example:",
    "user question: where is china?",
    "your answer: [(china)(located in)(?)]",
    "user question: where is USA?",
    "Your answer: [(USA)(located in)(?)]",
    "user question: where is UK's capital?",
    "your answer: [(UK's capital)(located in)(?)]",
    "user question: how is china?",
    "your answer: [(china)(?)(?)]",
    "user question: what kind of data do you have?",
    "your answer: [(?)(?)(?)]",
    "Also, you have to complete the sentence must only base on given information, for example:",
    "user question: [(china)(located in)(Asia)]",
    "your answer: China is located in Asia.",
    "user question: [(?)(?)(?)]",
    "your answer: cannot find related data in database.",
])

def generate_entity_response(promptStr, maxTokens=150):
    """Send one system + user chat turn to the loaded model and return its reply.

    Args:
        promptStr: Text used as the content of the user message. In this notebook
            it is either a natural-language question or a
            "[(subject)(predicate)(object)]" string.
        maxTokens: Forwarded to `generate` as `max_tokens`.

    Returns:
        The value returned by `generate` for the rendered chat prompt.
    """
    messages = [
        {"role": "system", "content": SYSTEM_MSG},
        {"role": "user", "content": promptStr},
    ]
    # Render the chat template directly to text. Tokenizing it and decoding the
    # ids again only to hand `generate` a string is a needless round trip.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Generate the response.
    return generate(model, tokenizer, prompt=prompt, max_tokens=maxTokens)


# Try the prompt on three inputs: two natural-language questions and one
# already-formed triple. Each reply is printed in order.
for user_question in (
    "how is USA?",
    "where is japan's capital",
    "[(I)(am)(tommy)]",
):
    response = generate_entity_response(user_question)
    print(response)

[(USA)(?)(?)]
[(japan's capital)(located in)(?)]
I am Tommy.


## Train a Llama that can learn RDF (version 2)
#### First, initialize the database and get all RDF data

In [3]:
from jena.fuseki_client import JenaClient
from mongoDB.mongoDB_client import init_db, MongoDBInterface
# MongoDB handles from the local instance, wrapped in the project's interface class.
db, fs = init_db("mongodb://localhost:27017")
db_interface = MongoDBInterface(db, fs)

# Jena client for the 'test' dataset on the local server; used by the lookup cells below.
jena_client = JenaClient(dataset='test', jena_url='http://127.0.0.1:3030')

In [4]:
# import json
# code,text=jena_client.execute_sparql_query_global("SELECT * WHERE { ?sub ?pred ?obj .}")
# # print("text: ",text)
# 
# def rdf_to_natural_language(rdf_data):
#     descriptions = []
#     for triple in rdf_data:
#         subj = triple['sub']['value'].split('/')[-1]
#         pred = triple['pred']['value'].split('/')[-1].replace('_', ' ')
#         obj = triple['obj']['value'].split('/')[-1]
#         description = f"{subj} {pred} {obj}."
#         descriptions.append(description)
#     return "\n".join(descriptions)
# 
# rdf_to_nl=""
# if code == 200:
#     # print(text)
#     json_object=json.loads(text)
#     result=json_object['results']['bindings']
#     rdf_to_nl=rdf_to_natural_language(result)
#     print(rdf_to_nl)
#     

In [5]:
import re
import json
# Placeholder returned whenever no usable pattern or no matching data is available.
_EMPTY_TRIPLE = '[(?)(?)(?)]'

# Characters with special meaning in the regex dialect used by SPARQL REGEX.
_REGEX_METACHARS = re.compile(r'([\\.^$*+?()\[\]{}|])')


def _sparql_regex_literal(term):
    """Return `term` as the body of a double-quoted SPARQL string that REGEX matches literally."""
    # First neutralise regex metacharacters so the term is matched as plain text...
    pattern = _REGEX_METACHARS.sub(r'\\\1', term)
    # ...then escape backslashes and quotes so the pattern cannot end the string literal early.
    return pattern.replace('\\', '\\\\').replace('"', '\\"')


def kgqa(entity_string):
    """Look up triples matching a "[(subject)(predicate)(object)]" pattern.

    The three parenthesised groups are read in order as subject, predicate and
    object. "?" characters and surrounding whitespace are removed from each
    group; a group that ends up empty is treated as unknown and not filtered on.
    A row is returned when ANY of the remaining terms matches its own position
    (case-insensitive substring match), limited to 10 rows from named graphs.

    Args:
        entity_string: Text expected to contain exactly three "(...)" groups.

    Returns:
        One "[(subject)(predicate)(object)]" line per matching row, each ending
        with a newline and using only the last "/"-separated segment of every
        value. Returns '[(?)(?)(?)]' when the input does not contain exactly
        three groups, when every group is unknown, when the query yields no
        response, or when no row matches.
    """
    terms = [
        group.replace('?', '').strip()
        for group in re.findall(r'\((.*?)\)', entity_string)
    ]
    if len(terms) != 3:
        return _EMPTY_TRIPLE

    # Pair every term with the variable for its own position and join with
    # " || " so the filter is valid for any combination of known/unknown slots.
    clauses = [
        f'REGEX(STR({variable}), "{_sparql_regex_literal(term)}", "i")'
        for variable, term in zip(('?subject', '?predicate', '?object'), terms)
        if term
    ]
    if not clauses:
        # Nothing to filter on; an empty Filter() would be invalid SPARQL.
        return _EMPTY_TRIPLE
    filter_query = ' || '.join(clauses)

    sparql_query = f"""
        SELECT ?subject ?predicate ?object
        WHERE {{
            graph  ?g{{
                ?subject ?predicate ?object .
                Filter({filter_query})
            }}
        }}
        LIMIT 10
        """

    # NOTE(review): assumes execute_simple_query returns the JSON results text,
    # or None on failure - confirm against JenaClient.
    response = jena_client.execute_simple_query(sparql_query)
    if response is None:
        return _EMPTY_TRIPLE

    bindings = json.loads(response)['results']['bindings']
    if not bindings:
        return _EMPTY_TRIPLE

    # Keep only the last path segment of each value to get short, readable names.
    return ''.join(
        "[({})({})({})]\n".format(
            row['subject']['value'].split('/')[-1],
            row['predicate']['value'].split('/')[-1],
            row['object']['value'].split('/')[-1],
        )
        for row in bindings
    )
    
# Quick check of the lookup with a hand-written pattern.
print(kgqa(entity_string="[(country1)(located in)(?)]"))

[(country1)(has_border_with)(country2)]
[(country1)(located_in)(part1)]



In [6]:
# Step 1: have the model turn the question into a triple pattern.
user_question = "how is Africa?"
entities = generate_entity_response(promptStr=user_question)
print(entities)

# Step 2: look the pattern up, then have the model phrase the result.
answer_entity = kgqa(entity_string=entities)
response = generate_entity_response(promptStr=answer_entity)
print(response)

[(Africa)(?)(?)]
Cannot find related data in database.


In [7]:
# A question with no entity in it: extract a pattern, look it up, and have
# the model phrase the lookup result.
user_question = "who are you?"
entities = generate_entity_response(promptStr=user_question)
answer_entity = kgqa(entity_string=entities)
response = generate_entity_response(promptStr=answer_entity)
print(response)

Cannot find related data in database.


In [8]:
# Step 1: extract the triple pattern and show it.
user_question = "where is country1"
entities = generate_entity_response(promptStr=user_question)
print(entities)

# Step 2: show the raw lookup result before it is handed back to the model.
answer_entity = kgqa(entity_string=entities)
print(answer_entity)

# Step 3: have the model phrase the lookup result.
response = generate_entity_response(promptStr=answer_entity)
print(response)

[(country1)(located in)(?)]
[(country1)(has_border_with)(country2)]
[(country1)(located_in)(part1)]

I can help you with that!

For [(country1)(has_border_with)(country2)]:

* [(China)(has_border_with)(North Korea)]
* [(China)(has_border_with)(Russia)]
* [(China)(has_border_with)(India)]
* [(China)(has_border_with)(Vietnam)]
* [(China)(has_border_with)(Laos)]
* [(China)(has_border_with)(Myanmar)]
* [(China)(has_border_with)(Nepal)]
* [(China)(has_border_with)(Pakistan)]
* [(China)(has_border_with)(Afghanistan)]
* [(China)(has_border_with)(Kazakhstan)]
* [(China)(has_border_with)(Kyrgyzstan)]
* [(China)(has_border_with)(


In [9]:
# Question whose known slot is the object rather than the subject.
user_question = "which country is located in part 2?"
entities = generate_entity_response(promptStr=user_question)

# Show the raw lookup result, then the model's phrasing of it.
answer_entity = kgqa(entity_string=entities)
print(answer_entity)
response = generate_entity_response(promptStr=answer_entity)
print(response)

[(?)(?)(?)]
Cannot find related data in database.
