In [1]:
import sys
import os
import re

# Make the parent of the current working directory importable so that the
# project-local `prompts` and `llm` packages below can be resolved.
# NOTE(review): assumes the notebook is run from a direct subfolder of the project root.
sys.path.append(os.path.abspath(".."))
from prompts.templates import get_prompt_template

# With the path extended, the project's LLM client class can be imported.
from llm.base import QwenLLM

In [2]:
# Read the API key from the environment instead of embedding it in the notebook.
# NOTE(review): the key that used to be hardcoded on this line has been exposed
# in the notebook source and should be revoked/rotated.
api_key = os.environ.get("QWEN_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the QWEN_API_KEY environment variable before running this notebook."
    )
model = QwenLLM(api_key=api_key)

In [3]:
import json

input_file = "../data/val.jsonl"

# Load the JSON Lines input: one JSON object per line.
# - The encoding is pinned to UTF-8 so the result does not depend on the
#   platform's default locale encoding.
# - Blank lines (e.g. a trailing newline at the end of the file) are skipped
#   instead of making json.loads raise.
with open(input_file, encoding="utf-8") as f:
    input_rows = [json.loads(line) for line in f if line.strip()]

In [None]:
# First-stage prompts, keyed by relation name. Each one asks the model to reason
# step by step and to finish with a yes/no verdict; the `{subject_entity}`
# placeholder is left for the prompt-template helper to fill in.
YES_NO_PROMPTS = {
    "countryLandBordersCountry": "Think step by step: 1) What type of territory is {subject_entity}? 2) Is it surrounded by water or connected to land? 3) If connected to land, does it border other countries? Final answer (yes/no): Does {subject_entity} share land borders?",
    # Step numbering fixed: the first step had no "1)" and "2)" appeared twice.
    "personHasCityOfDeath": "Think step by step: 1) When was {subject_entity} born? 2) Are they still alive as of my last knowledge update? 3) Based on the above analysis, what is the current status? Final answer (yes/no): Is {subject_entity} dead?",
    "companyTradesAtStockExchange": "Think step by step: 1) What type of entity is {subject_entity}? 2) Is it a publicly traded company? 3) If so, does it trade on a stock exchange? Final answer (yes/no): Does {subject_entity} trade on a stock exchange?",
}

# Second-stage prompts, keyed by relation name. They are sent only after the
# first-stage answer was "yes" and ask for the concrete object entities.
FOLLOW_UP_PROMPTS = {
    "countryLandBordersCountry": (
        "You've said that {subject_entity} has land borders. "
        "List the countries that {subject_entity} shares land borders with."
    ),
    "personHasCityOfDeath": (
        "You've said that {subject_entity} is dead. "
        "What city did {subject_entity} die in?"
    ),
    "companyTradesAtStockExchange": (
        "You've said that {subject_entity} trade on stock exchange. "
        "What stock exchange does {subject_entity} trade on?"
    ),
}

# Single-stage prompts, keyed by relation name. The two numeric relations ask
# for a number as the last thing in the answer; the award relation asks for a
# Python-style list of winners.
OTHER_RELATIONS = {
    "hasArea": (
        "Think step by step: "
        "1) What type of entity is {subject_entity}? (country, city, building, geographic feature, etc.) "
        "2) What is the official area of {subject_entity} in square kilometers? "
        "3) Are there any variations in how this area might be reported? "
        "Final answer (numbers): What is the area of {subject_entity} in square kilometers? "
        "Provide the number as the final thing"
    ),
    "hasCapacity": (
        "Think step by step: "
        "1) What type of entity is {subject_entity}? (stadium, building, etc.) "
        "2) What is the seating capacity of {subject_entity}? "
        "3) Has the seating capacity changed over time due to renovations or reconfigurations? "
        "Final answer (numbers): What is the capacity of {subject_entity}? "
        "Provide the number as the final thing"
    ),
    "awardWonBy": (
        "Think step by step: "
        "1) What type of award is {subject_entity}? (annual, one-time, category-specific, etc.)? "
        "2) Are there multiple categories or a single winner for this award? "
        "3) How many people have won this award? "
        "Final answer (text): Who has won the {subject_entity}? "
        "Provide the answer as a python list of ALL winners"
    ),
}

In [None]:
def _final_word(response):
    """Return the last word of ``response`` in lower case, or "" if there is none.

    Surrounding punctuation/markdown is ignored, so "Yes.", "**yes**" and "yes"
    all yield "yes". An empty response yields "" instead of raising IndexError.
    """
    words = re.findall(r"\w+", response)
    return words[-1].lower() if words else ""


def _split_entities(response):
    """Split a ", "-separated answer into stripped, non-empty entity strings."""
    return [part.strip() for part in response.split(", ") if part.strip()]


def _parse_winners(response):
    """Return the quoted names found in the first ``[...]`` span of ``response``.

    Names are returned without their surrounding quotes (double or single).
    Returns an empty list when there is no bracketed span or it holds no
    quoted names.
    """
    bracketed = re.findall(r"\[(.*?)\]", response, re.DOTALL)
    if not bracketed:
        return []
    # Two capture groups -> findall yields (double_quoted, single_quoted) pairs,
    # exactly one of which is non-empty for each match.
    quoted = re.findall(r'"([^"]+)"|\'([^\']+)\'', bracketed[0])
    return [double or single for double, single in quoted]


def _final_number(response):
    """Return the final token of ``response`` as a plain number string, else None.

    Wrapping punctuation/markdown is stripped and thousands separators are
    removed ("**652,230**." -> "652230"). None is returned when the response is
    empty or its final token is not a number.
    """
    tokens = response.split()
    if not tokens:
        return None
    cleaned = tokens[-1].strip("*_`.,;:!?()[]\"'").replace(",", "")
    return cleaned if re.fullmatch(r"-?\d+(?:\.\d+)?", cleaned) else None


# Query the model once (or twice for the yes/no relations) per input row and
# collect one prediction record per row. `ObjectEntities` is always a list.
results = []

for row in input_rows:
    subject = row['SubjectEntity']
    relation = row['Relation']

    if relation in YES_NO_PROMPTS:
        # Stage 1: ask whether the relation has any object at all.
        verdict = _final_word(model.chat(get_prompt_template(row, YES_NO_PROMPTS)))
        print(f"SubjectEntity: {subject}, Relation: {relation}, Response: {verdict}")
        if verdict == "yes":
            # Stage 2: only then ask for the actual object entities.
            follow_up = model.chat(get_prompt_template(row, FOLLOW_UP_PROMPTS))
            print(f"Follow-up Response: {follow_up}")
            object_entities = _split_entities(follow_up)
        else:
            object_entities = []
    elif relation == "awardWonBy":
        response = model.chat(get_prompt_template(row, OTHER_RELATIONS))
        object_entities = _parse_winners(response)
        # Show the raw response when nothing could be parsed, to ease debugging.
        shown = ", ".join(object_entities) if object_entities else response
        print(f"SubjectEntity: {subject}, Relation: {relation}, Response: {shown}")
    elif relation in ("hasArea", "hasCapacity"):
        response = model.chat(get_prompt_template(row, OTHER_RELATIONS))
        number = _final_number(response)
        # Wrapped in a list so numeric relations have the same record shape as
        # every other relation; empty when no number could be extracted.
        object_entities = [number] if number is not None else []
        print(f"SubjectEntity: {subject}, Relation: {relation}, Response: {number}")
    else:
        # Relations without a prompt are skipped and produce no record.
        continue

    results.append({
        "SubjectEntity": subject,
        "Relation": relation,
        "ObjectEntities": object_entities,
    })

In [None]:
# Show the collected prediction records as the cell output for a visual check.
results

In [9]:
# Persist the predictions as JSON Lines: one record per line, with non-ASCII
# characters written verbatim rather than as \u escapes.
with open("output.jsonl", "w", encoding="utf-8") as out_file:
    out_file.writelines(
        json.dumps(record, ensure_ascii=False) + "\n" for record in results
    )