In [1]:
!pip install transformers accelerate bitsandbytes>0.37.0 -q

In [2]:
from transformers import GenerationConfig

In [None]:
import torch
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

# Checkpoint identifier handed to every `from_pretrained` call below.
model_path = "zjunlp/OneKE"

# Device that input tensors are moved to: CUDA when PyTorch reports one, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Quantization settings: 4-bit loading with NF4, double quantization and
# bfloat16 compute. The llm_int8_* values are passed along unchanged.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
)

# Model configuration and tokenizer come from the same checkpoint.
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the causal LM with the quantization settings above; device placement is
# delegated to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
# Switch to evaluation mode (last expression, so the cell also displays the model).
model.eval()

In [8]:
def rewrite_triplets(triplets):
    """Flatten relation-grouped extraction output into a list of triplet records.

    Args:
        triplets: Mapping from a relation label to an iterable of duplets, where
            each duplet is a mapping with "subject" and "object" keys, e.g.
            ``{"creator": [{"subject": "...", "object": "..."}]}``.

    Returns:
        A list of dicts of the form
        ``{"relation": {"label": ..., "wikidata_id": ...},
           "subject": {"label": ...}, "object": {"label": ...}}``
        in input order. ``wikidata_id`` is ``None`` when the relation label has
        no identifier in the lookup table below.

    Malformed entries are reported on stdout and skipped instead of raising.
    Each duplet is validated on its own, so one broken duplet does not discard
    the valid duplets that follow it under the same relation.
    """
    # Relation label -> property identifier; None marks labels without one.
    relation_wikidata = {
        "main subject": "wdt:P921",
        "genre": None,
        "creator": "wdt:P170",
        "located in": "wdt:P276",
        "made from material": "wdt:P186",
        "location of creation": "wdt:P1071",
        "movement": "wdt:P135",
        "alias": None,
        "description": "schema:description",
        "language": None,
        "inception": "wdt:P571",
        "shown with features": "wdt:P1354",
        "depicts": "wdt:P180",
    }

    reformatted = []
    for relation, duplets in triplets.items():
        if isinstance(duplets, str):
            # Iterating a string yields characters, never duplets; report it once
            # instead of once per character.
            print(
                'Exception when rewriting triplets:',
                'expected a list of duplets, got a string',
            )
            continue

        try:
            duplet_iter = iter(duplets)
        except TypeError as e:
            # The relation maps to a non-iterable value such as None or a number.
            print('Exception when rewriting triplets:', e)
            continue

        # Constant for the whole relation, so look it up once.
        wikidata_id = relation_wikidata.get(relation)

        for duplet in duplet_iter:
            try:
                subject_label = duplet["subject"]
                object_label = duplet["object"]
            except (TypeError, KeyError) as e:
                # Skip only this duplet; keep processing the rest.
                print('Exception when rewriting triplets:', e)
                continue

            reformatted.append(
                {
                    "relation": {"label": relation, "wikidata_id": wikidata_id},
                    "subject": {"label": subject_label},
                    "object": {"label": object_label},
                }
            )

    return reformatted

In [None]:
# Build the chat-formatted request: system block + JSON task payload, wrapped
# in [INST] ... [/INST].
system_prompt = '<<SYS>>\nYou are a helpful assistant.\n<</SYS>>\n\n'
sintruct = "{\"instruction\": \"You are an expert in named entity recognition. Please extract entities that match the schema definition from the input. Return an empty list if the entity type does not exist. Please respond in the format of a JSON string.\", \"schema\": [\"person\", \"organization\", \"else\", \"location\"], \"input\": \"284 Robert Allenby ( Australia ) 69 71 71 73 , Miguel Angel Martin ( Spain ) 75 70 71 68 ( Allenby won at first play-off hole )\"}"
sintruct = '[INST] ' + system_prompt + sintruct + '[/INST]'

input_ids = tokenizer.encode(sintruct, return_tensors="pt").to(device)
# Remember the prompt length so the echoed prompt can be cut off the result.
input_length = input_ids.size(1)
# Only `max_new_tokens` is set: when `max_length` is given as well it is
# overridden by `max_new_tokens` and merely produces a warning.
generation_output = model.generate(
    input_ids=input_ids,
    generation_config=GenerationConfig(
        max_new_tokens=512,
        return_dict_in_generate=True,
    ),
)
generation_output = generation_output.sequences[0]
# Keep only the tokens generated after the prompt.
generation_output = generation_output[input_length:]
output = tokenizer.decode(generation_output, skip_special_tokens=True)

print(output)

# Load some digital humanities (DH) resources

In [None]:
import requests

##################### Change something here
#master = "https://raw.githubusercontent.com/TIBHannover/ReflectAI-DHd2025/refs/heads/main/data/Man_with_a_Book.txt"
#master = "https://raw.githubusercontent.com/TIBHannover/ReflectAI-DHd2025/refs/heads/main/data/Hunter_Getting_Dress.txt"
#master = "https://raw.githubusercontent.com/TIBHannover/ReflectAI-DHd2025/refs/heads/main/data/Conversion_of_Mary_M.txt"
#master = "https://raw.githubusercontent.com/TIBHannover/ReflectAI-DHd2025/refs/heads/main/data/Perseus_und_Andromed.txt"
master = "https://raw.githubusercontent.com/TIBHannover/ReflectAI-DHd2025/refs/heads/main/data/Portrait_of_Laura_Di.txt"
##################### Change something here

# Download the selected document. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops the notebook on an HTTP
# error instead of passing an error page on as the input text.
req = requests.get(master, timeout=30)
req.raise_for_status()
text = req.text
print(text)

In [6]:
import json

# System block, using the same whitespace as the NER example cell (no leading
# newline, blank line after the closing tag) so both prompts share one template.
system_prompt = "<<SYS>>\nYou are a helpful assistant.\n<</SYS>>\n\n"

# Task payload: instruction, relation schema (label -> description) and the
# downloaded document as input.
prompt_payload = {
    "instruction": "You are an expert specializing in relation extraction. Please extract relationship triples that comply with the schema definition from the input; return an empty list for non-existent relationships. Please respond in the JSON string format.",
    "schema": {
        ##################### Change something here
        "creator": "The name or pseudonym of the painter that created the painting.",
        ##################### Change something here
    },
    "example": [],
    "input": text,
}

# ensure_ascii=False keeps non-ASCII characters of the source text as-is
# instead of rewriting them as \uXXXX escape sequences in the prompt.
prompt = json.dumps(prompt_payload, ensure_ascii=False)
sintruct = "[INST] " + system_prompt + prompt + "[/INST]"

In [None]:
# Tokenize the prepared prompt and move it to the selected device.
input_ids = tokenizer.encode(sintruct, return_tensors="pt").to(device)
# Remember the prompt length so the echoed prompt can be cut off the result.
input_length = input_ids.size(1)
# Only `max_new_tokens` is set: when `max_length` is given as well it is
# overridden by `max_new_tokens` and merely produces a warning.
generation_output = model.generate(
    input_ids=input_ids,
    generation_config=GenerationConfig(
        max_new_tokens=512,
        return_dict_in_generate=True,
    ),
)
generation_output = generation_output.sequences[0]
# Keep only the tokens generated after the prompt.
generation_output = generation_output[input_length:]
output = tokenizer.decode(generation_output, skip_special_tokens=True)

print(output)

In [12]:
# Parse the decoded model reply as JSON and convert it to a flat list of
# triplet records with rewrite_triplets.
# NOTE(review): json.loads raises if the reply is not valid JSON (for example
# a generation cut off by the token limit) — confirm that failing here is intended.
triplets = rewrite_triplets(json.loads(output))

In [None]:
# Pretty-print the rewritten triplets as JSON with two-space indentation.
print(json.dumps(triplets, indent=2))