In [1]:
# Import necessary modules
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import re
from typing import List

from transformers import Pipeline, pipeline


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prompt templates for the extraction dialog.
# System message that opens every new dialog.
DEFAULT_SYSTEM_PROMPT = "You are a helpful information extraction system."

# First user turn; `{passage}` is the placeholder for the text to analyse.
DEFAULT_FIRST_PROMPT = (
    'Given a passage, your task is to extract all entities and identify their entity types. '
    'The output should be in a list of tuples of the following format: '
    '[("entity 1", "type of entity 1"), ... ].'
    '\n\n'
    'Passage: {passage}'
)

# Follow-up user turn; `{entity}` is the placeholder for the entity type being queried.
DEFAULT_QUESTION_PROMPT = 'What describes "{entity}" in the text?'


In [3]:
# Checkpoint identifier handed to `pipeline(...)`.
# Loading this AWQ-quantized checkpoint requires the auto-awq library (`pip install autoawq`);
# the author's noted alternative checkpoint is 'Qwen/Qwen2.5-1.5B-Instruct'.
MODEL_NAME = 'Qwen/Qwen2.5-7B-Instruct-AWQ'
# The tokenizer identifier is the same checkpoint as the model.
TOKENIZER = MODEL_NAME

In [4]:
from tqdm import tqdm


class QwenPipelineNER(Pipeline):
    """Dialog-driven entity extraction built on top of a chat generation pipeline.

    A dialog is opened with a system prompt and a first user prompt containing
    the passage; the wrapped ``llm_pipe`` produces the first reply. For every
    requested entity type a follow-up question is then appended to the same
    dialog, the reply is parsed as a JSON list of strings, and each string is
    located in the passage to produce character spans.

    Instance state:
        llm_pipe:   callable invoked as ``llm_pipe(messages, max_new_tokens=...)``;
                    the last item of ``result[0]["generated_text"]`` is used as
                    the reply message (a dict with at least a ``'content'`` key).
        messages:   chat history of the current dialog.
        input_text: passage of the current dialog, or ``None`` before the first
                    dialog has been started.
    """

    # Generation budget used for every call to the wrapped LLM pipeline.
    _MAX_NEW_TOKENS = 512

    # Chat history of the current dialog. Only annotated here; the list itself
    # is created per instance in __init__ so instances never share history.
    messages: List[dict]

    def __init__(self, *args, llm_pipe, **kwargs) -> None:
        """Forward ``*args``/``**kwargs`` to ``Pipeline`` and store ``llm_pipe``."""
        super().__init__(*args, **kwargs)
        self.llm_pipe = llm_pipe
        # Per-instance dialog state (a class-level list would be shared by
        # every instance and mutated by `_forward`).
        self.messages = []
        self.input_text = None

    def _sanitize_parameters(self, *, entities=(), first_prompt=DEFAULT_FIRST_PROMPT, system_prompt=DEFAULT_SYSTEM_PROMPT,question_prompt=DEFAULT_QUESTION_PROMPT, start_new_dialog=False, **kwargs):
        """Split call-time keyword arguments between the pipeline stages.

        Returns:
            A ``(preprocess_kwargs, forward_kwargs, postprocess_kwargs)`` tuple.
            Prompts and ``start_new_dialog`` go to ``preprocess``; the question
            template and the entity types go to ``_forward``; ``postprocess``
            takes no extra arguments. Unknown ``**kwargs`` are ignored.
        """
        preprocess_kwargs = {
            'system_prompt': system_prompt,
            'first_prompt': first_prompt,
            'start_new_dialog': start_new_dialog,
        }
        forward_kwargs = {'question_prompt': question_prompt, 'entities': entities}
        postprocess_kwargs = {}
        return preprocess_kwargs, forward_kwargs, postprocess_kwargs

    def _generate_reply(self):
        """Run the wrapped LLM on the current dialog and return its reply message."""
        return self.llm_pipe(self.messages, max_new_tokens=self._MAX_NEW_TOKENS)[0]["generated_text"][-1]

    @staticmethod
    def _parse_entries(response_text):
        """Parse an LLM reply into a list.

        The whole reply is tried as JSON first. If that fails, the span from
        the first ``[`` to the last ``]`` is tried, which covers replies that
        wrap the list in prose or a code fence.

        Raises:
            ValueError: the reply holds no parseable JSON, or the JSON value is
                not a list. When both attempts fail, the error from parsing the
                whole reply is the one raised.
            TypeError: ``response_text`` is not something ``json.loads`` accepts.
        """
        try:
            parsed = json.loads(response_text)
        except ValueError as whole_text_error:
            start = response_text.find('[')
            end = response_text.rfind(']')
            if start == -1 or end < start:
                raise
            try:
                parsed = json.loads(response_text[start:end + 1])
            except ValueError:
                # Report the failure for the full reply, not for the guessed span.
                raise whole_text_error from None
        if not isinstance(parsed, list):
            raise ValueError(f"expected a JSON list, got {type(parsed).__name__}")
        return parsed

    def preprocess(self, inputs, system_prompt, first_prompt, start_new_dialog):
        """Open a new dialog for ``inputs`` when requested or when none exists yet.

        Opening a dialog stores ``inputs`` as the current passage, resets the
        chat history to the system prompt plus the formatted first prompt, and
        appends the LLM's reply. Otherwise the existing dialog is left as is,
        so later stages keep working on the previously stored passage.

        Returns:
            ``{"model_input": inputs}``.
        """
        # Without a dialog there is no passage to search and no context to ask
        # questions about, so the first call always opens one.
        if start_new_dialog or self.input_text is None:
            self.input_text = inputs
            self.messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": first_prompt.format_map({'passage': inputs})},
            ]
            self.messages.append(self._generate_reply())
        return {"model_input": inputs}

    def _forward(self, model_inputs, question_prompt=DEFAULT_QUESTION_PROMPT, entities=()):
        """Ask the LLM one follow-up question per entity type.

        Each question and reply is appended to the dialog history. A reply that
        cannot be parsed into a JSON list is reported on stdout and yields an
        empty list for that entity type.

        Args:
            model_inputs: output of ``preprocess`` (not read here).
            question_prompt: template with an ``{entity}`` placeholder.
            entities: iterable of entity-type names; a single string is treated
                as one entity type, ``None`` as no entity types.

        Returns:
            Dict mapping each entity type to the list parsed from the reply.
        """
        if isinstance(entities, str):
            # Iterating a bare string would query each character separately.
            entities = (entities,)
        results = {}
        for ent in entities or ():
            prompt = question_prompt.format_map({'entity': ent})
            self.messages.append({"role": "user", "content": prompt})
            response_message = self._generate_reply()
            self.messages.append(response_message)
            response_text = response_message['content']
            print(f"OUTPUT:\n \
                        {response_text}\n \
                        TEXT:\n \
                        {self.input_text}\n \
                        ENTITY:\n \
                        {ent}\n \
                        PROMPT:\n \
                        {prompt}")
            try:
                entries_list = self._parse_entries(response_text)
            except (TypeError, ValueError) as e:
                entries_list = []
                print(f"CANT PARSE LLM OUTPUT AS JSON: {e}\n \
                        OUTPUT:\n \
                        {response_text}\n \
                        TEXT:\n \
                        {self.input_text}\n \
                        ENTITY:\n \
                        {ent}\n \
                        PROMPT:\n \
                        {prompt}")
            results[ent] = entries_list
        return results

    def postprocess(self, model_outputs):
        """Locate every extracted string in the current passage.

        Args:
            model_outputs: dict mapping entity type to the entries returned by
                ``_forward``.

        Returns:
            List of dicts with keys ``'entity'``, ``'word'``, ``'start'`` and
            ``'end'``; ``'end'`` is the index of the last matched character
            (inclusive). Entries that are not non-empty strings are skipped.
            The full dialog history is printed before returning.
        """
        list_result = []
        text = self.input_text
        for entity in tqdm(model_outputs):
            for entry in model_outputs.get(entity) or ():
                # Skip anything that cannot be searched for literally; an empty
                # string would match at every position of the passage.
                if not isinstance(entry, str) or not entry:
                    continue
                # Escape the entry: it is LLM output to be found verbatim, not a regex.
                for occurrence in re.finditer(re.escape(entry), text):
                    list_result.append({
                        'entity': entity,
                        'word': entry,
                        'start': occurrence.start(),
                        'end': occurrence.end() - 1,
                    })
        print('\n'.join(map(str, self.messages)))
        return list_result

In [5]:
# Build the text-generation pipeline for the configured checkpoint.
# TOKENIZER is passed explicitly so that changing the constant actually takes
# effect (it was previously defined but never used).
pipe = pipeline(
    "text-generation",
    model=MODEL_NAME,
    tokenizer=TOKENIZER,
    torch_dtype="auto",
    device_map="auto",
)

ImportError: Loading an AWQ quantized model requires auto-awq library (`pip install autoawq`)

In [None]:
# Wrap the generation pipeline: its model and tokenizer are handed to the base
# Pipeline constructor, and the pipeline itself is used for the LLM calls.
ner_pipe = QwenPipelineNER(
    model=pipe.model,
    tokenizer=pipe.tokenizer,
    llm_pipe=pipe,
)

In [None]:
# Start a fresh dialog on a sample passage and ask for the "LOC" entity type.
res = ner_pipe(
    "I am living in New York city. It is located right next to Los Angeles.",
    start_new_dialog=True,
    entities={"LOC"},
)
res

OUTPUT:
                         In the context provided by the passage, "LOC" stands for "Location". This refers to places or geographical locations such as cities, states, countries, etc. In this case, both "New York City" and "Los Angeles" are described using the "LOC" entity type because they represent specific locations within the United States.
                         TEXT:
                         I am living in New York city. It is located right next to Los Angeles.
                         ENTITY:
                         LOC
                         PROMPT:
                         What describes "LOC" in the text?
CANT PARSE LLM OUTPUT AS JSON: Expecting value: line 1 column 1 (char 0)
                         OUTPUT:
                         In the context provided by the passage, "LOC" stands for "Location". This refers to places or geographical locations such as cities, states, countries, etc. In this case, both "New York City" and "Los Angeles" are described using the "

100%|██████████| 1/1 [00:00<00:00, 18808.54it/s]

{'role': 'system', 'content': 'You are a helpful information extraction system.'}
{'role': 'user', 'content': 'Given a passage, your task is to extract all entities and identify their entity types. The output should be in a list of tuples of the following format: [("entity 1", "type of entity 1"), ... ].\n\nPassage: I am living in New York city. It is located right next to Los Angeles.'}
{'role': 'assistant', 'content': 'Here\'s the extracted information:\n\n[("New York city", "LOC"), ("Los Angeles", "LOC")]'}
{'role': 'user', 'content': 'What describes "LOC" in the text?'}
{'role': 'assistant', 'content': 'In the context provided by the passage, "LOC" stands for "Location". This refers to places or geographical locations such as cities, states, countries, etc. In this case, both "New York City" and "Los Angeles" are described using the "LOC" entity type because they represent specific locations within the United States.'}





[]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
