In [2]:
import json
import os
import pickle

import pandas as pd
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from openai import OpenAI

# from LLM4split.batch_utils import *


In [3]:
def gen_prompt(instruction, question, dataset_name="multiqa"):
    prompt = []
    prompt.append({"role": "system", "content": instruction})

    if dataset_name == "multiqa":  # 分解提示
        prompt.append(
            {
                "role": "system",
                "content": "Instruction: For the query below, split it into semantically aligned sub-queries, separated by |, and only output the sub-queries. Do not include any other information and explanation.",
            }
        )
        prompt.append(
            {"role": "user", "content": "What color is the Santa Anita Park logo?"}
        )
        prompt.append({"role": "assistant", "content": "Santa Anita Park| logo"})

    prompt.append({"role": "user", "content": question})
    return prompt

In [4]:
# tokenizer = AutoTokenizer.from_pretrained("/home/icml01/Models/Llama-3.1-8B-Instruct")
instruction = "Design an omnichannel query decomposition framework that utilizes real-time machine learning and advanced AI techniques to dynamically optimize solution accuracy, efficiency, scalability, and maintainability across complex, dynamic, and evolving queries, while ensuring logical soundness, semantic coherence, and actionable insights through a closed-loop feedback system that continuously adapts to changing query intent, constraints, and context."

In [5]:
df = pd.read_csv("/root/autodl-tmp/multi_rag/RAG/opro/data/multiqa/multiqa_test.csv")

In [6]:
query_list = list(df["query"])

In [7]:
chats = []
for idx in range(len(df)):
    chat = gen_prompt(instruction, df["query"][idx])
    chats.append(
        # tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
        chat
    )
    # chats.append(chat)

In [8]:
chats

[[{'role': 'system',
   'content': 'Design an omnichannel query decomposition framework that utilizes real-time machine learning and advanced AI techniques to dynamically optimize solution accuracy, efficiency, scalability, and maintainability across complex, dynamic, and evolving queries, while ensuring logical soundness, semantic coherence, and actionable insights through a closed-loop feedback system that continuously adapts to changing query intent, constraints, and context.'},
  {'role': 'system',
   'content': 'Instruction: For the query below, split it into semantically aligned sub-queries, separated by |, and only output the sub-queries. Do not include any other information and explanation.'},
  {'role': 'user', 'content': 'What color is the Santa Anita Park logo?'},
  {'role': 'assistant', 'content': 'Santa Anita Park| logo'},
  {'role': 'user', 'content': 'What animals race in the Kentucky Derby?'}],
 [{'role': 'system',
   'content': 'Design an omnichannel query decompositio

In [9]:
call_llm(query_list, chats)

['Kentucky Derby| animals that race',
 'Santa Anita Park logo| color',
 'Blue Eyes (film)| poster| man holding',
 'Felicia Day| object in front of her face',
 'Rachel Dratch| clothing color',
 'Alyson Stoner| hair color',
 'Tampa, Florida| amusement park| collage',
 'Parkersburg, West Virginia| river| water| what is going across',
 'Mississippi flag| colors',
 'Page Municipal Airport| arid climate| region',
 'Point Skyhawks| logo| colors',
 'Giorgio Armani| hair color',
 'Arjen Robben| sleeve| ball',
 'Esporte Clube Santo Andre| logo| top',
 'Black Hawk Down (film)| poster| man| holding in his right hand',
 'Easy Wheels| poster| motorcycle| adult',
 'Glen E. Friedman| jacket| zipped or unzipped',
 'Incredible Bongo Band cover| hitting the bongos',
 'James Brown| hands position',
 'The Marcels| picture| number of people',
 'P. Susheela| earrings style',
 'Plamen Iliev (footballer, born 1991)| wearing green',
 'FC Bolosani| logo| center| type of ball',
 'Kicking and Screaming (1995 film)

In [23]:
import os
import openai
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
# 运行此API配置，需要将目录中的.env中api_key替换为自己的
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file


from concurrent.futures import ProcessPoolExecutor, as_completed
import time
import collections
from langchain.schema import HumanMessage, SystemMessage


chat = ChatOpenAI(
        model='deepseek-chat', 
        openai_api_key="sk-7cf97c394fe34640a8872d9c4132a6c7", 
        openai_api_base='https://api.deepseek.com/v1', 
        temperature=0, 
        max_tokens=512
        )


# 定义预测函数
def predict(params):
    query, input = params
    res = chat.invoke(input)
    res = res.content

    return query, res

start_time = time.time()


with ProcessPoolExecutor(max_workers=10) as executor:

    # 异步调用（多进程并发执行）
    futures = []
    for idx in range(len(query_list)):
        query = query_list[idx]
        prompt = chats[idx]
        job = executor.submit(predict, (query, prompt))
        futures.append(job)


    query2res = collections.defaultdict(int) # 因为异步等待结果，返回的顺序是不定的，所以记录一下进程和输入数据的对应
    # 异步等待结果（返回顺序和原数据顺序可能不一致） ，直接predict函数里返回结果？
    for job in as_completed(futures):
        query, res = job.result(timeout=None)  # 默认timeout=None，不限时间等待结果
        query2res[query] = res
        # time.sleep(1) 


end_time = time.time()
total_run_time = round(end_time-start_time, 3)
print('Total_run_time: {} s'.format(total_run_time))
print(query2res)


Total_run_time: 30.542 s
defaultdict(<class 'int'>, {'How many colors are on the Mississippi flag?': 'Mississippi flag| colors', 'What color is the Santa Anita Park logo?': 'Santa Anita Park logo| color', 'Is there an amusement park featured in the collage of Tampa, Florida?': 'amusement park| collage| Tampa, Florida', 'What color is Rachel Dratch wearing?': 'Rachel Dratch| clothing color', 'What color hair does alyson stoner have?': 'Alyson Stoner| hair color', 'What is going across the river and in the water on Parkersburg, West Virginia?': 'Parkersburg, West Virginia| river| water| what is going across', 'What is the man holding in the poster for Blue Eyes (film)?': 'Blue Eyes (film)| poster| man holding', 'What animals race in the Kentucky Derby?': 'Kentucky Derby| animals that race', "What object is in front of Felicia Day's face?": 'Felicia Day| object in front of her face', "What color is Giorgio Armani's hair?": 'Giorgio Armani| hair color', 'What type of ball is shown on the s

In [10]:
llm = LLM(
    model="/root/autodl-tmp/Models/Llama-3.1-8B-Instruct",
    tensor_parallel_size=2,
    max_model_len=4096,
    gpu_memory_utilization=0.4,
)

INFO 01-23 11:22:55 config.py:905] Defaulting to use mp for distributed inference
INFO 01-23 11:22:55 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='/root/autodl-tmp/Models/Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='/root/autodl-tmp/Models/Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=7039)[0;0m INFO 01-23 11:22:58 model_runner.py:1067] Loading model weights took 7.5122 GB
INFO 01-23 11:22:58 model_runner.py:1067] Loading model weights took 7.5122 GB
INFO 01-23 11:22:59 distributed_gpu_executor.py:57] # GPU blocks: 3509, # CPU blocks: 4096
INFO 01-23 11:22:59 distributed_gpu_executor.py:61] Maximum concurrency for 4096 tokens per request: 13.71x
INFO 01-23 11:23:00 model_runner.py:1395] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 01-23 11:23:00 model_runner.py:1399] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
[1;36m(VllmWorkerProcess pid=7039)[0;0m INFO 01-23 11:23:00 model_runner.py:1395

In [11]:
sampling_params = SamplingParams(temperature=0, max_tokens=20)

outputs = llm.generate(chats, sampling_params)

TypeError: inputs must be a string, TextPrompt, or TokensPrompt

In [9]:
vllm_outputs = []
for output in outputs:
    generated_text = [output.outputs[0].text]
    vllm_outputs.append(generated_text)
vllm_outputs

[['animals| race | in | the | Kentucky | Derby'],
 ['1. What is the name of the Santa Anita Park logo?\n| \n2. What is the'],
 ['poster for Blue Eyes | man | holding | in | the | poster | Blue Eyes (film)'],
 ["object| in | front of | Felicia Day's | face"],
 ['Rachel Dratch| color| wearing'],
 ["Alyson Stoner's hair color|"],
 ['Is there an amusement park| featured in the collage of Tampa, Florida'],
 ['What is going across the river| what is in the water| Parkersburg, West Virginia'],
 ['Mississippi flag| number of colors'],
 ['Page Municipal Airport| arid climate | region'],
 ['Point Skyhawks| logo | colors | count'],
 ["Giorgio Armani's| hair color"],
 ['type of ball| shown on the sleeve of Arjen Robben'],
 ['Esporte Clube Santo André| logo | top'],
 ['The man on the poster for Black Hawk Down (film)| holding in his right hand'],
 ['Is there an adult| on the motorcycle| in the poster| for Easy Wheels'],
 ["Glen E. Friedman's jacket | zipped | unzipped"],
 ['Incredible Bongo Band| c

In [33]:
import asyncio
import time

from openai import OpenAI

start_time = time.time()


client = OpenAI(
    api_key="EMPTY",
    base_url="http://localhost:8010/v1",
)


def run_request(inputs):
    return client.completions.create(
        model="/root/autodl-tmp/Llama-3.1-8B-Instruct",
        max_tokens=30,
        temperature=0,
        top_p=0.9,
        prompt=chats,
    )


tmp = run_request(chats)

ans = []
for item in tmp.choices:
    ans.append([item.text])
ans

end_time = time.time()
elapsed_time = end_time - start_time
print(f"代码执行时间: {elapsed_time}秒")

代码执行时间: 8.003354549407959秒


In [40]:
ans = []
for item in tmp.choices:
    ans.append([item.text])
ans

[['animals| race | in | the | Kentucky | Derby'],
 ['1. What is the name of the Santa Anita Park logo?\n| \n2. What is the color of the logo of the entity identified in sub'],
 ['poster for Blue Eyes | man | holding | in | the | poster | Blue Eyes (film)'],
 ["object| in | front of | Felicia Day's | face"],
 ['Rachel Dratch| color| wearing'],
 ["Alyson Stoner's hair color|"],
 ['Is there an amusement park| featured in the collage of Tampa, Florida'],
 ['What is going across the river| what is in the water| Parkersburg, West Virginia'],
 ['Mississippi flag| number of colors'],
 ['Page Municipal Airport | arid climate | region'],
 ['Point Skyhawks| logo | colors | count'],
 ["Giorgio Armani's| hair color"],
 ['type of ball| shown on the sleeve of Arjen Robben'],
 ['Esporte Clube Santo André| logo | top'],
 ['The man on the poster for Black Hawk Down (film)| holding in his right hand'],
 ['Is there an adult| on the motorcycle| in the poster| for Easy Wheels'],
 ["Glen E. Friedman's jacket

In [23]:
outputs = llm.generate(chats, sampling_params)

Processed prompts: 100%|██████████| 230/230 [00:08<00:00, 27.14it/s, est. speed input: 5108.56 toks/s, output: 371.62 toks/s]


In [26]:
outputs[0].outputs[0].text

'animals|racing|horses|Kentucky Derby'

In [3]:
with open("decom_result.pkl", "rb") as f:
    data = pickle.load(f)
data

[['animals| race | in | the | Kentucky | Derby'],
 ['What is the color of the logo of Santa Anita Park? | What is the logo of Santa Anita Park?'],
 ['Blue Eyes (film)| poster | man | holding'],
 ["object| in front of | Felicia Day's | face"],
 ['Rachel Dratch | color | wearing'],
 ['alyson stoner| hair color'],
 ['amusement park| Tampa, Florida| collage'],
 ['Parkersburg, West Virginia | river | water | going across the river | in the water'],
 ['Mississippi| flag| colors'],
 ['Page Municipal Airport| location| arid climate'],
 ['Point Skyhawks| logo | colors'],
 ['Giorgio Armani| hair'],
 ['Arjen Robben| sleeve| ball'],
 ['logo for Esporte Clube Santo Andre | at the top'],
 ['man on the poster for Black Hawk Down (film)| right hand | holding'],
 ['poster for Easy Wheels | adult | motorcycle'],
 ['Glen E. Friedman| jacket | zipped | unzipped'],
 ['Incredible Bongo Band| cover| bongos'],
 ['James Brown| hands position'],
 ['| The Marcels | picture | number of people'],
 ['P. Susheela| s