In [None]:
import os
# Restrict the process to four GPUs. This is set before vllm is imported,
# presumably so that CUDA initialisation only ever sees these devices
# (TODO confirm); the count matches tensor_parallel_size=4 used when the
# LLM is constructed.
os.environ['CUDA_VISIBLE_DEVICES'] = '2,3,4,5'
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer
import json
from constants_prompt import build_autoj_input

# Local checkpoint directories of the candidate judge models.
# The setup cell picks one entry from this list by index.
model_path_list = [
    '/root/Qwen1.5-7B-Chat',
    '/root/Meta-Llama-3-8B-Instruct',
    '/root/.cache/modelscope/hub/qwen/Qwen1___5-14B-Chat',
    '/root/Nanbeige2-8B-Chat',
    '/root/.cache/modelscope/hub/ZhipuAI/chatglm3-6b',
    '/root/.cache/modelscope/hub/Shanghai_AI_Laboratory/internlm2-chat-7b',
    '/root/.cache/modelscope/hub/deepseek-ai/deepseek-llm-7b-chat',
    '/root/.cache/modelscope/hub/lockonlvange/autoj-13b-fp16',
    '/root/.cache/huggingface/hub/models--WeOpenML--PandaLM-7B-v1/snapshots/PandaLM',
]

In [None]:
# Checkpoint under evaluation: the last entry of model_path_list.
model_name_or_path = model_path_list[-1]

# vLLM engine sharded over 4 GPUs (tensor_parallel_size=4).
llm = LLM(
    model=model_name_or_path,
    tensor_parallel_size=4,
    trust_remote_code=True,
    tokenizer_mode="auto",
    dtype="auto",
)

# Hugging Face tokenizer, used by the dataset cells to render chat templates.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
)

# Sampling settings shared by every dataset cell.
sampling_params = SamplingParams(
    temperature=0.0,
    top_p=1.0,
    max_tokens=16384,
)

# Last path component of the checkpoint; used to name the output directories.
model_name = model_name_or_path.rpartition('/')[2]
from utils import extract_pariwise_result

In [None]:
# autoj
# Judge the Auto-J pairwise test set. The input is read as one JSON record
# per line; each record supplies a prompt and two candidate responses.
input_file_path = '/root/LLM-as-a-judge/datasets/AutoJ/testdata_pairwise.jsonl'

# Parse the file once and keep the records, so every prediction is written
# back onto the exact record its prompt was built from. Blank lines are
# skipped instead of crashing json.loads.
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_autoj = [json.loads(line) for line in f if line.strip()]

texts_autoj = []
for item in input_data_autoj:
    prompt = build_autoj_input(prompt=item['prompt'], resp1=item['response 1'], resp2=item['response 2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_autoj.append(text)

outputs_autoj = llm.generate(texts_autoj, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_autoj, input_data_autoj):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# pandalm
# Alpaca-style instruction templates. "prompt_input" is for tasks that come
# with extra context, "prompt_no_input" for instruction-only tasks.
prompt_templates = {
    "alpaca": {
        "description": "Template used by Alpaca-LoRA.",
        "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
        "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
        "response_split": "### Response:",
    }
}


def get_prompt(instruction, input):
    """Render an Alpaca-style prompt for one task.

    Args:
        instruction: Task instruction text.
        input: Optional extra context for the task. When it is empty or
            None, the instruction-only template is used so the prompt does
            not contain a dangling, empty "### Input:" section.

    Returns:
        The formatted prompt string, ending with the "### Response:" header.
    """
    template = prompt_templates['alpaca']
    if input:
        return template["prompt_input"].format(instruction=instruction, input=input)
    return template["prompt_no_input"].format(instruction=instruction)


# Judge the PandaLM test set. The input is a single JSON array; each record
# supplies an instruction, an input and two candidate responses.
texts_pandalm = []

import json
input_file_path = '/root/PandaLM/data/testset-v1.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_pandalm = json.load(f)

for item in input_data_pandalm:
    # The task shown to the judge is the Alpaca-style rendering of the
    # record's instruction and input.
    prompt = build_autoj_input(prompt=get_prompt(item['instruction'], item['input']), resp1=item['response1'], resp2=item['response2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_pandalm.append(text)

outputs_pandalm = llm.generate(texts_pandalm, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_pandalm, input_data_pandalm):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# llmbar_natural
# Judge the LLMBar "Natural" subset. The input is a single JSON array; each
# record supplies an input and two candidate outputs.
texts_natural = []
input_file_path = '/root/LLMBar/Dataset/LLMBar/Natural/dataset.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_llmbar_natural = json.load(f)

for item in input_data_llmbar_natural:
    prompt = build_autoj_input(prompt=item['input'], resp1=item['output_1'], resp2=item['output_2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_natural.append(text)

outputs_llmbar_natural = llm.generate(texts_natural, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_llmbar_natural, input_data_llmbar_natural):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# llmbar_neighbor

# Judge the LLMBar "Adversarial/Neighbor" subset. The input is a single JSON
# array; each record supplies an input and two candidate outputs.
texts_neighbor = []
input_file_path = '/root/LLMBar/Dataset/LLMBar/Adversarial/Neighbor/dataset.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_llmbar_neighbor = json.load(f)

for item in input_data_llmbar_neighbor:
    prompt = build_autoj_input(prompt=item['input'], resp1=item['output_1'], resp2=item['output_2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_neighbor.append(text)

outputs_llmbar_neighbor = llm.generate(texts_neighbor, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_llmbar_neighbor, input_data_llmbar_neighbor):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# llmbar_gptinst

# Judge the LLMBar "Adversarial/GPTInst" subset. The input is a single JSON
# array; each record supplies an input and two candidate outputs.
texts_gptinst = []
input_file_path = '/root/LLMBar/Dataset/LLMBar/Adversarial/GPTInst/dataset.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_llmbar_gptinst = json.load(f)

for item in input_data_llmbar_gptinst:
    prompt = build_autoj_input(prompt=item['input'], resp1=item['output_1'], resp2=item['output_2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_gptinst.append(text)

outputs_llmbar_gptinst = llm.generate(texts_gptinst, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_llmbar_gptinst, input_data_llmbar_gptinst):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# llmbar_gptout

# Judge the LLMBar "Adversarial/GPTOut" subset. The input is a single JSON
# array; each record supplies an input and two candidate outputs.
texts_gptout = []
input_file_path = '/root/LLMBar/Dataset/LLMBar/Adversarial/GPTOut/dataset.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_llmbar_gptout = json.load(f)

for item in input_data_llmbar_gptout:
    prompt = build_autoj_input(prompt=item['input'], resp1=item['output_1'], resp2=item['output_2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_gptout.append(text)

outputs_llmbar_gptout = llm.generate(texts_gptout, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_llmbar_gptout, input_data_llmbar_gptout):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# llmbar_manual

# Judge the LLMBar "Adversarial/Manual" subset. The input is a single JSON
# array; each record supplies an input and two candidate outputs.
texts_manual = []
input_file_path = '/root/LLMBar/Dataset/LLMBar/Adversarial/Manual/dataset.json'
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_llmbar_manual = json.load(f)

for item in input_data_llmbar_manual:
    prompt = build_autoj_input(prompt=item['input'], resp1=item['output_1'], resp2=item['output_2'], protocol="pairwise_tie")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_manual.append(text)

outputs_llmbar_manual = llm.generate(texts_manual, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_llmbar_manual, input_data_llmbar_manual):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)

In [None]:
# mt-bench-type1
import json
# Judge the MT-Bench human-judgment set with the two-turn "type1" protocol.
# The input is read as one JSON record per line.
input_file_path = '/root/dataset/mt_bench_human.json'

# Parse the file once and keep the records, so every prediction is written
# back onto the exact record its prompt was built from. Blank lines are
# skipped instead of crashing json.loads.
with open(input_file_path, 'r', encoding='utf-8') as f:
    input_data_mtbench = [json.loads(line) for line in f if line.strip()]

texts_mtbench = []
for item in input_data_mtbench:
    conv_a = item['conversation_a']
    conv_b = item['conversation_b']
    # Messages 0 and 2 of conversation_a are used as the two prompts;
    # messages 1 and 3 of each conversation are the two models' replies.
    # NOTE(review): assumes conversation_b shares those prompts -- confirm.
    prompt = build_autoj_input(prompt=conv_a[0]['content'],
                               resp1=conv_a[1]['content'],
                               resp2=conv_b[1]['content'],
                               prompt2=conv_a[2]['content'],
                               resp1_2=conv_a[3]['content'],
                               resp2_2=conv_b[3]['content'],
                               protocol="multiturn_pairwise_tie_type1")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    texts_mtbench.append(text)

outputs_mtbench = llm.generate(texts_mtbench, sampling_params)

# Results go to <input dir>/<model_name>/<dataset_name>_response_type1.json.
dataset_name = os.path.basename(input_file_path).split('.')[0]
output_dir = os.path.join(os.path.dirname(input_file_path), model_name)
# makedirs(exist_ok=True) is safe on re-runs and has no check-then-create race.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, dataset_name + '_response_type1.json')

# One JSON object per line: the input record plus the raw judge text and the
# label extracted from it.
with open(output_file_path, 'w', encoding='utf-8') as f:
    for output, item in zip(outputs_mtbench, input_data_mtbench):
        judge_text = output.outputs[0].text
        pred_label = extract_pariwise_result(judge_text)
        item['response_judge'] = judge_text
        item['pred_label'] = pred_label
        json.dump(item, f)
        f.write('\n')
        print(pred_label)