In [None]:
import os
import json
import argparse
import random

from tqdm import tqdm
from itertools import permutations

def read_data(path):
    """Load a data file, picking the parser from substrings of ``path``.

    The checks are plain substring tests on the whole path, evaluated in
    this order:

    * ``'json'`` in path: the file is parsed with ``json.load`` and the
      parsed object is returned unchanged.
    * ``'chatglm2'`` in path: a list of lines is returned. Each literal
      backslash-n pair is turned into a real newline; line terminators are
      kept.
    * anything else: a list of lines is returned with the trailing newline
      removed and literal backslash-n / backslash-t pairs turned into real
      newline / tab characters.

    Files are always decoded as UTF-8 so that the result does not depend on
    the platform's default encoding (the writers in this file use UTF-8).

    Args:
        path: Path of the file to read.

    Returns:
        The parsed JSON object, or a list of processed lines.

    Side effects:
        Prints ``len()`` of the loaded data.
    """
    if 'json' in path:
        with open(path, "r", encoding='utf-8') as f:
            datas = json.load(f)
    elif 'chatglm2' in path:
        with open(path, "r", encoding='utf-8') as f:
            # NOTE(review): the second replace matches a backslash followed
            # by a real TAB character, not the two-character sequence
            # backslash-t. Kept as-is; confirm this is intended.
            datas = [
                line.replace("\\n", "\n").replace("\\\t", "\\t")
                for line in f
            ]
    else:
        with open(path, "r", encoding='utf-8') as f:
            datas = [
                line.rstrip('\n').replace("\\n", "\n").replace("\\t", "\t")
                for line in f
            ]

    print(len(datas))

    return datas

def write_file(path, data):
    """Write ``data`` to ``path`` in a format chosen from the path text.

    * ``'json'`` in path: ``data`` is serialised with ``json.dumps``
      (non-ASCII kept as-is) and appended to the file as one line.
    * otherwise: the file is overwritten with every item of ``data`` on its
      own line; items must already be strings.

    The parent directory is created when it is missing so that the first
    write into a fresh output folder does not fail.

    Args:
        path: Destination file path.
        data: A JSON-serialisable object (JSON mode) or an iterable of
            strings (text mode).
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    if 'json' in path:
        # Append mode: each call adds exactly one record line.
        with open(path, "a", encoding='utf-8') as f:
            f.write(json.dumps(data, ensure_ascii=False))
            f.write('\n')
    else:
        with open(path, 'w', encoding='utf-8') as f:
            for _data in data:
                f.write(_data)
                f.write('\n')

def write_list(save_path, sample_list):
    """Write ``sample_list`` to ``save_path`` as a JSON array, one item per line.

    Items are serialised with ``json.dumps`` (non-ASCII kept as-is) and
    separated by a comma followed by a newline. An empty list produces
    ``[]`` instead of raising ``IndexError``.

    Args:
        save_path: Destination file path; the file is overwritten.
        sample_list: Sequence of JSON-serialisable items.
    """
    serialised = [json.dumps(item, ensure_ascii=False) for item in sample_list]
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write('[')
        f.write(',\n'.join(serialised))
        f.write(']')

def get_model_answer_few_shot(description, label, question, examples):
    """Build a USER/ASSISTANT-style few-shot role-play prompt and save it.

    Four entries of ``examples`` are drawn in random order, joined with a
    literal backslash-n separator and embedded in the prompt together with
    the persona ``description`` and the ``question``. The prompt is
    appended, with ``label`` as the gold answer, to
    ``./prompt/<label>.jsonl`` through ``write_file``.

    Args:
        description: Persona text inserted into the prompt.
        label: Gold label; also used as the output file name.
        question: Question text appended after the examples.
        examples: Sequence of example strings; needs at least 4 entries.

    Returns:
        None.

    Raises:
        ValueError: If ``examples`` has fewer than 4 entries.
    """
    # Shuffle the order: random.sample picks a uniformly random ordered
    # 4-subset without building the full list of permutations first.
    cases = "\\n ".join(random.sample(examples, 4))
    prompt = "Now we need you to role-play a person and to answer the questions, assuming you are {description}. ASSISTANT: ok. Now I am the person. USER: Please answer the following questions according to person's personality. Provide the final answer A or B directly, without any nonsense. ASSISTANT: ok. \\n {examples}. USER: ".format(description=description, examples=cases) + question + "\\n"
    save_path = './prompt/{label}.jsonl'.format(label=label)
    write_file(save_path, {"prompt": prompt, "gold": label})
    return None

def get_model_answer_few_shot_chatglm(description, label, question, examples):
    """Build a "[Round N]"-formatted few-shot role-play prompt and save it.

    Rounds 1 and 2 hold the persona setup and the answering instructions,
    four randomly ordered entries of ``examples`` fill rounds 3 to 6, and
    ``question`` is asked in round 7. The prompt is appended, with
    ``label`` as the gold answer, to ``./prompt/<label>.jsonl`` through
    ``write_file``.

    Args:
        description: Persona text inserted into round 1.
        label: Gold label; also used as the output file name.
        question: Question text asked in the final round.
        examples: Sequence of example strings; needs at least 4 entries.

    Returns:
        None.

    Raises:
        ValueError: If ``examples`` has fewer than 4 entries.
    """
    # Shuffle the order: random.sample picks a uniformly random ordered
    # 4-subset without building the full list of permutations first.
    chosen = random.sample(examples, 4)
    # Example rounds are numbered from 3 because rounds 1-2 are the setup.
    cases = ''.join(
        "[Round {index}]\n\n问：".format(index=index) + example + '\n'
        for index, example in enumerate(chosen, start=3)
    )
    prompt = "[Round 1]\n\n问：Now we need you to role-play a person and to answer the questions, assuming you are {description}. \n\n答：ok. Now I am the person. \n\n[Round 2]\n\n问：Please answer the following questions according to person's personality. Provide the final answer A or B directly, without any nonsense. \n\n答：ok. \n\n{examples}".format(description=description, examples=cases) + "[Round 7]\n\n问：" + question + "\n\n答："
    save_path = './prompt/{label}.jsonl'.format(label=label)
    write_file(save_path, {"prompt": prompt, "gold": label})
    return None

def get_model_answer_zero_shot(description, label, question, examples):
    """Build a zero-shot role-play prompt and save it.

    The prompt contains only the persona ``description``, the answering
    instructions and the ``question``. It is appended, with ``label`` as
    the gold answer, to ``./prompt/<label>.jsonl`` through ``write_file``.

    Args:
        description: Persona text inserted into the prompt.
        label: Gold label; also used as the output file name.
        question: Question text placed after "Question: ".
        examples: Unused. Kept so the signature matches the few-shot
            builders; no examples are embedded in a zero-shot prompt.

    Returns:
        None.
    """
    prompt = "Now we need you to role-play a person and to answer the questions, assuming you are {description},\\n Provide the final answer A or B directly, without any nonsense. Please answer the following questions according to person's personality. \\n Question: ".format(description=description) + question + "\\n Answer:"
    save_path = './prompt/{label}.jsonl'.format(label=label)
    write_file(save_path, {"prompt": prompt, "gold": label})
    return None

def get_model_answer(description, label, question, examples):
    """Build a single-turn role-play prompt with format examples and save it.

    Four entries of ``examples`` are drawn in random order, joined with a
    literal backslash-n separator and embedded as format examples ahead of
    the ``question``. The prompt is appended, with ``label`` as the gold
    answer, to ``./prompt/<label>.jsonl`` through ``write_file``.

    Args:
        description: Persona text inserted into the prompt.
        label: Gold label; also used as the output file name.
        question: Question text placed after "Question: ".
        examples: Sequence of example strings; needs at least 4 entries.

    Returns:
        None.

    Raises:
        ValueError: If ``examples`` has fewer than 4 entries.
    """
    # Shuffle the order: random.sample picks a uniformly random ordered
    # 4-subset without building the full list of permutations first.
    cases = "\\n ".join(random.sample(examples, 4))
    prompt = "Now we need you to role-play a person and to answer the questions, assuming you are {description},\\n Provide the final answer A or B directly, without any nonsense. Provide a few format examples: \\n {examples}. Please answer the following questions according to person's personality. \\n Question: ".format(description=description, examples=cases) + question + "\\n Answer:"
    save_path = './prompt/{label}.jsonl'.format(label=label)
    write_file(save_path, {"prompt": prompt, "gold": label})
    return None

def get_role_mbti(description, label, mbti_questions):
    """Emit one ChatGLM-style few-shot prompt per question for a persona.

    The few-shot example pool is read from ``./data/example_chatglm2.txt``
    and every value of ``mbti_questions`` is handed, via its ``'question'``
    field, to ``get_model_answer_few_shot_chatglm``.

    Args:
        description: Persona text forwarded to the prompt builder.
        label: Gold label forwarded to the prompt builder.
        mbti_questions: Mapping whose values are dicts with a
            ``'question'`` key.
    """
    few_shot_pool = read_data('./data/example_chatglm2.txt')
    question_entries = list(mbti_questions.values())
    for entry in tqdm(question_entries):
        get_model_answer_few_shot_chatglm(description, label, entry['question'], few_shot_pool)

def get_model_examing_result(model_name='gpt-3.5-turbo'):
    """Generate one prompt file per persona and convert it to a JSON array.

    Personas come from ``./data/mbti_personality_test_data.jsonl`` and
    questions from ``./data/mbti_questions_en_A.jsonl``. For every persona
    the prompts are produced by ``get_role_mbti`` into
    ``./prompt/<Label>.jsonl`` (one JSON record per line); the file is then
    rewritten as ``[`` + records separated by a comma and newline + ``]``.

    Args:
        model_name: Currently unused; kept for interface compatibility.

    Notes:
        Any existing ``./prompt/<Label>.jsonl`` is deleted before the
        persona is processed. Labels are presumed unique across personas;
        a repeated label would replace the earlier file - TODO confirm
        against the data.
    """
    all_cases = read_data('./data/mbti_personality_test_data.jsonl')
    mbti_questions = read_data('./data/mbti_questions_en_A.jsonl')
    for _data in all_cases:
        prompt_path = './prompt/{label}.jsonl'.format(label=_data['Label'])
        # The prompt writers append to this file. Without removing output
        # left over from a previous run, new records would land after the
        # old closing bracket and the file would be wrapped a second time,
        # producing invalid JSON.
        if os.path.exists(prompt_path):
            os.remove(prompt_path)
        get_role_mbti(_data['Description'], _data['Label'], mbti_questions)
        if not os.path.exists(prompt_path):
            # No question was processed, so there is nothing to wrap.
            continue
        with open(prompt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        with open(prompt_path, 'w', encoding='utf-8') as f:
            f.write('[')
            f.writelines(item.strip() + ',\n' for item in lines[:-1])
            # The last record is written as read (keeps its line ending)
            # and gets no trailing comma.
            f.writelines(lines[-1:])
            f.write(']')

In [None]:
# Generate the per-label role-play prompt files under ./prompt/.
# The model name is passed along, but the function does not read it.
get_model_examing_result('chatglm2-6b')

In [None]:
# Build the question prompts
def get_model_prompt():
    """Build the self-assessment prompts and save them as a JSON array.

    Questions are read from ``./data/mbti_questions_en.jsonl`` and format
    examples from ``./data/example.txt``. For every question, four examples
    are drawn in random order and embedded in the prompt. All prompts are
    written to ``./prompt/model_self.jsonl`` as ``[`` + one
    ``{"prompt": ...}`` record per line separated by commas + ``]``.

    The output file is overwritten on every call. Records are collected in
    memory and written once, so running the function again (or another
    writer targeting the same path) cannot leave a doubly wrapped,
    invalid JSON file.

    Raises:
        ValueError: If fewer than 4 examples are available.
    """
    save_path = './prompt/model_self.jsonl'
    mbti_questions = read_data('./data/mbti_questions_en.jsonl')
    examples = read_data('./data/example.txt')
    print("examples: ", examples)
    records = []
    for question in tqdm(list(mbti_questions.values())):
        # Shuffle the order: random.sample picks a uniformly random ordered
        # 4-subset without building the full list of permutations first.
        cases = "\\n ".join(random.sample(examples, 4))
        print("cases: ", cases)
        prompt = "Answer questions based on your own personality. Requirement: Provide the final answer A or B directly, without any nonsense. Provide a few format examples: \\n {examples} .Please answer the following questions according to your own personality. \\n Question: ".format(examples=cases) + question['question'] + "\\n Answer:"
        records.append(json.dumps({"prompt": prompt}, ensure_ascii=False))

    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write('[')
        f.write(',\n'.join(records))
        if records:
            # Keep the newline that precedes the closing bracket.
            f.write('\n')
        f.write(']')

In [None]:
# Build the self-assessment prompts; output goes to ./prompt/model_self.jsonl.
get_model_prompt()

In [None]:
# Load the role-play data into a module-level variable. The path contains
# 'json', so read_data parses the file with json.load.
mbti_questions = read_data('./data/role_play.jsonl')

In [None]:
# Build the question prompts
def get_model_prompt_zero_shot():
    """Build the self-assessment prompts and save them as a JSON array.

    Questions are read from ``./data/mbti_questions_en.jsonl`` and format
    examples from ``./data/example.txt``. For every question, four examples
    are drawn in random order and embedded in the prompt. All prompts are
    written to ``./prompt/model_self.jsonl`` as ``[`` + one
    ``{"prompt": ...}`` record per line separated by commas + ``]``.

    The output file is overwritten on every call. Records are collected in
    memory and written once, so a file already present at that path cannot
    end up doubly wrapped and invalid.

    NOTE(review): despite the "zero_shot" name, the prompt still embeds
    few-shot format examples and the output path is model_self.jsonl.
    The prompt text is left unchanged here; confirm whether a true
    zero-shot prompt and a separate output file were intended.

    Raises:
        ValueError: If fewer than 4 examples are available.
    """
    save_path = './prompt/model_self.jsonl'
    mbti_questions = read_data('./data/mbti_questions_en.jsonl')
    examples = read_data('./data/example.txt')
    print("examples: ", examples)
    records = []
    for question in tqdm(list(mbti_questions.values())):
        # Shuffle the order: random.sample picks a uniformly random ordered
        # 4-subset without building the full list of permutations first.
        cases = "\\n ".join(random.sample(examples, 4))
        print("cases: ", cases)
        prompt = "Answer questions based on your own personality. Requirement: Provide the final answer A or B directly, without any nonsense. Provide a few format examples: \\n {examples} .Please answer the following questions according to your own personality. \\n Question: ".format(examples=cases) + question['question'] + "\\n Answer:"
        records.append(json.dumps({"prompt": prompt}, ensure_ascii=False))

    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write('[')
        f.write(',\n'.join(records))
        if records:
            # Keep the newline that precedes the closing bracket.
            f.write('\n')
        f.write(']')

In [None]:
# Run the "zero_shot" prompt builder; it writes to ./prompt/model_self.jsonl.
get_model_prompt_zero_shot()

In [None]:
# Build the question prompts
def get_model_prompt():
    """Build the self-assessment prompts and save them as a JSON array.

    Questions are read from ``./data/mbti_questions_en.jsonl`` and format
    examples from ``./data/example.txt``. For every question, four examples
    are drawn in random order and embedded in the prompt. All prompts are
    written to ``./prompt/model_self.jsonl`` as ``[`` + one
    ``{"prompt": ...}`` record per line separated by commas + ``]``.

    The output file is overwritten on every call. Records are collected in
    memory and written once, so a file already present at that path cannot
    end up doubly wrapped and invalid.

    Raises:
        ValueError: If fewer than 4 examples are available.
    """
    save_path = './prompt/model_self.jsonl'
    mbti_questions = read_data('./data/mbti_questions_en.jsonl')
    examples = read_data('./data/example.txt')
    print("examples: ", examples)
    records = []
    for question in tqdm(list(mbti_questions.values())):
        # Shuffle the order: random.sample picks a uniformly random ordered
        # 4-subset without building the full list of permutations first.
        cases = "\\n ".join(random.sample(examples, 4))
        print("cases: ", cases)
        prompt = "Answer questions based on your own personality. Requirement: Provide the final answer A or B directly, without any nonsense. Provide a few format examples: \\n {examples} .Please answer the following questions according to your own personality. \\n Question: ".format(examples=cases) + question['question'] + "\\n Answer:"
        records.append(json.dumps({"prompt": prompt}, ensure_ascii=False))

    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write('[')
        f.write(',\n'.join(records))
        if records:
            # Keep the newline that precedes the closing bracket.
            f.write('\n')
        f.write(']')