In [1]:
import os
import time

import pandas as pd

from llm_client.pipeline import Pipeline
from llm_client.values_alignment.experiments_utils import get_experiment_result
from llm_client.values_alignment.experiments_utils import generate_question_prompts

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  from .autonotebook import tqdm as notebook_tqdm


# Experiments

In [2]:
# Directory that holds the questionnaire/context CSVs read below and under
# which the experiment result files are written.
# The location was previously pinned to one machine's absolute path. It can
# now be overridden through the VALUES_ALIGNMENT_ROOT environment variable;
# when the variable is unset the original path is used, so existing setups
# behave exactly as before. The value stays a plain `str` because later cells
# build paths from it with f-strings and `+`.
root_path = os.environ.get(
    "VALUES_ALIGNMENT_ROOT",
    "/root/Projects/llm-client/notebooks/values_alignment",
)

In [3]:
# Answer-format hints appended to the questions: a one-line JSON example in
# English and its Chinese counterpart. The English one is what the experiment
# cells pass as `additional_prompt`.
# Single-quoted literals keep the embedded JSON free of backslash escapes.
en_additional_prompt = 'Response in JSON format: {"answer_number": 1, "reason": "this is the reason"}\n'
cn_additional_prompt = 'JSON格式示例为：{"答案序号": 1, "原因": "中文回答的理由"}\n'

In [4]:
# Load the English question table and turn every missing cell into None.
# In the preview, only the first row carries the prompt/option columns; the
# remaining rows leave them blank.
# NOTE(review): presumably generate_question_prompts relies on None (rather
# than NaN) to detect those blanks — confirm in experiments_utils.
en_question_df = (
    pd.read_csv(root_path + "/vsm2013_english_questions.csv")
    .pipe(lambda frame: frame.where(frame.notna(), None))
)
print(en_question_df.shape)
en_question_df.head()

(24, 8)


Unnamed: 0,question_number,questions,prompt,option_1,option_2,option_3,option_4,option_5
0,m01,have sufficient time for your personal or home...,"Please think of an ideal job, disregarding you...",1 = of utmost importance,2 = very important,3 = of moderate importance,4 = of little importance,5 = of very little or no importance
1,m02,have a boss (direct superior) you can respect,,,,,,
2,m03,get recognition for good performance,,,,,,
3,m04,have security of employment,,,,,,
4,m05,have pleasant people to work with,,,,,,


In [5]:
# Expand the English question table into prompt entries. Judging by the
# printed sample, each entry is a (prompt template, option list) tuple whose
# template still contains {options}/{nation}/{age}/{gender}/{city} slots.
en_prompt_list = generate_question_prompts(question_df=en_question_df)

# Show a small slice of the entries, then the total count on its own line.
print(en_prompt_list[5:10], len(en_prompt_list), sep="\n")

[("Please think of an ideal job, disregarding your present job, if you have one. In choosing an ideal job, how important would it be to you to do work that is interesting? Please provide an answer chosen from \n{options}\nAssume you are from {nation} with nationality of birth being {nation}, currently {age} years old, working as a {gender} employee in a mid-sized company located in {city}. Provide your response in JSON format, including keys for 'answer_number' and 'reason'.", ['1 = of utmost importance', '2 = very important', '3 = of moderate importance', '4 = of little importance', '5 = of very little or no importance']), ("Please think of an ideal job, disregarding your present job, if you have one. In choosing an ideal job, how important would it be to you to be consulted by your boss\nin decisions involving your work? Please provide an answer chosen from \n{options}\nAssume you are from {nation} with nationality of birth being {nation}, currently {age} years old, working as a {gen

In [7]:
# English persona/context table; the preview shows gender, age, nation and
# city columns, matching the slots in the prompt templates.
en_context_df = pd.read_csv(root_path + "/vsm_english_context.csv")
en_context_df.head()

Unnamed: 0,gender,age,nation,city
0,male,25,America,San Francisco
1,male,35,America,San Francisco
2,male,45,America,San Francisco
3,female,25,America,San Francisco
4,female,35,America,San Francisco


In [11]:
# Load the Chinese question table and turn every missing cell into None,
# mirroring how the English table is prepared.
# NOTE(review): the preview shows the question-id column named "Unnamed: 0"
# here, whereas the English file calls it "question_number" — verify that
# generate_question_prompts does not depend on that column name.
cn_question_df = (
    pd.read_csv(root_path + "/vsm2013_chinese_questions.csv")
    .pipe(lambda frame: frame.where(frame.notna(), None))
)
cn_question_df.head()

Unnamed: 0.1,Unnamed: 0,questions,prompt,option_1,option_2,option_3,option_4,option_5
0,m01,为个人生活或家庭生活留有充足的时间,在不考虑当前工作的情况下，请你设想一个理想的工作。在选择理想工作时，你认为下面一项表述：“{...,1 = 最重要,2 = 很重要,3 = 一般重要,4 = 不太重要,5 = 不重要或完全不重要
1,m02,一个让你尊敬的老板（直接上司）,,,,,,
2,m03,表现优良时给予认可,,,,,,
3,m04,工作稳定有保障,,,,,,
4,m05,与友善的人共事,,,,,,


In [12]:
# Chinese persona/context table; same gender/age/nation/city columns as the
# English one, with the values written in Chinese.
cn_context_df = pd.read_csv(root_path + "/vsm_chinese_context.csv")
cn_context_df.head()

Unnamed: 0,gender,age,nation,city
0,男性,25,美国,旧金山
1,男性,35,美国,旧金山
2,男性,45,美国,旧金山
3,女性,25,美国,旧金山
4,女性,35,美国,旧金山


In [13]:
# Expand the Chinese question table into prompt entries, using the same
# helper as for the English questions. The printed sample shows
# (prompt template, option list) tuples.
cn_prompt_list = generate_question_prompts(question_df=cn_question_df)

# Show a small slice of the entries, then the total count on its own line.
print(cn_prompt_list[5:10], len(cn_prompt_list), sep="\n")

[('在不考虑当前工作的情况下，请你设想一个理想的工作。在选择理想工作时，你认为下面一项表述：“有趣的工作内容” 对你的重要程度是：\r\n\r\n{options}\r\n\r\n设想你来自{nation}，出生地同样是{nation}。目前，你在{city}的一家中型公司工作。年龄为{age}岁，性别为{gender}。请根据随后的设定选择最符合你的答案，并以JSON格式回答。', ['1 = 最重要', '2 = 很重要', '3 = 一般重要', '4 = 不太重要', '5 = 不重要或完全不重要']), ('在不考虑当前工作的情况下，请你设想一个理想的工作。在选择理想工作时，你认为下面一项表述：“当上司的决定涉及到你的工作时，他会咨询你的意见                            ” 对你的重要程度是：\r\n\r\n{options}\r\n\r\n设想你来自{nation}，出生地同样是{nation}。目前，你在{city}的一家中型公司工作。年龄为{age}岁，性别为{gender}。请根据随后的设定选择最符合你的答案，并以JSON格式回答。', ['1 = 最重要', '2 = 很重要', '3 = 一般重要', '4 = 不太重要', '5 = 不重要或完全不重要']), ('在不考虑当前工作的情况下，请你设想一个理想的工作。在选择理想工作时，你认为下面一项表述：“舒适的工作环境” 对你的重要程度是：\r\n\r\n{options}\r\n\r\n设想你来自{nation}，出生地同样是{nation}。目前，你在{city}的一家中型公司工作。年龄为{age}岁，性别为{gender}。请根据随后的设定选择最符合你的答案，并以JSON格式回答。', ['1 = 最重要', '2 = 很重要', '3 = 一般重要', '4 = 不太重要', '5 = 不重要或完全不重要']), ('在不考虑当前工作的情况下，请你设想一个理想的工作。在选择理想工作时，你认为下面一项表述：“该工作受到你的朋友和家人的尊重” 对你的重要程度是：\r\n\r\n{options}\r\n\r\n设想你来自{nation}，出生地同样是{nation}。目前，你在{city}的一家中型公司工作。年龄为{age}岁，性别为{gender}。请根据随后

## Llama2-7b

In [None]:
# Pipeline configuration for the Llama2-7b chat model (a vLLM-served model,
# judging by the file name).
config_yaml = "/root/Projects/llm-client/config_yamls/llama2-7b-chat-vllm.yaml"

# Build the pipeline that the Llama2-7b cells below send their requests through.
# NOTE(review): verbose=1 presumably turns on pipeline logging — confirm against Pipeline.
pipeline = Pipeline(config_yaml, verbose=1)

In [None]:
# Smoke test: send one prompt through the pipeline and print the reply before
# launching the full experiment. The top-level `await` relies on the notebook
# kernel running the cell inside an event loop.
testing_result = await pipeline.model_predict("who are you?")

print(testing_result)

In [None]:
# Result file template for the Llama2-7b / English / no_shuffle runs.
# `{seed}` is a literal placeholder (this is not an f-string); presumably
# get_experiment_result fills it in per run — confirm in experiments_utils.
output_path = root_path + "/experiments_results/Llama2_7b/english/no_shuffle/result_{seed}.csv"

In [None]:
await get_experiment_result(
    question_prompts = en_prompt_list,
    experiment_context = en_context_df,
    output_path = output_path,
    pipeline = pipeline,
    chunk_size = 50,
    use_random_options = False,
    additional_prompt = en_additional_prompt,
)

In [None]:
output_path = root_path + "/experiments_results/Llama2_7b/english/shuffle/result_{seed}.csv"

await get_experiment_result(
    question_prompts = en_prompt_list,
    experiment_context = en_context_df,
    output_path = output_path,
    pipeline = pipeline,
    chunk_size = 50,
    use_random_options = True,
    additional_prompt = en_additional_prompt,
)

## Llama2-13b

In [None]:
# Pipeline configuration for the Llama2-13b chat model (a vLLM-served model,
# judging by the file name).
config_yaml = "/root/Projects/llm-client/config_yamls/llama2-13b-chat-vllm.yaml"

# Rebinds the shared `pipeline` name: every later cell that uses `pipeline`
# now sends its requests through the object built from this config.
pipeline = Pipeline(config_yaml, verbose=1)

In [None]:
# Smoke test: send one prompt through the current pipeline and print the
# reply before launching the full experiment.
testing_result = await pipeline.model_predict("who are you?")

print(testing_result)

In [None]:
# Result file template for the Llama2-13b / English / no_shuffle runs.
# `{seed}` is a literal placeholder (this is not an f-string); presumably
# get_experiment_result fills it in per run — confirm in experiments_utils.
output_path = root_path + "/experiments_results/Llama2_13b/english/no_shuffle/result_{seed}.csv"

In [None]:
await get_experiment_result(
    question_prompts = en_prompt_list,
    experiment_context = en_context_df,
    output_path = output_path,
    pipeline = pipeline,
    chunk_size = 50,
    use_random_options = False,
    additional_prompt = en_additional_prompt,
)

In [None]:
output_path = root_path + "/experiments_results/Llama2_13b/english/shuffle/result_{seed}.csv"

await get_experiment_result(
    question_prompts = en_prompt_list,
    experiment_context = en_context_df,
    output_path = output_path,
    pipeline = pipeline,
    chunk_size = 50,
    use_random_options = True,
    additional_prompt = en_additional_prompt,
)

## Qwen-14B

In [2]:
# Pipeline configuration for the Qwen-14B chat model (a vLLM-served model,
# judging by the file name).
config_yaml = "/root/Projects/llm-client/config_yamls/qwen-14b-chat-vllm.yaml"

# Rebinds the shared `pipeline` name to the Qwen-14B pipeline. With verbose=1
# the constructor logs the per-request parameters (see the INFO line in the
# cell output).
pipeline = Pipeline(config_yaml, verbose=1)

2024-01-23 09:22:33,623 - [32mINFO[0m - pipeline.py:21 - pipeline.__init__ - 85053 - parameters for every request: {'max_tokens': 512, 'stop': ['<|im_end|>']}


In [3]:
# Smoke test in English: send one prompt through the current pipeline and
# print the reply.
testing_result = await pipeline.model_predict("who are you?")

print(testing_result)

I am QianWen, a pre-trained language model developed by Alibaba Cloud. My purpose is to assist users in generating various types of text, such as articles, stories, poems, and answering questions. How may I assist you today?


In [5]:
# Smoke test in Chinese: the prompt asks whether the model knows the manga
# character "Straw Hat Luffy"; overwrites the previous `testing_result`.
testing_result = await pipeline.model_predict("你知道草帽路飞吗")

print(testing_result)

草帽路飞是日本漫画家尾田荣一郎所创作的漫画《海贼王》中的主角，是蒙奇·D·路飞的名字。草帽路飞是一位勇敢的海贼，他梦想着找到传说中的“One Piece”，成为海贼王。他有一个大鼻子和一头红发，是草帽海贼团的船长，也是故事中最受欢迎的角色之一。
