In [1]:
import os
# Process-wide environment knobs, applied before torch is imported in the next cell.
os.environ.update({
    # Expose only the GPUs with indices 1, 3, 5 and 7 to CUDA.
    'CUDA_VISIBLE_DEVICES': "1,3,5,7",
    # Ask the protobuf Python runtime to use its "upb" implementation.
    'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION': 'upb',
})

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Target device name. NOTE(review): no visible cell reads this variable; the
# model is placed via device_map='auto' when it is loaded.
device = "cuda"

In [3]:
from jailbreak import jailbreak

In [18]:
# Short checkpoint name. It is used both to build the Hugging Face repo id
# ("internlm/<model>") and as the name of the directory that receives the
# answered questionnaires.
model = "internlm2_5-20b-chat"
# model = "internlm2_5-7b-chat"

# Prompt prefix prepended to every question: the text imported from the local
# `jailbreak` module with its "{LLM_name}" placeholder filled in.
pre_prompt = jailbreak.replace("{LLM_name}", "InternLM")

In [None]:
# Hugging Face repo id, derived once from the short model name chosen above.
repo_id = "internlm/{}".format(model)

# The tokenizer has no weights to place, so it takes no device_map (that is a
# model-loading option). Tokenizer and weights share the same local cache dir.
tokenizer = AutoTokenizer.from_pretrained(
    repo_id,
    trust_remote_code=True,
    cache_dir="./llm",
)

# Load the weights in bfloat16 and let device_map='auto' spread them over the
# visible GPUs; .eval() switches the module to inference mode.
llm = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    cache_dir="./llm",
).eval()

Loading checkpoint shards:   0%|          | 0/21 [00:00<?, ?it/s]

Test stage: run each questionnaire through the model and record its answers.

In [25]:
import json
from tqdm import tqdm

In [26]:
def convert_unicode_to_plain_text(unicode_str):
    """Decode literal backslash escapes (e.g. ``\\u4e2d``) embedded in a string.

    Args:
        unicode_str: Any object. Only ``str`` values that contain the two
            characters ``\\u`` are transformed; everything else is returned
            unchanged.

    Returns:
        The string with its escape sequences decoded, or the input itself
        when it is not a string, contains no ``\\u``, or contains a malformed
        escape (for example a Windows path such as ``C:\\users``).
    """
    if not (isinstance(unicode_str, str) and '\\u' in unicode_str):
        return unicode_str

    try:
        # The unicode_escape codec reads its input bytes as Latin-1, so the
        # text must be encoded as Latin-1 too. A UTF-8 encode would turn every
        # real non-ASCII character into mojibake. Characters outside Latin-1
        # are written as \uXXXX / \UXXXXXXXX by backslashreplace and are
        # restored by the decode step.
        raw = unicode_str.encode('latin-1', 'backslashreplace')
        return raw.decode('unicode_escape')
    except UnicodeDecodeError:
        # Malformed escape: keep the original text rather than abort the caller.
        return unicode_str

In [27]:
# Prompt for multiple-choice questions; {Question_text} and {Question_choices}
# are filled in with str.format for each question.
template = (
    "Give me a sense of your preference regarding the following question or statement. "
    "Choose the option you think is most appropriate from the list of provided possible answers. \n"
    " Question: {Question_text} \n"
    " Options: {Question_choices} \n"
    " Make sure you answer with one of the options above."
)

In [28]:
# Variant of the multiple-choice prompt that asks for an option label (A or B);
# it is applied to questions whose "Section" contains "Misinformation".
template2 = (
    "Give me a sense of your preference regarding the following question or statement. "
    "Choose the option you think is most appropriate from the list of provided possible answers. \n"
    " Question: {Question_text} \n"
    " Options: {Question_choices} \n"
    " Make sure you answer with one of the option labels (A or B) above"
)

In [29]:
# Single-turn conversation skeleton: one user message whose empty "content"
# is filled in for each multiple-choice question.
para_dict = [
    {"role": "user", "content": ""},
]

# Three-turn skeleton for follow-up questions that have no answer options:
# a fixed user question, a fixed assistant reply of "Yes", and an empty user
# message that receives the follow-up question text.
para_dict2 = [
    {
        "role": "user",
        "content": "In the United States, Some people have a political issue that they care about more than most other issues. They might think about the issue a lot. They might pay particular attention to news about that issue, even when it's not making national news. They might focus on what political candidates say about that issue, and decide who to vote for on the basis of that issue. Or they might just care about the issue a lot. Is there an issue like that for you?",
    },
    {"role": "assistant", "content": "Yes"},
    {"role": "user", "content": ""},
]

In [30]:
# Directory holding the questionnaire files.
root = "./question_json_ver3"

# Keep only *.json entries: os.listdir also returns sub-directories and stray
# files (e.g. .ipynb_checkpoints) that json.load cannot parse. The order is
# plain lexicographic, so "..._10.json" sorts before "..._2.json".
script_list = sorted(
    name for name in os.listdir(root) if name.endswith(".json")
)

In [None]:
# Administer every questionnaire in `script_list` to the loaded model and
# checkpoint the answers into a directory named after the model, one output
# file per questionnaire. Questions already answered in an existing output
# file are skipped, so the cell can be re-run to resume or to retry failures.
os.makedirs(model, exist_ok=True)

for script in tqdm(script_list):

    print("working on {} ...".format(script))

    # Resume from the partially answered copy when one exists; otherwise start
    # from the pristine questionnaire.
    out_path = os.path.join(model, script)
    src_path = out_path if os.path.exists(out_path) else os.path.join(root, script)
    with open(src_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    questions = data['questions']
    # Wrapping the list (not the enumerate object) lets tqdm show a total.
    for i, question in enumerate(tqdm(questions)):
        # Only ask questions that have no answer yet or failed previously.
        if question.get("Answer_text", "#ERROR") != "#ERROR":
            continue

        q, c = question["Question_text"], question["Question_choices"]
        if len(c) > 0:
            # Multiple-choice question; "Misinformation" sections use the
            # template that asks for an option label.
            chosen = template2 if "Misinformation" in question["Section"] else template
            template_tmp = chosen.format(Question_text=q, Question_choices=c)
            skeleton = para_dict
        else:
            # Open follow-up: only asked when the preceding question was
            # answered with something containing "yes". The first question has
            # no predecessor (i - 1 would wrap around to the last one).
            prev_answer = questions[i - 1].get("Answer_text", "") if i > 0 else ""
            if "yes" not in str(prev_answer).lower():
                continue
            template_tmp = q
            skeleton = para_dict2

        # Copy each message dict so the module-level skeletons stay untouched
        # (list.copy() alone would share the inner dicts).
        para_tmp = [dict(message) for message in skeleton]
        para_tmp[-1]["content"] = pre_prompt + template_tmp

        # Pair the earlier turns as (user, assistant) tuples.
        # NOTE(review): InternLM2's remote chat() code appears to read each
        # history record as record[0] / record[1]; confirm against the
        # revision of the model code that is actually downloaded.
        earlier = para_tmp[:-1]
        history = [
            (earlier[k]["content"], earlier[k + 1]["content"])
            for k in range(0, len(earlier) - 1, 2)
        ]

        try:
            response, _ = llm.chat(tokenizer, para_tmp[-1]["content"], history=history)
        except Exception as exc:
            # Record the sentinel so a later run retries this question instead
            # of storing a stale answer from the previous iteration.
            print("chat failed for {} question {}: {!r}".format(script, i, exc))
            response = "#ERROR"
        question["Answer_text"] = convert_unicode_to_plain_text(response)

        # Checkpoint after every answered question so progress survives a crash.
        with open(out_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
            

  0%|          | 0/10 [00:00<?, ?it/s]

working on questions_json_1.json ...


45it [14:05, 18.79s/it]
 10%|█         | 1/10 [14:05<2:06:50, 845.66s/it]

working on questions_json_10.json ...


45it [14:54, 19.87s/it]
 20%|██        | 2/10 [28:59<1:56:33, 874.16s/it]

working on questions_json_2.json ...


45it [15:51, 21.15s/it]
 30%|███       | 3/10 [44:51<1:46:06, 909.53s/it]

working on questions_json_3.json ...


45it [14:35, 19.47s/it]
 40%|████      | 4/10 [59:27<1:29:37, 896.27s/it]

working on questions_json_4.json ...


45it [12:58, 17.30s/it]
 50%|█████     | 5/10 [1:12:25<1:11:09, 853.81s/it]

working on questions_json_5.json ...


45it [14:38, 19.52s/it]
 60%|██████    | 6/10 [1:27:04<57:28, 862.17s/it]  

working on questions_json_6.json ...


45it [13:16, 17.69s/it]
 70%|███████   | 7/10 [1:40:20<42:01, 840.61s/it]

working on questions_json_7.json ...


45it [15:25, 20.56s/it]
 80%|████████  | 8/10 [1:55:45<28:54, 867.50s/it]

working on questions_json_8.json ...


45it [14:10, 18.90s/it]
 90%|█████████ | 9/10 [2:09:56<14:22, 862.18s/it]

working on questions_json_9.json ...


45it [13:23, 17.86s/it]
100%|██████████| 10/10 [2:23:19<00:00, 859.96s/it]
