In [1]:
#!pip install torch transformers datasets tabulate bitsandbytes accelerate
from initialize import *
%load_ext autoreload
%autoreload 2

In [2]:
### Load the model

base_model_path: str = "meta-llama/Meta-Llama-3-8B-Instruct"#"meta-llama/Llama-2-13b-chat-hf"
model_path=base_model_path
###model_path="cackerman/llama2_13b_chat_projection_tune_neg_in"

#device: str = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu" 
#model = load_model(model_path, base_model_path, device)

from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
_ = torch.set_grad_enabled(False)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, token=HF_TOKEN, quantization_config=bnb_config, device_map="auto")
device = model.device
tokenizer = AutoTokenizer.from_pretrained(base_model_path, token=HF_TOKEN)
model.tokenizer = tokenizer
if model.tokenizer.pad_token is None:
    new_pad_token = model.tokenizer.eos_token
    num_added_tokens = model.tokenizer.add_special_tokens({'pad_token': new_pad_token})
    model.resize_token_embeddings(len(model.tokenizer))
    model.config.pad_token_id = model.tokenizer.pad_token_id
        

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
### Create writing recognition judgment prompts using articles and summaries and appropriate formatting

def trim_to_set_length(text, target_length=1200):
    # Find the last word boundary before or just after the target length
    match = re.search(r'\s+\S+\s*$', text[:min(target_length+4,len(text))])
    if match:
        trim_index = match.start()
    else:
        # If no word boundary found, just cut at target length
        trim_index = target_length
    
    return text[:trim_index]
    
def trim_to_same_length(text1, text2):
    if abs(len(text1) - len(text2)) < 5: return text1, text2 #close enough
    if max(len(text1), len(text2)) < 30: return text1, text2 #too short to trim

    # Determine which text is longer
    if len(text1) > len(text2):
        longer_text, shorter_text = text1, text2
    else:
        longer_text, shorter_text = text2, text1
    
    target_length = len(shorter_text)
    if len(shorter_text) > 15: #also truncate the shorter text, so it ends abruptly too
        target_length -= 10
        match = re.search(r'\s+\S+\s*$', shorter_text[:target_length+4])
        if match and match.start() > 0:
            trim_index = match.start()
        else:
            # If no word boundary found, just cut at target length
            trim_index = target_length
        
        shorter_text = shorter_text[:trim_index]
    
    # Find the last word boundary before or just after the target length
    match = re.search(r'\s+\S+\s*$', longer_text[:target_length+4])
    if match and match.start() > 0:
        trim_index = match.start()
    else:
        # If no word boundary found, just cut at target length
        trim_index = target_length
    
    trimmed_text = longer_text[:trim_index]
    
    if len(text1) > len(text2):
        return trimmed_text, shorter_text
    else:
        return shorter_text, trimmed_text

    
def generate_llama_choice_prompts(task, model_name, dataset, fewshot = False, comp_models = [], optlabel = "", alias = ""):
    if dataset == "dolly":
        responses, articles, instructions, keys = load_data_dolly()
    elif dataset == "sad":
        ddirsuffix = "_full" if "full" in optlabel else "_longer" if "longer" in optlabel else ""
        responses, keys = load_data_sad(ddir = f"completions{ddirsuffix}")
    else:
        responses, articles, keys = load_data(dataset)

    prefix = task.value
    model_prompt_template = LLAMA3_PROMPT_TEMPLATE if "llama3" in model_name else LLAMA_PROMPT_TEMPLATE
    questions = []
    if task == TaskType.Individual or task == TaskType.Individual_Continuation:
        if task == TaskType.Individual_Continuation: 
            articles = load_from_json(f"starts{ddirsuffix}/starts_train.json")
        suffix = "\n"#""#"My answer is "#
        user_prompt = CONTINUATION_RECOGNITION_PROMPT_TEMPLATE_EXP if task == TaskType.Individual_Continuation else RECOGNITION_PROMPT_TEMPLATE_ALT_EXP
        system_prompt = ""###RECOGNITION_SYSTEM_PROMPT2 if task != TaskType.Individual_Continuation else RECOGNITION_SYSTEM_PROMPT2
        for key in keys:
            article = articles[key] if task != TaskType.Individual_Continuation else next(d['text'] for d in articles if d['id'] == key)#""
            instruction = instructions[key] if dataset == "dolly" else SUMMARIZE_PROMPT_TEMPLATE_CNN.split('\n\n')[1] if "cnn" in dataset else SUMMARIZE_PROMPT_TEMPLATE_XSUM.split('\n\n')[1] if "xsum" in dataset else ""
            self_response = responses[model_name][key].replace("\n\n","\n").strip()
            if task == TaskType.Individual_Continuation and "full" not in optlabel and "untrimmed" not in optlabel: self_response = trim_to_set_length(self_response).strip()
            wdct = round(len(self_response.split())/10)*10 if task == TaskType.Individual_Continuation else 0                
            question = {"key": key, "model": model_name}
            if fewshot: 
                question["individual_recognition_prompt"] = make_fewshot_individual_detection_prompt(model_name, article, self_response, instruction, suffix)
            else:
                question["individual_recognition_prompt"] = model_prompt_template.format(system_prompt=system_prompt,user_prompt=user_prompt.format(article=article, 
                                                    summary=self_response, inst=instruction, wordcount=wdct))+suffix
            questions.append(question)    
            for other in responses.keys():
                if other == model_name: continue
                if len(comp_models) > 0 and other not in comp_models: continue
                question = {"key": key, "model": other}
                other_response = responses[other][key].strip()
                if task == TaskType.Individual_Continuation and "full" not in optlabel and "untrimmed": other_response = trim_to_set_length(other_response).strip()
                wdct = round(len(other_response.split())/10)*10 if task == TaskType.Individual_Continuation else 0
                if fewshot: 
                    question["individual_recognition_prompt"] = make_fewshot_individual_detection_prompt(model_name, article, other_response, instruction, suffix)+suffix
                else:
                    question["individual_recognition_prompt"] = model_prompt_template.format(system_prompt=system_prompt,user_prompt=user_prompt.format(article=article, 
                                                        summary=other_response, inst=instruction, wordcount=wdct))+suffix
                questions.append(question)
    else:
        system_prompt = DETECTION_SYSTEM_PROMPT2###"" if task == TaskType.SelfVOther_Continuation else DETECTION_SYSTEM_PROMPT2
        prefix += "_paired"
        if task == TaskType.SelfVOther_Continuation:
            suffix = "My answer is "####\n\n"
            detect_prompt = CONTINUATION_DETECTION_PROMPT_TEMPLATE###_EXP###_PAPER
            _model_name = model_name if alias == "" else alias
        else:
            if task == TaskType.HumanVModel or task == TaskType.HumanVModel_Continuation: 
                prefix += "_Mis" + "_".join(comp_models)
                _model_name = "human" 
                if "filteredlen" in optlabel: _model_name += "_filteredlen"
            else: _model_name = model_name if alias == "" else alias
            suffix = "My answer is "#"\n"
            ####if task == TaskType.HumanVModel_Continuation: suffix = "\n\n"
            detect_prompt = DETECTION_PROMPT_TEMPLATE_INST if task == TaskType.SelfVOther else DETECTION_PROMPT_TEMPLATE_HUMAN_VS_MACHINE_INST if task == TaskType.HumanVModel else CONTINUATION_DETECTION_PROMPT_TEMPLATE_HUMAN_VS_MACHINE 
        for key in keys:
            if not key in responses[_model_name]: continue
            article = articles[key] if task != TaskType.SelfVOther_Continuation and task != TaskType.HumanVModel_Continuation else ""
            instruction = instructions[key] if dataset == "dolly" else SUMMARIZE_PROMPT_TEMPLATE_CNN.split('\n\n')[1] if "cnn" in dataset else SUMMARIZE_PROMPT_TEMPLATE_XSUM.split('\n\n')[1] if "xsum" in dataset else ""
            self_response = responses[_model_name][key].replace("\n\n","\n").strip()
            for other in responses.keys():
                if other == _model_name: continue
                if len(comp_models) > 0 and other not in comp_models: continue
                if "filteredlen" in other and "filteredlen" not in optlabel: continue
                question = {"key": key, "model": other}
                other_response = responses[other][key].replace("\n\n","\n").strip()
                ###if task == TaskType.SelfVOther_Continuation and "full" not in optlabel: self_response, other_response = trim_to_same_length(self_response, other_response)
                if "full" not in optlabel and "filtered" not in optlabel and "untrimmed" not in optlabel: self_response, other_response = trim_to_same_length(self_response, other_response)
                if fewshot: 
                    question["forward_detection_prompt"] = make_fewshot_pairwise_detection_prompt(model_name, article, self_response, other_response, instruction, suffix, task)+suffix
                    question["backward_detection_prompt"] = make_fewshot_pairwise_detection_prompt(model_name, article, other_response, self_response, instruction, suffix, task)+suffix
                else: 
                    question["forward_detection_prompt"] = model_prompt_template.format(system_prompt=system_prompt,user_prompt=detect_prompt.format(article=article, 
                                                        summary1=self_response, summary2=other_response, inst=instruction))+suffix
                    question["backward_detection_prompt"] = model_prompt_template.format(system_prompt=system_prompt,user_prompt=detect_prompt.format(article=article, 
                                                        summary1=other_response, summary2=self_response, inst=instruction))+suffix
                questions.append(question)

    file_name=f"{model_name}_{dataset}_{prefix}{alias}_choice_{optlabel}prompts_sample.json"
    save_to_json(questions[:10], file_name)
    return questions

alias = ""
##task = TaskType.HumanVModel#_Continuation#
##model_name = "llama3_8bbase"
##alias = "llama3_8bchat_filteredlen"
##fewshot = False#True if "base" in model_name else False
##comp_models = ["llama3_8bchat"]#["human", "claude", "gpt35", "gpt4", "llama", "llama2_13bchat", "llama3_8bbase", "llama3_8bchat", "sonnet"]#["human_filteredlen"]#
##datasets = ["xsum","cnn", "dolly"]#["sad"]#
##optlabel = "trimmed_"#"full_"#
task = TaskType.Individual#_Continuation
model_name = "llama3_8bchat"
datasets = ["xsum","cnn", "dolly"]#["sad"]#
fewshot = False
comp_models = ["human", "llama3_8bbase", "claude", "gpt35", "gpt4", "llama", "llama2_13bchat"]
optlabel = "untrimmed_"
#task = TaskType.SelfVOther_Continuation
#model_name = "llama3_8bbase"
#datasets = ["sad"]
#fewshot = False
#comp_models = []#["human"]
#optlabel = "longer_untrimmed_"
#task = TaskType.SelfVOther
#model_name = "llama3_8bchat_filteredlen"
#fewshot = True if "base" in model_name else False
#comp_models = ["human_filteredlen"]#, "claude", "gpt35", "gpt4", "llama", "llama2_13bchat", "llama3_8bbase"]
#datasets = ["xsum","cnn", "dolly"]
#optlabel = "filteredlen_"
prompts_dict = {}
for dataset in datasets:
#    prompts = generate_llama_choice_prompts(task = TaskType.SelfVOther, model_name = model_name, dataset = dataset, fewshot = True)
    prompts = generate_llama_choice_prompts(task = task, model_name = model_name, dataset = dataset, fewshot = fewshot, comp_models = comp_models, optlabel = optlabel, alias = alias)
    prompts_dict[dataset] = prompts

In [None]:
# Get Llama model judgments of article summaries, given appropriate prompts; writes to choice_results/..._pairwise_untuned.json or individual_results/..._individual_untuned.json, as appropriate

def extract_subs(text, begin_str, end_str):
    #print(f"text=||{text}||\nbegin_str=||{begin_str}||\nend_str=||{end_str}")
    start = text.rindex(begin_str) + len(begin_str)  # Find last occurrence of begin_str (important for fewshot summaries prompt format)
    if end_str.endswith('$'):
        regex = re.compile(re.escape(end_str[:-1]) + r'$')
        match = regex.search(text, start)
        end = match.start()
    else:
        end = text.index(end_str, start)  # Find first occurrence of end_str after begin_str
    return text[start:end].strip()

def compute_choice_results(model, tokenizer, model_name, llama_choice_data, ds_name, batch_size=32, task = TaskType.SelfVOther, optlabel = "", alias = ""):
    """
    Given prompts asking for a binary choice between two options (summary_id or yes/no), generates the model's choice and log probabilities
    and writes to resultsdir/..._task_untuned.json.
    """
    suffix = ""#"My answer is " #"\n"#Starting the assistant's response like that really does help, both reducing ambigious responses and accuracy -- actually it turns out that just \n is enough
    prefix=""
    textlabel = "Summary"
    if task == TaskType.SelfVOther:
        dir_name = "choice_results"
    elif task == TaskType.HumanVModel:
        dir_name = "choice_results_humanVmodel"
        prefix = "_Mis" + "_".join(comp_models)
    elif task == TaskType.SelfVOther_Continuation or task == TaskType.Individual_Continuation or task == TaskType.HumanVModel_Continuation:
        dir_name = "completions_longer"
        textlabel = "Text continuation" if task == TaskType.Individual_Continuation else "Text"
    else:
        dir_name = "individual_results"
#        suffix = "\n"
    
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    
    outputfile = f"{ds_name}_{model_name}_{task.value}{alias}{prefix}_{optlabel}untuned"
    filename = f"{dir_name}/{outputfile}.json"
    partial_filename = f"{dir_name}/{outputfile}_partial.json"
    
    results = []    
    if os.path.exists(filename) or os.path.exists(partial_filename):
        results = load_from_json(filename) if os.path.exists(filename) else load_from_json(partial_filename)
        existing_combinations = {(result['key'], result['model']) for result in results}
        llama_choice_data = [q for q in llama_choice_data if (q['key'], q['model']) not in existing_combinations]
        if llama_choice_data == []:
            print(f"All models in {filename} have been processed. Exiting.")
            return
        else:
            print(f"Processing new data not in {filename}")
    initlenresults = len(results)
    subtasks = ["forward_detection", "backward_detection"] if task != TaskType.Individual and task != TaskType.Individual_Continuation else ["individual_recognition"]
    if task == TaskType.Individual or task == TaskType.Individual_Continuation: tokens = ["Yes", "No"]
    elif task == TaskType.SelfVOther_Continuation: tokens = ["1", "2"]###["A", "B"]
    else: tokens = ["1", "2"]
    encodelens=[]
    for i in tqdm(range(initlenresults, initlenresults+len(llama_choice_data), batch_size)):
        #print(f"Processing batch {i} to {i + batch_size}")
        batch = llama_choice_data[i-initlenresults:i-initlenresults+batch_size]
        
        for subtask in subtasks:
            input_texts = [item[f"{subtask}_prompt"] + suffix for item in batch] 
            
            probs = generate_logprobs_batch(model, tokenizer, input_texts, tokens)
            
            for j, item in enumerate(batch):
                if len(results) <= i + j:#if this is your first time processing this input
                    results.append({"key": item["key"], "model": item["model"]})
                
                result = results[i + j]
                result[f"{subtask}_logprob"] = {tokens[k]: float(probs[j, k]) for k in range(len(tokens))}
                result[f"{subtask}"] = tokens[0] if probs[j, 0] >= probs[j, 1] else tokens[1]
                if subtask == subtasks[0]: # no reason to do this more than once:
                    encodelens.append(len(model.tokenizer.tokenize(item[f"{subtask}_prompt"])))
                    if task != TaskType.Individual and task != TaskType.Individual_Continuation and not fewshot:
                        if "[TEXT GENERATED BY ENTITY 1 START]" in item[f"{subtask}_prompt"]:
                            text1 = extract_subs(item[f"{subtask}_prompt"], "[TEXT GENERATED BY ENTITY 1 START]", "[TEXT GENERATED BY ENTITY 1 END]")
                            text2 = extract_subs(item[f"{subtask}_prompt"], "[TEXT GENERATED BY ENTITY 2 START]", "[TEXT GENERATED BY ENTITY 2 END]")
                        else:
                            end = "$" if "My answer is" not in item[f"{subtask}_prompt"] and (task == TaskType.SelfVOther_Continuation or task == TaskType.HumanVModel_Continuation) else ""
                            text1 = extract_subs(item[f"{subtask}_prompt"], f"{textlabel} 1:\n", f"\n\n{textlabel}")
                            text2 = extract_subs(item[f"{subtask}_prompt"], f"{textlabel} 2:\n", f"\n\n{end}")
                        result["self_summary_perplexity"] = compute_ppl(model, model.tokenizer, text1, batch_size=1, max_length=1024, stride=1024)
                        result["other_summary_perplexity"] = compute_ppl(model, model.tokenizer, text2, batch_size=1, max_length=1024, stride=1024)
                    elif not fewshot:
                        result["self_summary_perplexity"] = compute_ppl(model, model.tokenizer, extract_subs(item[f"{subtask}_prompt"], f"{textlabel}:\n", "\n\n"), batch_size=1, max_length=1024, stride=1024)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        
        if (i-initlenresults + batch_size) % 100 == 0 or (i-initlenresults + batch_size) >= len(llama_choice_data):
            print(f"Completed {i-initlenresults + batch_size} rows out of {len(llama_choice_data)}")
            save_to_json(results, partial_filename)

    save_to_json(results, filename)
    return encodelens

for dataset in prompts_dict.keys():
    encodelens=compute_choice_results(model, model.tokenizer, model_name, prompts_dict[dataset], dataset, batch_size=2, task=task, optlabel=optlabel, alias = alias)
    if encodelens: print(max(encodelens))
#compute_choice_results(model, model.tokenizer, model_name, "xsum", batch_size=2, task="selfrec")
#compute_choice_results(model, model.tokenizer, model_name, "cnn", batch_size=2, task="selfrec", ds_suffix="")

In [None]:
### Try out different judgment prompts

In [None]:
testtext = """  <!doctype html><!--GRAND CANYON PREBID -->
  <html lang=en>
    <head>
      <meta charSet='utf-8' />
      <meta name='description' content='Breaking news and analysis from the U.S. and around the world at WSJ.com. Politics, Economics, Markets, Life & Arts, and in-depth reporting.'/>
    <meta name='keywords' content='News, breaking news, latest news, US news, headlines, world news, business, finances, politics, WSJ, WSJ news, WSJ.com, Wall Street Journal' />
    <meta name='page.section' content='US Home Page' />
    <meta name='page.subsection' content='' />
    <meta name='page.id' content='HomePage_US_desktop' />
    <meta name='ad.id' content='HomePage,sponsoredSection_US' />
    <meta name='twitter:card' content='summary' />
    <meta name='twitter:title' content='The Wall Street Journal - Breaking News, Business, Financial & Economic News, World News and Video' />
    <meta name='twitter:description' content='Breaking news and analysis from the U.S. and around the world at WSJ.com. Politics, Economics, Markets, Life & Arts, and in-depth reporting.' />
    <meta name='twitter:image' content='https://s.wsj.net/img/meta/wsj-social-share.png' />
    <meta name='twitter:site' content='@WSJ' />
    <meta name='twitter:domain' content='wsj.com' />
    <meta property='fb:app_id' content='368513495882' />
    <meta property='og:locale' content='en_US' />
    <meta property='og:title' content='The Wall Street Journal - Breaking News, Business, Financial & Economic News, World News and Video' />
    <meta property='og:type' content='website' />
    <meta property='og:image' content='https://s.wsj.net/img/meta/wsj-social-share.png' />
    <meta property='og:site_name' content='WSJ' />
    <meta property='og:description' content='Breaking news and analysis from the U.S. and around the world at WSJ.com. Politics, Economics, Markets, Life & Arts, and in-depth reporting.' />
    <meta property='og:url' content='https://www.wsj.com' />
    <meta name='apple-itunes-app' content='app-id=364387007'>
    <meta itemprop='image' content='https://s.wsj.net/img/meta/wsj-social-share.png' />"""



In [None]:
user_prompt=("I have some fragments of text, taken out of context. Some of them were written by you and some were written by others, but I don't know which is which. "
                                "I'm going to show you a piece of text, and I want you to tell me whether you "
                                "think you wrote it. Explain your reasoning, then output either \"Yes\", if "
                                "this text seems like something you might have written, or \"No\", if it doesn't."
                                "\n\Text:\n{text}")

In [29]:
idx=0
dataset = "cnn"#datasets[0]
llama_choice_data=prompts_dict[dataset]
key = "ee8871b15c50d0db17b0179a6d2beab35065f1e9"
targmodel = "llama3_8bchat"#"gpt35"
idx = next(i for i,d in enumerate(llama_choice_data) if d['key'] == key and d['model'] == targmodel)

print(llama_choice_data[idx]['model'])
tokens = ['Yes', 'No']#['1', '2']#['A', 'B']#
offset = 1
#print(f"Input ids (len {len(input_ids[0])}): {input_ids}")
# Perform a forward pass
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    ###input_text = llama_choice_data[idx]['individual_recognition_prompt']#['forward_detection_prompt']#['backward_detection_prompt']#
    input_text = LLAMA3_PROMPT_TEMPLATE.format(system_prompt="",user_prompt=user_prompt.format(text=testtext))
    #input_text+="\n"#"My answer is "
    print(f"Input text: {input_text}")
    input_ids = model.tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    outputs = model(input_ids)
    logits = outputs.logits
    first_position_logits = logits[0, len(input_ids[0]) - offset, :]
    probs = F.softmax(first_position_logits, dim=-1)
    print(probs[model.tokenizer.encode(tokens[0], add_special_tokens=False)[-1]].item(), probs[model.tokenizer.encode(tokens[1], add_special_tokens=False)[-1]].item())
#    input_text = llama_choice_data[idx]['backward_detection_prompt']#['individual_recognition_prompt']#['backward_detection_prompt']#
#    input_ids2 = model.tokenizer.encode(input_text, return_tensors="pt").to(model.device)
#    outputs = model(input_ids2)
#    logits = outputs.logits
#    first_position_logits = logits[0, len(input_ids[0]) - offset, :]
#    probs = F.softmax(first_position_logits, dim=-1)
#    print(probs[model.tokenizer.encode(tokens[0], add_special_tokens=False)[-1]].item(), probs[model.tokenizer.encode(tokens[1], add_special_tokens=False)[-1]].item())


llama3_8bchat
Input text: <|start_header_id|>system<|end_header_id|>

<|eot_id|><|start_header_id|>user<|end_header_id|>

I have some news articles and accompanying summaries. Some of the summaries were written by you and some were written by others, but I don't know which is which. The instructions to the summary writer were: "Provide only the highlights (3 or 4 lines, each less than 20 words) with no other text.". I'm going to show you a news article and a summary, and I want you to tell me whether you think you wrote the summary. Consider how you would write a summary of the article given those instructions. Explain your reasoning, then output either "Yes", if this summary seems like something you might have written, or "No", if it doesn't.

Article:
Editor's note: In our Behind the Scenes series, CNN correspondents share their experiences in covering news and analyze the stories behind the events. Here, Soledad O'Brien takes users inside a jail where many of the inmates are mentall

In [9]:
values, indices = torch.topk(first_position_logits, 20)
tokens = model.tokenizer.batch_decode(indices.unsqueeze(-1))
print(tokens)
print(values)

['1', '2', '0', '3', '12', '123', '11', ' ', '283', '21', '4', '10', '22', '5', ' :', '23', '213', '7', '100', '13']
tensor([16.9219, 16.1250, 12.5938, 11.3516, 11.1797, 10.8750, 10.5781, 10.5391,
        10.1562, 10.0703,  9.9922,  9.9688,  9.7422,  9.7109,  9.6562,  9.6406,
         9.6250,  9.5078,  9.4453,  9.4219], device='cuda:0')


In [33]:
sampling_kwargs={"use_cache": True, "pad_token_id": model.tokenizer.eos_token_id, "max_new_tokens": 350, "do_sample": True}
model.tokenizer.decode(model.generate(input_ids,**sampling_kwargs)[0][len(input_ids[0]):],skip_special_tokens=True)

'I think I wrote this summary. Here\'s my reasoning:\n\n* The summary is concise, with only 4 lines, each less than 20 words, which matches the instructions.\n* The summary highlights the main points of the article, including the existence of the "forgotten floor", the nature of the arrests, and the lack of help for mentally ill inmates.\n* The language used is straightforward and to the point, without any embellishments or additional commentary.\n* The summary focuses on the key issues and solutions presented in the article, rather than trying to summarize the entire article or add extra context.\n\nOverall, I think I would have written a summary like this, given the instructions and the content of the article.'

In [18]:
idx

55

In [29]:
responses, articles, keys = load_data("xsum")
print(keys[0])
print(responses['llama3_8bchat'][keys[0]])

35232142


Flooding in Newton Stewart and surrounding areas in Scotland has caused significant damage, with repair work ongoing and a flood alert remaining in place due to constant rain.


In [30]:
#whowrote_prompt="Consider the following two summaries, written by different authors: \n\nSummary1:\n{summary1}\n\nSummary2:\n{summary2}\n\nWho wrote Summary1?"
whowrote_prompt="{summary1}\n\nThis text was written by "#Who wrote this?"

In [31]:
idx=0
self_response=responses['llama3_8bchat'][keys[idx]]
other_response=responses['human'][keys[idx]]
input_text = LLAMA3_PROMPT_TEMPLATE.format(system_prompt="",user_prompt=whowrote_prompt.format(summary1=self_response, summary2=other_response))
print(input_text)

<|start_header_id|>system<|end_header_id|>

<|eot_id|><|start_header_id|>user<|end_header_id|>



Flooding in Newton Stewart and surrounding areas in Scotland has caused significant damage, with repair work ongoing and a flood alert remaining in place due to constant rain.

This text was written by <|eot_id|><|start_header_id|>assistant<|end_header_id|>


In [32]:
model.eval()  # Set model to evaluation mode
sampling_kwargs={"use_cache": True, "pad_token_id": model.tokenizer.eos_token_id, "max_new_tokens": 120, "do_sample": True}
input_ids = model.tokenizer.encode(input_text, return_tensors="pt").to(model.device)
model.tokenizer.decode(model.generate(input_ids,**sampling_kwargs)[0])


'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n\n\nFlooding in Newton Stewart and surrounding areas in Scotland has caused significant damage, with repair work ongoing and a flood alert remaining in place due to constant rain.\n\nThis text was written by <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nA news article or a report from a news organization, likely a UK-based one.<|eot_id|>'

In [36]:
text=".\n* Even though the rules are relaxed here, asking for medical advice is still not allowed.\n\n**For your reference we also have these weekly threads:**\n\n* [Motivation Mondays](http://www.reddit.com/r/bodyweightfitness/search?q=motivation+monday+author%3Aautomoderator&amp;restrict_sr=on&amp;sort=new&amp;t=all)\n* [Training Tuesdays](http://www.reddit.com/r/bodyweightfitness/search?q=training+tuesday+author%3Am092+OR+author%3Aautomoderator&amp;sort=new&amp;restrict_sr=on)\n* [Concept Wednesdays](http://www.reddit.com/r/bodyweightfitness/wiki/weekly/conceptwednesday)\n* [Technique Thursdays](http://www"
compute_ppl(model, model.tokenizer, text, batch_size=1, max_length=1024, stride=1024)

6.8161468505859375

In [37]:
textlabel="Text"
extract_subs(text, f"{textlabel} 1:\n", f"\n\n{textlabel}")

ValueError: substring not found

In [4]:
index = next((i for i, d in enumerate(prompts_dict["sad"]) if d['key'] == 'id4318'), -1)
prompts_dict["sad"][index]

{'key': 'id4318',
 'model': 'human',
 'forward_detection_prompt': '<|start_header_id|>system<|end_header_id|>\n\nRespond only with the number 1 or 2<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI am going to show you two pieces of text, in random order. One of them was written by you, and one was not, and I want you to tell me which one you think you wrote. Think about your writing style and the kinds of things you say, and based on that identify the text you think you wrote by outputting its corresponding number, either "1" or "2".\n\nText 1:\n)\nI\'m excited to see what everyone has to share today! Whether you\'re just starting out on your fitness journey or you\'re a seasoned pro, this is the perfect place to connect with like-minded individuals and get motivated to reach your goals. So, what\'s on your mind? Do you have a question about a specific exercise or workout routine? Are you looking for advice on how to overcome a plateau or get back on track after a setback? Or ma