### Configuration

In [1]:
# Hugging Face model id of the causal LM that rewrites inputs (loaded with AutoModelForCausalLM below).
ADAPTIVE_MODEL_NAME = "TheBloke/vicuna-7B-1.1-HF"

# Hugging Face model id of the classifier under evaluation (loaded with AutoModelForSequenceClassification below).
TASK_MODEL_NAME = "aychang/roberta-base-imdb"

# CSV inference log to re-examine; it is read relative to the parent directory ("../").
LOG_FILE = (
    "results/"
    "2023-06-29_23-04-09_imdb_rotten_tomatoes_aychang-roberta-base-imdb/"
    "aychang-roberta-base-imdb-imdb_rotten_tomatoes-static-test-TheBloke-vicuna-7B-1.1-HF-16-style_inference_log.csv"
)

### Load Models

In [2]:
import os
import torch
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer, AutoModelForSequenceClassification, GenerationConfig

# Column-width display option for DataFrames; 0 is presumably meant to show full
# text without truncation (pandas documents None for "unlimited") -- TODO confirm.
pd.set_option("display.max_colwidth", 0)

# Seed the torch, NumPy and Python RNGs with the same value, in that order.
SEED = 1668
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Task model: the sequence classifier whose predictions are inspected, put in eval mode.
task_tokenizer = AutoTokenizer.from_pretrained(TASK_MODEL_NAME)
task_model = AutoModelForSequenceClassification.from_pretrained(TASK_MODEL_NAME).eval().to(device)

# Adaptive model: the causal LM used to rewrite inputs. Model ids containing
# "vicuna" are loaded with LlamaTokenizer; everything else uses AutoTokenizer.
adaptive_tokenizer_class = LlamaTokenizer if "vicuna" in ADAPTIVE_MODEL_NAME else AutoTokenizer
adaptive_tokenizer = adaptive_tokenizer_class.from_pretrained(ADAPTIVE_MODEL_NAME)

# Half precision only makes sense on the GPU; on the CPU fallback float16 kernels
# may be missing or very slow, so load in float32 there.
adaptive_dtype = torch.float16 if device.type == "cuda" else torch.float32

# NOTE(review): trust_remote_code=True lets the model repository execute its own
# Python code at load time. Kept for compatibility with models that need it, but
# only point ADAPTIVE_MODEL_NAME at repositories you trust.
adaptive_model = AutoModelForCausalLM.from_pretrained(
    ADAPTIVE_MODEL_NAME,
    torch_dtype=adaptive_dtype,
    trust_remote_code=True,
).eval().to(device)

### Load Mistakes

In [None]:
# Read the logged inference results (path is relative to the parent directory).
# Missing values become empty strings so the string match on "outcome" never sees NaN.
log_frame = pd.read_csv(f"../{LOG_FILE}").fillna("")

# Keep only the rows whose outcome mentions "Mistake", restricted to the columns
# needed for re-evaluation. A single .loc selection followed by .copy() yields an
# independent frame, so later cells can add columns to it without a
# SettingWithCopyWarning and without touching log_frame.
mistake_columns = ["input", "original_input", "style prompt", "judgment", "original judgment", "label"]
is_mistake = log_frame["outcome"].str.contains("Mistake")
new_mistakes = log_frame.loc[is_mistake, mistake_columns].copy()

print(f"Number of new mistakes: {len(new_mistakes)}")
# Preview without the "style prompt" column (presumably because it is long).
new_mistakes.drop(columns=["style prompt"])

In [None]:
def _strip_wrapping_quotes(text: str) -> str:
    """Drop one pair of surrounding double quotes when text both starts and ends with one."""
    if text.startswith('"') and text.endswith('"'):
        return text[1:-1]
    return text


def _clean_generation(generation: str) -> str:
    """Remove prompt scaffolding and special tokens from a decoded generation.

    The steps are order-sensitive and mirror the original post-processing:
    newlines become spaces, special tokens and code fences are removed, and
    the text is cut at markers that indicate the model started echoing the
    prompt format.
    """
    generation = generation.replace("\n", " ").replace("</s>", "").replace("```", "").strip()
    if "###" in generation:
        generation = generation.split("###")[0]
    if " Text:" in generation:
        # Keep what follows the first " Text:" label.
        generation = generation.split(" Text:")[1].strip()
    if "</s>" in generation:
        generation = generation.split("</s>")[0]
    if "<s>" in generation:
        generation = generation.replace("<s>", " ").strip()
    generation = _strip_wrapping_quotes(generation)
    if "<|endoftext|>" in generation:
        generation = generation.split("<|endoftext|>")[0]
    generation = _strip_wrapping_quotes(generation)
    if "Input Text:" in generation:
        generation = generation.split("Input Text:")[0].strip()
    if "\"  Assistant: " in generation:
        generation = generation.split("\"  Assistant: ")[0]
        # startswith/endswith instead of [0]/[-1]: the split can leave an empty
        # string, on which indexing would raise IndexError.
        if generation.startswith('"'):
            generation = generation[1:]
        if generation.endswith('"'):
            generation = generation[:-1]
    if "<end task example>" in generation:
        generation = generation.split("<end task example>")[0].strip()
    return _strip_wrapping_quotes(generation)


def _predict_label(text: str) -> int:
    """Return the task model's argmax class index for text.

    Must be called under torch.no_grad() by the caller when gradients are not needed.
    """
    # truncation=True caps the input at the tokenizer's configured maximum length
    # so an overly long text cannot overflow the classifier's input limit.
    task_model_inputs = task_tokenizer(text, return_tensors="pt", truncation=True).to(task_model.device)
    return task_model(**task_model_inputs).logits.argmax(-1).item()


def get_judgments(prompt: str, original_input: str, temperature: float = 0.0, max_new_tokens_factor: int = 3) -> dict:
    """Generate a rewrite of original_input from prompt and classify both texts.

    Uses the notebook-level adaptive_model / adaptive_tokenizer for generation
    and task_model / task_tokenizer for classification.

    Args:
        prompt: Full prompt fed to the adaptive model.
        original_input: The unmodified input text; it is classified as-is and
            its token count (adaptive tokenizer) bounds the generation length.
        temperature: Sampling temperature. 0.0 (default) means greedy decoding;
            any other value enables sampling with that temperature.
        max_new_tokens_factor: The generation may be at most this many times
            the token count of original_input (default 3).

    Returns:
        dict with keys "Original Input", "original Prediction", "Generation"
        and "Transfered Prediction". The key spellings are kept unchanged
        because callers look results up by these exact keys.
    """
    tokenized_prompt = adaptive_tokenizer.encode(prompt, return_tensors="pt").to(device)
    prompt_length = len(tokenized_prompt[0])
    num_example_tokens = len(adaptive_tokenizer.encode(original_input))

    do_sample = temperature != 0.0
    generation_kwargs = {
        "do_sample": do_sample,
        "max_new_tokens": num_example_tokens * max_new_tokens_factor,
        "early_stopping": True,
        "return_dict_in_generate": True,
    }
    if do_sample:
        # temperature is only meaningful when sampling; passing 0.0 with greedy
        # decoding is unused and can trigger generation-config validation warnings.
        generation_kwargs["temperature"] = temperature

    # Inference only: no autograd bookkeeping for either model.
    with torch.no_grad():
        original_prediction = _predict_label(original_input)
        outputs = adaptive_model.generate(tokenized_prompt, **generation_kwargs)

        # Decode only the newly generated tokens (everything after the prompt).
        new_tokens = outputs["sequences"][0][prompt_length:]
        generation = _clean_generation(adaptive_tokenizer.decode(new_tokens))

        # Get the prediction for the rewritten text.
        transferred_prediction = _predict_label(generation)

    return {
        "Original Input": original_input,
        "original Prediction": original_prediction,
        "Generation": generation,
        "Transfered Prediction": transferred_prediction,
    }
    

def get_prompt(original_input_text: str) -> str:
    """Build the style-transfer prompt for one input text.

    The prompt consists of an instruction header, 16 hard-coded example
    reviews (one per "- " bullet), a request to rewrite the input without
    changing its semantics/sentiment, the input text wrapped in double
    quotes under "### Input Text ###", and a trailing
    "### Rewritten Text ###" section.

    Because the f-string opens with four quote characters, the returned
    string starts with a literal double-quote character; it ends with
    'Assistant:' followed by a double quote and a newline.

    NOTE(review): the misspellings and the mixed single/doubled quoting in
    the literal are part of the text sent to the model; editing them changes
    the model input.

    Args:
        original_input_text: Text to be rewritten; interpolated verbatim.

    Returns:
        The complete prompt string.
    """
    return f""""User: ### Instructions ###
The assistant is to rewrite the input text in the exact writing sytle of the examples.

Here are the examples you should mimick
- ""I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered ""controversial"" I really had to see this for myself.<br /><br />The""
- ""A true classic. Beautifully filmed and acted. Reveals an area of Paris which is alive and filled with comedy and tragedy. Although the area of 'Hotel du Nord' and the Hotel itself still exists, it is not as gay (in the original sense of the word) and joyful as it once must have been. The film makes one yearn for the past, which has been lost, with a sigh and bit""
- ""Holy crap. This was the worst film I have seen in a long time. All the performances are fine, but there is no plot. Really! No plot! A bunch of clowns talk about this and that and that's your film. Ug... Robert Duvall's character is senile and keeps asking the same people the same qestions over and over. This earns him the same responses over and over. I""
- ""I can't remember many films where a bumbling idiot of a hero was so funny throughout. Leslie Cheung is such the antithesis of a hero that he's too dense to be seduced by a gorgeous vampire... I had the good luck to see it on a big screen, and to find a video to watch again and again. 9/10""
- ""This movie was so unrelentingly bad, I could hardly believe I was watching it. The directing, editing, production, and script all seemed as though they had been done by junior high school students who don't know all that much about movies. There was no narrative flow that made any sort of sense. Big emotional moments and climaxes (like one early on between Heath Ledger and Naomi Watts)""
- ""Orson Welles manages to knock me on my ass with every picture of his I see. Lady of Shanghai is on the same level as his other masterpieces, The Magnificent Ambersons, Touch of Evil, The Trial, and Chimes at Midnight. The plot can tend to be confusing sometimes, and sometimes it seems to be moving maybe a tiny bit too fast (about an hour of it was edited out when""
- ""I'm studying Catalan, and was delighted to find El Mar, a movie with mostly Catalan dialogue, at my art-house video store.<br /><br />Hmmm... not so delighted to have seen it.<br /><br />Yes, as other reviewers have said, it's well-made, and beautifully photographed. Although the opening sequence of the children is shockingly violent, it'""
- ""After CITIZEN KANE in 1941, Hollywood executives turned their cob-webbed backs on the great Orson Welles. With the exception of KANE, Welles lost all creative control on MAGNIFICENT AMBERSONS, JOURNEY INTO FEAR, and many other films to come. Welles was an innovative and creative genius, the most""
- ""Terrible movie. Nuff Said.<br /><br />These Lines are Just Filler. The movie was bad. Why I have to expand on that I don't know. This is already a waste of my time. I just wanted to warn others. Avoid this movie. The acting sucks and the writing is just moronic. Bad in every way. The only nice thing about the movie are Deniz Akkaya""
- ""This is the best made-for-TV movie of all-time! Am I saying this because I'm a huge Silverstone fan? Partially, but even without her, I'd still see it. I'm a fan of serial killer genre films, and believe this to be a great entry in that category. Also, Mary Giordano easily ranks among Alicia's top five character creations. Totally memor""
- ""6/10 Acting, not great but some good acting.<br /><br />4/10 Director, makes some stupid decisions for this film.<br /><br />2/10 Writer, story makes no sense at all and has huge amount of flaws.<br /><br />4/10 Overall score for this movie.<br /><br />Don't waste your time with this film,""
- ""The Incredible Melting Man"" is a fantastically gross, trashy and energetic Z-grade production that every self-respecting camp-horror freak simply has to see for him/herself! The ideal way to describe this low-budget 70's gem is like a shameless copy of Hammer's ""The Quatermass Xperiment"" ...only a thousand""
- "It was 9:30 PM last night at my friend's camping trailer and we were so hyped to watch South Park (a new episode). The thing is, in my country, South Park airs at 10:30 PM and we decided to kill time by watching the show now airing, Father of the Pride. I'll start by saying that I have only watched to episodes. The first time I""
- "The minute I started watching this I realised that I was watching a quality production so I was not surprised to find that the screenplay was written by Andrew Davis and was produced by Sue Birtwhistle both of these brought us the excellent 1995 production of Pride and Prejudice! So my only gripe here is that Emma did not run to 3 or 4 or maybe even six episodes like Pride and Pre""
- "This was awful. Andie Macdowell is a terrible actress. So wooden she makes a rocking horse look like it could do a better job. But then remember that turn in Four Weddings, equally as excruciating. Another film that portrays England as full of Chocolate box cottages, and village greens. I mean that school, how many schools apart from maybe Hogwarts look like that? The""
- "i really loved this version of Emma the best. Kate beckinsale was awesome as Emma and mark strong was very good as knightly. the only complaint that i had was on Mr. woodhouse..i can't believe that a man could whine so much or be so selfish with his daughter's life..she was a smart girl in the end though. as always, i love the places in which these Jane Austin mov""

Now rewrite this text in the style of the examples. Don't change the semantics/sentiment of the input text.

### Input Text ###
"{original_input_text}"

### Rewritten Text ###
Rewritten Text: Assistant:"
"""


In [None]:
SAMPLE_SIZE = None  # set to an int to re-run only a random sample of the mistakes

# Work on an independent copy. Without .copy(), `mistakes` is the same object as
# `new_mistakes` when SAMPLE_SIZE is None, so adding columns below would mutate
# the frame an earlier cell displayed and can raise SettingWithCopyWarning.
mistakes = (new_mistakes if SAMPLE_SIZE is None else new_mistakes.sample(SAMPLE_SIZE)).copy()

# Regenerate a styled rewrite for every mistaken example and classify it again.
# `index` is kept as the loop name (rather than `_`) because `_` is IPython's
# last-output variable in a notebook namespace.
new_generations = []
new_judgments = []
for index, entry in tqdm(mistakes.iterrows(), total=len(mistakes)):
    input_prompts = get_prompt(entry["original_input"])
    result = get_judgments(input_prompts, entry["original_input"])
    new_generations.append(result["Generation"])
    new_judgments.append(result["Transfered Prediction"])

mistakes["New Input"] = new_generations
mistakes["New Judgment"] = new_judgments

# Score the new predictions against the gold labels.
print(classification_report(mistakes["label"], mistakes["New Judgment"]))

# Reader-friendly column names for the side-by-side comparison table.
# The spelling of the display names is left unchanged in case later cells read them.
formatted_results_frame = mistakes.drop(columns=["style prompt"]).rename(columns={
    "original_input": "Unstyled Input",
    "input": "Original Styled Input",
    "New Input": "New Styled Input",
    "original judgment": "Unstyled Prediciton",
    "judgment": "Original Styled Prediction",
    "New Judgment": "New Prediction",
    "label": "Label",
})

display(formatted_results_frame[["Unstyled Input", "Unstyled Prediciton", "Original Styled Input", "Original Styled Prediction", "New Styled Input", "New Prediction", "Label"]])

#### Debug Individual Instance