In [None]:
from transformers import LlamaModel, LlamaTokenizer, AutoModelForMaskedLM, AutoTokenizer, AutoModelForCausalLM

def load_model(model_name, token, cache_dir="/home/raj/models/"):
    """Load a causal language model and its tokenizer by Hub name.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.
        token: Hugging Face access token. An empty string is treated as
            "no token" and forwarded as ``None`` so that an empty credential
            is never handed to the library.
        cache_dir: Directory used by ``from_pretrained`` to cache the files.
            Defaults to the path that used to be hard-coded here.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # "" is only a placeholder value; forward None instead of an empty credential.
    auth_token = token or None
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=auth_token, cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=auth_token, cache_dir=cache_dir)
    return model, tokenizer

def load_model_from_local(model_file_path):
    """Load a causal language model and its tokenizer from local files only.

    Args:
        model_file_path: Path handed to ``from_pretrained`` with
            ``local_files_only=True``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_file_path, local_files_only=True)
    # The model used to be overwritten with None right after loading, so callers
    # always received (None, tokenizer); return the loaded model instead.
    model = AutoModelForCausalLM.from_pretrained(model_file_path, local_files_only=True)
    return model, tokenizer

# Specify the model name and your Hugging Face access token
import os

model_name = "reciprocate/llama2-7b-gsm8k"
# Read the token from the HF_TOKEN environment variable so it never has to be
# typed into (and saved with) the notebook. Falls back to an empty string.
your_token = os.environ.get("HF_TOKEN", "")
model_file_path = "/home/raj/.cache/huggingface/hub/models--reciprocate--llama2-7b-gsm8k/snapshots/a99b9c5a7e7b6c37dc6fd81cbc4fd2f2015b2967"
model, tokenizer = load_model(model_name, your_token)
# model, tokenizer = load_model_from_local(model_file_path)

# You can now continue with the rest of your inference code


In [None]:
def generate_text(prompt, model, tokenizer, max_length=100, max_new_tokens=None):
    """Generate one continuation of ``prompt`` and return it as decoded text.

    Args:
        prompt: Text to encode and feed to the model.
        model: Object exposing ``generate`` (and optionally ``device``).
        tokenizer: Callable tokenizer exposing ``decode``.
        max_length: Total length limit forwarded to ``generate`` when
            ``max_new_tokens`` is not given.
        max_new_tokens: If given, forwarded to ``generate`` instead of
            ``max_length``.

    Returns:
        The first generated sequence decoded with special tokens kept.
    """
    # Encode the prompt to tokens
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Put the inputs on the same device as the model when the model reports one;
    # otherwise generation fails as soon as the model lives on a GPU.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    if max_new_tokens is not None:
        length_kwargs = {"max_new_tokens": max_new_tokens}
    else:
        length_kwargs = {"max_length": max_length}

    # Only the first sequence is ever decoded, so request exactly one.
    output_ids = model.generate(input_ids, num_return_sequences=1, **length_kwargs)

    print(output_ids.shape)

    # Decode the generated ids to a text string
    return tokenizer.decode(output_ids[0], skip_special_tokens=False)

# Example prompt
prompt = "3 + 5 = ?"

# Generate text based on the prompt
generated_text = generate_text(prompt, model, tokenizer)
print("Generated text:", generated_text)

# Drop the references to the model and tokenizer so they can be collected
model = None
tokenizer = None
# Clear CUDA cache: run the garbage collector first so the dropped objects are
# actually freed before the allocator is asked to release its cached blocks.
import torch, gc
gc.collect()
torch.cuda.empty_cache()
print("Done")

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline
# Imported here so this cell does not depend on an earlier cell having run.
import gc
import torch

# Release any previous pipeline before loading a new one
pipe = None
gc.collect()
torch.cuda.empty_cache()
# This checkpoint is loaded elsewhere in the notebook with AutoModelForCausalLM,
# so the matching pipeline task is "text-generation", not "question-answering".
pipe = pipeline("text-generation", model="reciprocate/llama2-7b-gsm8k")

In [None]:
## Copy of code in utils.py

import traceback

class Prompt:
    """Holds the prefixes, separators and generation settings of a prompt template."""

    def __init__(
        self,
        question_prefix: str,
        answer_prefix: str,
        intra_example_sep: str,
        inter_example_sep: str,
        engine: str = None,
        temperature: float = None,
    ) -> None:
        # Text fragments placed around the question and the answer
        self.question_prefix, self.answer_prefix = question_prefix, answer_prefix
        # Separators: within one example / between two examples
        self.intra_example_sep, self.inter_example_sep = intra_example_sep, inter_example_sep
        # Generation settings, stored as given
        self.engine, self.temperature = engine, temperature

    def make_query(self, prompt: str, question: str) -> str:
        """Return prompt + question prefix + question + intra-example separator + answer prefix."""
        segments = (
            prompt,
            self.question_prefix,
            question,
            self.intra_example_sep,
            self.answer_prefix,
        )
        return "".join(format(segment) for segment in segments)


def retry_parse_fail_prone_cmd(
    func,
    max_retries: int = 3,
    exceptions=(
        ValueError,
        KeyError,
        IndexError,
    ),
):
    """Wrap ``func`` so that selected exceptions trigger a retry.

    Args:
        func: Callable to wrap. Can be used directly as a decorator.
        max_retries: Maximum number of times ``func`` is called. Zero or a
            negative value means ``func`` is never called.
        exceptions: Exception types that are caught, printed together with
            their traceback, and retried. Any other exception propagates.

    Returns:
        A wrapper returning ``func``'s result from the first successful call,
        or ``None`` when every attempt raised one of ``exceptions``.
    """
    import functools

    # Preserve the wrapped function's name/docstring on the wrapper.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Counting down with range() terminates even for a negative
        # max_retries, which a `while retries:` loop would not.
        for retries_left in range(max_retries - 1, -1, -1):
            try:
                return func(*args, **kwargs)
            except exceptions as e:
                stack_trace = traceback.format_exc()
                print(f"An error occurred: {e}. {stack_trace}. Left retries: {retries_left}.")
        return None

    return wrapper


In [None]:
# Copy of feedback_no_update.py
import pandas as pd
# from prompt_lib.backends import openai_api

# from src.utils import Prompt


class GSMFeedback(Prompt):
    """Asks the model for feedback on a GSM solution and for a corrected solution.

    The few-shot prompt is read from ``prompt_examples``; each call appends the
    solution under review plus a fixed instruction and sends the result to
    ``generate_text`` using the notebook-level ``model`` and ``tokenizer``.
    """

    def __init__(self, engine: str, prompt_examples: str, temperature: float, max_tokens: int = 300) -> None:
        super().__init__(
            question_prefix="",
            answer_prefix="",
            intra_example_sep="\n\n",
            # NOTE(review): "###n\n" looks like a typo for "###\n\n". Left as-is because
            # the value is only stored here; confirm against the prompt file.
            inter_example_sep="\n\n### END ###n\n",
            engine = engine,
            temperature = temperature
        )
        self.max_tokens = max_tokens
        self.instruction = "# There is an error in the code above because of lack of understanding of the question. What is the error? To find the error, go through semantically complete blocks of the code, and check if everything looks good."
        self.setup_prompt_from_examples_file(prompt_examples)

    def setup_prompt_from_examples_file(self, examples_path: str) -> None:
        """Read the few-shot examples file into ``self.prompt``."""
        with open(examples_path, "r") as f:
            self.prompt = f.read()

    def __call__(self, solution: str):
        """Generate feedback for ``solution``.

        Returns:
            ``{"solution": ..., "feedback": ...}`` where feedback is the text
            before the first ``def solution():`` in the model output and
            solution is the code that follows it.

        Raises:
            IndexError: if the output contains no ``def solution():`` marker.
        """
        generation_query = self.make_query(solution=solution)
        entire_output = generate_text(generation_query, model, tokenizer)

        print(entire_output)

        # The decoded text may start with the query itself (a causal LM's
        # generate() output includes the prompt tokens). Parse only what follows
        # it, otherwise the markers below match inside the few-shot examples.
        query_start = entire_output.find(generation_query)
        if query_start != -1:
            entire_output = entire_output[query_start + len(generation_query):]

        if "### END" in entire_output:
            entire_output = entire_output.split("### END")[0]

        parts = entire_output.split("def solution():")
        if len(parts) < 2:
            # Same exception type as the former bare `[1]` lookup, with a clear message.
            raise IndexError("model output does not contain 'def solution():'")
        feedback = parts[0]
        solution = "def solution():" + parts[1].rstrip()
        return {"solution": solution, "feedback": feedback}

    def make_query(self, solution: str):
        """Return the few-shot prompt followed by the solution and the instruction."""
        solution = f"""{self.question_prefix}{solution}{self.intra_example_sep}{self.instruction}{self.answer_prefix}"""
        return f"{self.prompt}{solution}"
    

def test():
    """Smoke test: run GSMFeedback on a deliberately wrong solution and print both outputs."""
    buggy_solution = """def solution():
    \"\"\"Twenty dozen cups cost $1200 less than the total cost of half a dozen plates sold at $6000 each. Calculate the total cost of buying each cup.\"\"\"
    plates = 6
    plate_cost = 6000
    cups = 12 * 20
    cup_cost = (plates * plate_cost) / cups - 1200
    result = cup_cost
    return result"""

    feedback_task = GSMFeedback(
        engine="code-davinci-002",
        prompt_examples="../../data/prompt/gsm/feedback.txt",
        temperature=0.7,
    )
    outcome = feedback_task(buggy_solution)
    # Feedback first, then the rewritten solution
    for key in ("feedback", "solution"):
        print(outcome[key])


if __name__ == "__main__":
    test()

In [None]:
import pandas as pd
# from src.utils import Prompt

# from prompt_lib.backends import openai_api


class GSMInit(Prompt):
    """Produces the initial Python solution for a GSM question.

    The few-shot prompt is read from ``prompt_examples``; each call appends the
    question and sends the result to ``generate_text`` using the notebook-level
    ``model`` and ``tokenizer``.
    """

    def __init__(self, prompt_examples: str, engine: str, temperature: float) -> None:
        super().__init__(
            question_prefix="# Q: ",
            answer_prefix="# solution using Python:\n",
            intra_example_sep="\n",
            inter_example_sep="\n\n",
            engine=engine,
            temperature=temperature,
        )
        self.setup_prompt_from_examples_file(prompt_examples)

    def setup_prompt_from_examples_file(self, prompt_examples) -> None:
        """Read the few-shot examples file into ``self.prompt``."""
        with open(prompt_examples, "r") as f:
            self.prompt = f.read()

    def make_query(self, solution: str) -> str:
        """Return the few-shot prompt followed by the stripped question and the answer prefix."""
        solution = solution.strip()
        query = f"{self.prompt}{self.question_prefix}{solution}{self.intra_example_sep}{self.answer_prefix}"
        return query

    def __call__(self, solution: str) -> str:
        """Generate a solution for the question passed as ``solution``.

        Returns:
            The generated text, stripped. If the decoded output contains the
            query, only the text after it is returned.
        """
        generation_query = self.make_query(solution)
        output = generate_text(generation_query, model, tokenizer)

        # The decoded text may start with the query itself (a causal LM's
        # generate() output includes the prompt tokens); return only what
        # follows it so the few-shot examples do not leak into the result.
        query_start = output.find(generation_query)
        if query_start != -1:
            output = output[query_start + len(generation_query):]

        # solution_code = openai_api.OpenaiAPIWrapper.get_first_response(output)
        solution_code = output

        return solution_code.strip()


def test():
    """Smoke test: print the initial solution GSMInit generates for one sample question."""
    sample_question = "The educational shop is selling notebooks for $1.50 each and a ballpen at $0.5 each.  William bought five notebooks and a ballpen. How much did he spend in all?"
    init_task = GSMInit(
        engine="code-davinci-002",
        temperature=0.0,
        prompt_examples="../../data/prompt/gsm/init.txt",
    )
    first_solution = init_task(sample_question)
    print(first_solution)


if __name__ == '__main__':
    test()

In [None]:
# Copy of run.py

import pandas as pd
from tqdm import tqdm


# Append the absolute path of the local self-refine checkout to sys.path so that
# the `src` package imported below can be found.
import sys
sys.path.append("/home/raj/code/nlp_project/self-refine")
# from src.gsm.task_init import GSMInit
# from src.gsm.feedback import GSMFeedback

# NOTE(review): this import rebinds retry_parse_fail_prone_cmd, replacing any
# definition of the same name made earlier in the notebook.
from src.utils import retry_parse_fail_prone_cmd

# Engine identifier passed as the `engine` argument of GSMInit / GSMFeedback.
CODEX = "code-davinci-002"
# GPT3 = "text-davinci-003"
ENGINE = CODEX


@retry_parse_fail_prone_cmd
def iterative_gsm(question: str, max_attempts: int, feedback_type: str, temperature: float):
    """Generate a solution for ``question`` and refine it with model feedback.

    The first attempt produces an initial solution with GSMInit; every attempt
    then asks GSMFeedback for feedback plus a revised solution. The loop stops
    after ``max_attempts`` attempts or as soon as the feedback contains
    "it is correct" (case-insensitive).

    Returns:
        A list with one dict per attempt holding the keys "attempt",
        "solution_curr", "solution_fixed" and "feedback".

    Raises:
        NotImplementedError: if ``feedback_type`` is "naive".
    """
    # Generator for the first-pass solution
    task_init = GSMInit(
        prompt_examples="../../data/prompt/gsm/init.txt",
        engine=ENGINE,
        temperature=temperature,
    )

    # Feedback component: only the non-"naive" variant exists
    if feedback_type == "naive":
        raise NotImplementedError
    task_feedback = GSMFeedback(
        prompt_examples="../../data/prompt/gsm/feedback.txt",
        engine=ENGINE,
        temperature=0.7,
    )

    history = []
    attempt = 0
    while attempt < max_attempts:
        # The initial solution is generated only on the very first attempt
        if attempt == 0:
            solution = task_init(solution=question)

        refined = task_feedback(solution=solution)
        fixed_solution = refined["solution"]
        feedback_text = refined["feedback"]

        history.append(
            {
                "attempt": attempt,
                "solution_curr": solution,
                "solution_fixed": fixed_solution,
                "feedback": feedback_text,
            }
        )

        # Stop refining once the feedback declares the solution correct
        if "it is correct" in feedback_text.lower():
            break

        solution = refined["solution"]
        attempt += 1

    return history


def fix_gsm(gsm_task_file: str, max_attempts: int, outfile: str, feedback_type: str, temperature: float):
    """Run ``iterative_gsm`` on every row of a JSON-lines task file and save the results.

    Args:
        gsm_task_file: JSON-lines file; each record needs an "input" field.
        max_attempts: Forwarded to ``iterative_gsm``.
        outfile: Path of the final JSON-lines output. Intermediate snapshots
            are written to ``outfile + ".<index>.jsonl"`` whenever the row
            index is a multiple of 10.
        feedback_type: Forwarded to ``iterative_gsm``.
        temperature: Forwarded to ``iterative_gsm``.

    Returns:
        A list of row dicts extended with "run_logs", "generated_answer_ours"
        and "generated_answer_direct". Both answers are ``None`` for a row
        whose run produced no logs.
    """
    slow_programs_df = pd.read_json(gsm_task_file, lines=True, orient="records")
    slow_programs_df["run_logs"] = None
    results = []
    for i, row in tqdm(slow_programs_df.iterrows(), total=len(slow_programs_df)):
        row_copy = row.to_dict()
        run_logs = iterative_gsm(question=row["input"], max_attempts=max_attempts, feedback_type=feedback_type, temperature=temperature)
        row_copy["run_logs"] = run_logs
        if run_logs:
            row_copy["generated_answer_ours"] = run_logs[-1]["solution_fixed"]
            row_copy["generated_answer_direct"] = run_logs[0]["solution_curr"]
        else:
            # iterative_gsm is wrapped by the retry helper and therefore returns
            # None once its retries are exhausted (and an empty list when
            # max_attempts is 0). Record the failure instead of crashing on
            # `run_logs[-1]` and losing the rows processed so far.
            print(f"No run logs for row {i}; recording empty answers.")
            row_copy["generated_answer_ours"] = None
            row_copy["generated_answer_direct"] = None
        results.append(row_copy)
        # Periodic snapshot of everything processed so far
        if i % 10 == 0:
            pd.DataFrame(results).to_json(outfile + f".{i}.jsonl", orient="records", lines=True)
    pd.DataFrame(results).to_json(outfile, orient="records", lines=True)
    return results


def test():
    """Smoke test: write a one-question task file, run fix_gsm on it and print both answers."""
    import json

    sample_record = {"input": "Twenty dozen cups cost $1200 less than the total cost of half a dozen plates sold at $6000 each. Calculate the total cost of buying each cup."}
    with open("/tmp/debug_gsm.jsonl", "w") as fout:
        json.dump(sample_record, fout)

    logs = fix_gsm(
        gsm_task_file="/tmp/debug_gsm.jsonl",
        max_attempts=3,
        outfile="/tmp/test.jsonl",
        feedback_type="rich",
        temperature=0.0,
    )
    # Refined answer first, then the direct (first-pass) answer
    for entry in logs:
        print(entry["generated_answer_ours"])
        print(entry["generated_answer_direct"])


In [None]:
import gc, torch, tqdm
from transformers import LlamaModel, LlamaTokenizer, AutoModelForMaskedLM, AutoTokenizer, AutoModelForCausalLM

# Pick the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Release cached CUDA memory before loading a model.
torch.cuda.empty_cache()

def load_model(model_name, token, cache_dir="/home/raj/models/"):
    """Load a causal language model and its tokenizer by Hub name.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.
        token: Hugging Face access token. An empty string or ``None`` is
            forwarded as ``None`` so that an empty credential is never handed
            to the library.
        cache_dir: Directory used by ``from_pretrained`` to cache the files.
            Defaults to the path that used to be hard-coded here.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Forward None rather than an empty credential.
    auth_token = token or None
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=auth_token, cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=auth_token, cache_dir=cache_dir)
    return model, tokenizer

def load_model_from_local(model_file_path):
    """Load a causal language model and its tokenizer from local files only.

    Args:
        model_file_path: Path handed to ``from_pretrained`` with
            ``local_files_only=True``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_file_path, local_files_only=True)
    # The model used to be overwritten with None right after loading, so callers
    # always received (None, tokenizer); return the loaded model instead.
    model = AutoModelForCausalLM.from_pretrained(model_file_path, local_files_only=True)
    return model, tokenizer

# Specify the model name and your Hugging Face access token
import os

# model_name_gsm = "reciprocate/llama2-7b-gsm8k"
model_name_llama = "meta-llama/Llama-2-7b-hf"
# SECURITY: access tokens must not be stored in the notebook. The token is read
# from the HF_TOKEN environment variable (None when it is not set). The token
# that used to be hard-coded on this line has been exposed and should be revoked.
your_token = os.environ.get("HF_TOKEN")
# model_file_path = "/home/raj/.cache/huggingface/hub/models--reciprocate--llama2-7b-gsm8k/snapshots/a99b9c5a7e7b6c37dc6fd81cbc4fd2f2015b2967"
# model_gsm, tokenizer_gsm = load_model(model_name, your_token)

# Drop every reference to previously loaded models/tokenizers so they can be
# garbage-collected before the next model is loaded.
model = None
model_llama = None
model_gsm = None
tokenizer = None
tokenizer_llama = None
tokenizer_gsm = None

gc.collect()
model_llama, tokenizer_llama = load_model(model_name_llama, your_token)

In [None]:
import datasets
from datasets import load_dataset

# "gsm8k" ships more than one configuration, so name the one to load explicitly.
# `verification_mode="no_checks"` is the current spelling of the deprecated
# `ignore_verifications=True`.
dataset = load_dataset("gsm8k", "main", verification_mode="no_checks")

In [None]:
print(type(dataset))
# A bare expression is only displayed when it is the last line of a cell, so
# print the values that are meant to be inspected.
print(len(dataset))

# Load the data instances
train_data = dataset['train']
test_data = dataset['test']
print(train_data)
print(test_data)

# Iterate over the first few instances (indices 0..5)
for i, instance in enumerate(train_data):
    print(f"Instance {i + 1}:", instance)
    if i == 5:
        break

In [None]:
# Move the Llama model to the CPU before clearing the CUDA allocator cache below.
model_llama.to('cpu')

# Ask PyTorch to release the cached CUDA memory it is no longer using.
torch.cuda.empty_cache()



def generate_text(prompt, model, tokenizer, max_length=200, max_new_tokens=None):
    """Generate one continuation of ``prompt`` and return it as decoded text.

    Args:
        prompt: Text to encode and feed to the model.
        model: Object exposing ``generate`` (and optionally ``device``).
        tokenizer: Callable tokenizer exposing ``decode``.
        max_length: Total length limit forwarded to ``generate`` when
            ``max_new_tokens`` is not given.
        max_new_tokens: If given, forwarded to ``generate`` instead of
            ``max_length``.

    Returns:
        The first generated sequence decoded with special tokens kept.
    """
    # Encode the prompt to tokens
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Put the inputs on the same device as the model when the model reports one;
    # otherwise generation fails as soon as the model lives on a GPU.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    if max_new_tokens is not None:
        length_kwargs = {"max_new_tokens": max_new_tokens}
    else:
        length_kwargs = {"max_length": max_length}

    # Only the first sequence is ever decoded, so request exactly one.
    output_ids = model.generate(input_ids, num_return_sequences=1, **length_kwargs)

    # Decode the generated ids to a text string
    return tokenizer.decode(output_ids[0], skip_special_tokens=False)

# Example prompt used as a quick smoke test of generate_text
prompt = "what is the value of 3 + 5 ?"

# Generate text based on the prompt with the Llama model and tokenizer
# loaded in an earlier cell, then show the decoded result.
generated_text = generate_text(prompt, model_llama, tokenizer_llama)
print("Generated text:", generated_text)

In [None]:
class Prompt:
    """Minimal prompt template made of a question prefix and an answer prefix."""

    def __init__(self, question_prefix: str, answer_prefix: str) -> None:
        self.question_prefix, self.answer_prefix = question_prefix, answer_prefix

    def make_query(self, prompt: str, question: str) -> str:
        """Return prompt + question prefix + question + answer prefix."""
        segments = (prompt, self.question_prefix, question, self.answer_prefix)
        return "".join(format(segment) for segment in segments)

class GSMInit(Prompt):
    """Builds a one-shot math prompt and asks a model to answer a question.

    Args:
        prompt_examples: Currently unused; the prompt is built in code. Use
            ``setup_prompt_from_examples_file`` to load a prompt from a file.
        model: Model handed to ``generate_text``.
        tokenizer: Tokenizer handed to ``generate_text``.
    """

    def __init__(self, prompt_examples: str = None, model=None, tokenizer=None) -> None:
        super().__init__(
            question_prefix="# Question: \t",
            answer_prefix="# Answer: \t",
        )
        # self.setup_prompt_from_examples_file(prompt_examples)
        self.prompt = "Solve the following math question. Keep your answer short and concise.\n\n"
        # The worked example uses the same prefixes and line layout as the real
        # query, so the model sees one consistent pattern to continue.
        self.one_shot = f"For example,\n{self.question_prefix}What is 3 + 5 ?\n{self.answer_prefix}8\n\n"
        self.prompt += self.one_shot
        self.model = model
        self.tokenizer = tokenizer

    def setup_prompt_from_examples_file(self, prompt_examples) -> None:
        """Replace ``self.prompt`` with the contents of the given file."""
        with open(prompt_examples, "r") as f:
            self.prompt = f.read()

    def make_query(self, question: str) -> str:
        """Return the prompt, the stripped question on its own line, then the answer prefix."""
        question = question.strip()
        # The newline keeps the answer prefix from being glued to the question text.
        query = f"{self.prompt}{self.question_prefix}{question}\n{self.answer_prefix}"
        return query

    def __call__(self, question: str) -> str:
        """Generate an answer for ``question``.

        Returns:
            The generated text, stripped. If the decoded output contains the
            query, only the text after it is returned.
        """
        generation_query = self.make_query(question)
        output = generate_text(generation_query, self.model, self.tokenizer)
        # The decoded text may start with the query itself (a causal LM's
        # generate() output includes the prompt tokens); return only what follows.
        query_start = output.find(generation_query)
        if query_start != -1:
            output = output[query_start + len(generation_query):]
        return output.strip()

task_init = GSMInit(model = model_llama, tokenizer = tokenizer_llama)

# Compare the reference answer with the model's answer for the first six training questions
for i, instance in enumerate(train_data):
    # print(instance)
    question = instance['question']
    # The final answer is the text after the "####" marker in the reference answer
    answer = instance['answer'].split("####")[1].strip()
    try:
        # Drop thousands separators so values such as "1,000" still parse
        answer = float(answer.replace(",", ""))
    except ValueError:
        # Not numeric: keep the raw string for display
        pass
    model_answer = task_init(question)
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Model Answer: {model_answer}")

    if i >=5 :
        break

In [None]:
# Print the first eleven test instances (indices 0..10)
for i, instance in enumerate(test_data):
    # print(instance)
    question, answer = instance['question'], instance['answer']

    print(instance)

    if not i < 10:
        break