In [1]:
import pandas as pd

# Load the step-ordering test set and turn the numeric label (0 / 1) into the
# MCQ letter ('A' / 'B') that the prompts and the evaluation use.
order = (
    pd.read_csv('./datasets_human_readable/order_test.csv')
    .assign(label=lambda frame: frame['label'].map({0: 'A', 1: 'B'}))
)
order

Unnamed: 0,goal,step0,step1,label
0,Bathe Your Frog,"Lift, carry, and release the frog carefully.",Scoop your other fingers underneath the frogs,B
1,Buy Treasury Bonds in Australia,Check your holding statement for that month.,Place an order through your broker.,B
2,Tell Your Teacher You Don't Like Your Grades,"Compose the body of the email, and address you...",Conclude the email and send it.,A
3,Preserve Fall Leaves,Coat both sides of each leaf in decoupage.,Select vibrant leaves.,B
4,Make Polymer Clay Jewelry,Mix and blend colors.,Sculpt your polymer clay.,A
...,...,...,...,...
3095,Teach Cartwheels,Instruct her to place the weight into her fron...,Tell her to hold the handstand.,A
3096,Unshrink Clothes: Wool and Cashmere,Soak the shrunken clothing in the solution for...,Mix in at least 1 US tbsp (15 mL) of Borax or ...,B
3097,Make Palazzo Pants,Sew along the inseam.,Pin the crotch and the inseam of the pant legs.,B
3098,Paint Porch Railings,Prime your railings before painting.,Apply your finish coat of paint.,A


# Setup Common Functions

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers.enum import EnumOutputParser
from langchain.output_parsers.fix import OutputFixingParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain.llms import Ollama
from tqdm import tqdm 
import time 
from enum import Enum
from langchain.globals import set_verbose

# Switch on LangChain's global verbose flag for everything run in this notebook.
set_verbose(True)
    

class ChainManager:
    """Run a two-choice MCQ prompt against several Ollama models and score them.

    For every row of ``self.df`` and every model in ``self.model_list`` the
    manager asks the model for a free-text answer, optionally lets the model
    critique and improve that answer, and then uses a separate "mistral"
    classifier to reduce the text to a single letter. The letters and the number
    of classifier attempts are stored as new columns on ``self.df``.

    Attributes:
        prompt: Prompt template for the question. The default is only a
            placeholder; the query methods format it with ``goal``, ``step0``
            and ``step1``, so it must be replaced before running queries.
        output_parser: Parser applied to every model response.
        df: Working table of questions, taken from the notebook-level ``order``.
        model_list: Ollama model names to evaluate.
        logs: Buffered log lines waiting to be written to the log file.
    """

    def __init__(self):
        self.prompt = PromptTemplate.from_template("Tell me a short joke about {input}")
        self.output_parser = StrOutputParser()
        self.df = order
        self.model_list = ["llama2","mistral", "orca-mini:7b", "qwen:7b"]
        self.logs = []

    def _log(self, message, verbose):
        """Print ``message`` when ``verbose`` is true, otherwise buffer it."""
        if verbose:
            print(message)
        else:
            self.write_string_to_buffer(message)

    def _log_query_header(self, model_name, full_prompt, verbose):
        """Log the separator, model name and rendered prompt that open a query."""
        self._log("----------------------------------------------------------------------", verbose)
        self._log(f"model_name: {model_name}", verbose)
        self._log(f"prompt: {full_prompt}", verbose)

    def run_single_query(self, inputs, model_name, verbose, output_file_name=""):
        """Ask one model one question and classify its answer.

        Args:
            inputs: Row dict with at least ``goal``, ``step0``, ``step1`` and
                ``label``.
            model_name: Ollama model that answers the question.
            verbose: Print progress when true; otherwise buffer it and flush
                the buffer to ``output_file_name`` at the end.
            output_file_name: Log file for the non-verbose case; ``""`` means
                no log file.

        Returns:
            ``(letter, attempts)`` as produced by ``run_retry_classifier``.
        """
        full_prompt = self.prompt.format(goal=inputs["goal"], step0=inputs["step0"],step1=inputs["step1"])
        self._log_query_header(model_name, full_prompt, verbose)

        chain = self.prompt | Ollama(model=model_name) | self.output_parser
        chain_of_thought = chain.invoke(inputs)

        classifier_output = self.run_retry_classifier(chain_of_thought, 3, verbose)
        final_answer = classifier_output[0]

        if verbose:
            print("Classifier finished...\n")
            print(f"chain_of_thought: {chain_of_thought}\n")
            print(f"final_answer: {final_answer}")
            print(f"correct_answer: {inputs['label']}")
        else:
            self.write_string_to_buffer("classifier finished....\n")
            self.write_string_to_buffer(f"chain_of_thought: {chain_of_thought}\n")
            self.write_string_to_buffer(f"final_answer: {final_answer}")
            self.write_string_to_buffer(f"correct_answer: {inputs['label']}")
            self.write_buffer_to_file(output_file_name)
        return classifier_output

    def run_batch_query(self, verbose, batch_size, output_file_name="", use_self_review=True):
        """Run every model over the first ``batch_size`` rows of ``self.df``.

        ``self.df`` is replaced by a copy of those rows, and each model adds a
        ``<model>`` column (predicted letter) and a ``<model>_retries`` column
        (classifier attempts).

        Args:
            verbose: Passed through to the per-row query method.
            batch_size: Number of leading rows to process.
            output_file_name: Log file; it is emptied first when non-empty.
            use_self_review: When true (the default, and the behaviour before
                this switch existed) each row goes through
                ``run_single_self_review``; when false it goes through the
                plain ``run_single_query``.
        """
        self.df = self.df.iloc[0:0+batch_size].copy()
        input_list = self.df.to_dict('records')
        if output_file_name != "":
            self.clear_existing_file(output_file_name)

        run_one = self.run_single_self_review if use_self_review else self.run_single_query
        for model_name in self.model_list:
            results = []
            num_retries = []
            for item in tqdm(input_list, desc="Processing queries"):
                result = run_one(item, model_name, verbose, output_file_name)
                results.append(result[0])
                num_retries.append(result[1])

            self.df[model_name] = results
            self.df[model_name+"_retries"] = num_retries

    def evaluate_order(self):
        """Print, for each model, the share of rows whose answer equals ``label``.

        Only the first non-whitespace character of each stored answer is
        compared with the label.
        """
        for model_name in self.model_list:
            binary_results = self.df[model_name].str.strip().str[0]
            correct_predictions = (self.df['label'] == binary_results).sum()
            total_predictions = len(self.df)
            accuracy = correct_predictions / total_predictions
            print(f"{model_name}: {accuracy}")

    def verify_output(self, mcq_choice):
        """Return True only when ``mcq_choice`` is exactly "A" or "B"."""
        return mcq_choice in ("A", "B")

    def run_classifier(self, initial_answer, model):
        """Ask ``model`` to reduce a free-text explanation to one MCQ letter.

        Args:
            initial_answer: Explanation text to classify.
            model: Runnable language model placed in the chain.

        Returns:
            The parsed model output; it is not validated here.
        """
        classifier_prompt = PromptTemplate.from_template("""
        You are recieving an explanation from a language model about its choice for an mcq question.
        You are a binary classifier. You are to select the answer based on the explanation provided.
        You are not to explain or mention anything other than provide the answer for the mcq.
        If not enough information, randomly select one.
        Please do not give me anything other than one letter thanks!
        MCQ Choices: A or B            

        Explanation: {initial_answer}
        Answer:
        """)

        chain = classifier_prompt | model | self.output_parser
        return chain.invoke({"initial_answer": initial_answer})

    def run_retry_classifier(self, initial_answer, max_tries, verbose, output_file_name=""):
        """Classify ``initial_answer``, retrying until the result is "A" or "B".

        Args:
            initial_answer: Explanation text to classify.
            max_tries: Maximum number of classifier calls.
            verbose: Print progress when true, otherwise buffer it.
            output_file_name: Accepted for signature symmetry; not used here.

        Returns:
            ``(letter, attempts)``. If no attempt yields a valid letter, the
            upper-cased first character of the last output is returned ("" when
            that output was blank or no attempt was made) together with
            ``max_tries``.
        """
        retry_model = Ollama(model="mistral")
        self._log("Running classifier....", verbose)

        mcq_choice = ""
        for i in range(max_tries):
            classifier_output = self.run_classifier(initial_answer, retry_model)
            self._log(f"retry_classifier_{i+1}: {classifier_output}", verbose)
            # Slice rather than index: a blank response gives "" and simply
            # triggers another attempt instead of raising IndexError.
            mcq_choice = classifier_output.strip()[:1].upper()
            if self.verify_output(mcq_choice):
                return (mcq_choice, i+1)
        return (mcq_choice, max_tries)

    def run_self_review(self, question, answer, model):
        """Ask ``model`` to find problems in its earlier ``answer`` to ``question``.

        Returns:
            The parsed critique text.
        """
        info_retrieval_prompt = PromptTemplate.from_template("""
        Question: {question}
        
        Previous answer: {answer}
        
        Review your previous answer and find problems with your answer.
        """)

        chain = info_retrieval_prompt | model | self.output_parser
        return chain.invoke({"question": question, "answer": answer})

    def run_single_self_review(self, inputs, model_name, verbose, output_file_name=""):
        """Answer, self-critique, improve, then classify one question.

        The model first answers the prompt, then critiques that answer, then
        writes an improved answer from the question, its previous answer and the
        critique. The improved answer is what gets classified.

        Args:
            inputs: Row dict with at least ``goal``, ``step0``, ``step1`` and
                ``label``.
            model_name: Ollama model used for all three generation steps.
            verbose: Print progress when true; otherwise buffer it and flush
                the buffer to ``output_file_name`` at the end.
            output_file_name: Log file for the non-verbose case; ``""`` means
                no log file.

        Returns:
            ``(letter, attempts)`` as produced by ``run_retry_classifier``.
        """
        llm_model = Ollama(model=model_name)

        full_prompt = self.prompt.format(goal=inputs["goal"], step0=inputs["step0"],step1=inputs["step1"])
        self._log_query_header(model_name, full_prompt, verbose)

        chain = self.prompt | llm_model | self.output_parser
        chain_of_thought = chain.invoke(inputs)

        critique = self.run_self_review(full_prompt, chain_of_thought, llm_model)

        # The previous answer is part of the template so the model can see the
        # text it is being asked to improve.
        final_prompt = PromptTemplate.from_template("""
        Question: {question}

        Previous answer: {answer}

        Critique: {critique}

        Based on the problems you found, improve your answer.
        """)

        final_chain = final_prompt | llm_model | self.output_parser
        improved_answer = final_chain.invoke(
            {"question": full_prompt, "answer": chain_of_thought, "critique": critique}
        )

        classifier_output = self.run_retry_classifier(improved_answer, 3, verbose)
        final_answer = classifier_output[0]

        if verbose:
            print("Classifier finished...\n")
            print(f"chain_of_thought: {chain_of_thought}\n")
            print(f"critique: {critique}\n")
            print(f"improved_answer: {improved_answer}")
            print(f"final_answer: {final_answer}")
            print(f"correct_answer: {inputs['label']}")
        else:
            self.write_string_to_buffer("classifier finished....\n")
            self.write_string_to_buffer(f"chain_of_thought: {chain_of_thought}")
            self.write_string_to_buffer(f"critique: {critique}\n")
            self.write_string_to_buffer(f"improved_answer: {improved_answer}\n")
            self.write_string_to_buffer(f"final_answer: {final_answer}")
            self.write_string_to_buffer(f"correct_answer: {inputs['label']}")
            self.write_buffer_to_file(output_file_name)
        return classifier_output

    def write_string_to_buffer(self, input_string):
        """Append one log line to the in-memory buffer."""
        self.logs.append(input_string)

    def write_buffer_to_file(self, filename):
        """Append the buffered log lines to ``filename`` and empty the buffer.

        Each line is written with a leading newline. An empty ``filename`` is
        the "no log file" sentinel used across this class: nothing is written
        and the buffer is still emptied so it cannot grow without bound.
        """
        if filename:
            # UTF-8 is set explicitly so the result does not depend on the
            # platform's default encoding.
            with open(filename, 'a', encoding="utf-8") as file:
                file.writelines("\n" + log for log in self.logs)
        self.logs = []

    def clear_existing_file(self, filename):
        """Create ``filename`` or truncate it to zero length."""
        with open(filename, 'w', encoding="utf-8"):
            pass

### Single Prompt + classifier

In [86]:
simple_QA_chain = ChainManager()

# Short prompt: the model is only asked which of the two steps comes first.
simple_QA_chain.prompt = PromptTemplate.from_template("""
You are answering a multiple choice question.
I have picked out two sentences from a tutorial about: {goal}. Choose which step comes first.  
MCQ:
A: {step0}
B: {step1}

""")

# Query every model on the first 350 rows, logging to a text file, then save
# the table with the per-model answer and retry columns.
simple_QA_chain.run_batch_query(False, 350, "/Users/kohjunkai/Desktop/simple_QA.txt")
simple_QA_chain.df.to_csv('/Users/kohjunkai/Desktop/simple_QA.csv', index=False)

In [22]:
# Print each model's accuracy: the share of rows whose predicted letter matches `label`.
simple_QA_chain.evaluate_order()

llama2: 0.5857142857142857
mistral: 0.6285714285714286
orca-mini:7b: 0.52
qwen:7b: 0.6857142857142857


### Chain of thought + classifier

In [None]:
# Prompt that asks for an explanation of the ordering before the final choice.
cot_template = """
You are answering a multiple choice question. You are to provide a clear explanation for the question.
I have picked out two sentences from a tutorial about: {goal}, and you are to explain their temporal ordering. 
MCQ:
A: {step0}
B: {step1}
At the end of your explanation, explicitly state which step comes first.
"""

chain = ChainManager()
chain.prompt = PromptTemplate.from_template(cot_template)

# Small smoke run on the first 5 rows; logs go to a text file, results to CSV.
chain.run_batch_query(verbose=False, batch_size=5, output_file_name="simple_QA.txt")
chain.df.to_csv('order_results.csv', index=False)

In [None]:
# Write the current results table to CSV again (same call as at the end of the previous cell).
chain.df.to_csv('order_results.csv', index=False)

### Self Critique + classifier

In [128]:
# Prompt that asks for an explanation of the ordering before the final choice.
critique_template = """
You are answering a multiple choice question. You are to provide a clear explanation for the question.
I have picked out two sentences from a tutorial about: {goal}, and you are to explain their temporal ordering. 
MCQ:
A: {step0}
B: {step1}
At the end of your explanation, explicitly state which step comes first.
"""

critique_chain = ChainManager()
critique_chain.prompt = PromptTemplate.from_template(critique_template)

# Run on the first 200 rows; logs go to a text file, results to CSV.
critique_chain.run_batch_query(
    verbose=False,
    batch_size=200,
    output_file_name="/Users/kohjunkai/Desktop/testing2.txt",
)
critique_chain.df.to_csv('/Users/kohjunkai/Desktop/inftesting2.csv', index=False)

Processing queries:   0%|                                 | 0/200 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [125]:
# Print each model's accuracy: the share of rows whose predicted letter matches `label`.
critique_chain.evaluate_order()

mistral: 0.675
orca-mini:7b: 0.63
qwen:7b: 0.58


In [129]:
# Load a results CSV from disk and display it.
# NOTE(review): no cell visible in this notebook writes this file - confirm where it comes from.
df = pd.read_csv('/Users/kohjunkai/Desktop/step_self_critque.csv')
df

Unnamed: 0,goal,step0,step1,step2,step3,label,llama2,llama2_retries,mistral,mistral_retries,orca-mini:7b,orca-mini:7b_retries,qwen:7b,qwen:7b_retries
0,Solve an Algebraic Expression,Learn how to observe your own thoughts.,Perform a waste audit.,Know the order of operations.,Find your loan information.,C,B,1,C,1,C,1,A,1
1,Fake Food Poisoning,Go to a historical preservation.,Place time restrictions on impulsive thoughts.,Decide on the background or the foreground.,Invent a dicey dining situation.,D,A,1,D,1,B,1,A,1
2,Prevent Toxic Shock Syndrome,Draw lines at the bottom loop and then they wi...,Follow instructions carefully when using vagin...,Spot test your blood on a dirty shirt to see h...,Place the wine glass upside down on a soft tow...,B,A,1,B,1,B,1,B,1
3,Report a School for Discriminatory Discipline,Consider filing an institutional grievance.,Double check all payment amounts.,Copy and mail your form.,Identify a suspected fraudulent mortgage.,A,A,1,A,1,A,1,A,1
4,List for Sale by Owner Using MLS Listing,Learning about more than.,Know what a payment gateway is.,Know what a finance charge is.,Know what the Multiple Listing Service system is.,D,A,1,D,1,D,1,A,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Write a Glossary,Ask your editor to help you identify the terms.,Consider finding a representative to help with...,Decide if you will hire a friend or visit a we...,Hire a qualified lawyer to protect you and you...,A,A,1,A,1,A,1,A,1
196,Write a Progress Report,Decide how you want to present your material.,Nail the cans to the pallet then plant inside ...,Decide what elements you want to include with ...,Decide how you want to incorporate cigars in y...,A,D,1,A,1,A,1,A,1
197,Breed Hissing Cockroaches,Use a store-bought scalp massager if you don't...,Purchase a hole saw and its attachments.,Purchase a metal sill of the same size and shape.,Keep the habitat humid.,D,C,1,D,1,B,1,D,1
198,Teach Teens About Work and Money,"Hand them a new chart on Saturday, and pay the...","Look at pictures from the front, back, and side.",Pay an initial deposit.,Print and sign your form.,A,B,1,A,1,A,1,A,1
