# In-Context & Few-shot Learning with Prompts

### Import required packages & environment variables

In [19]:
import html
import json
import os

import openai
from dotenv import load_dotenv, find_dotenv
from IPython.display import HTML
from langchain.schema.output_parser import StrOutputParser
from langchain_community.callbacks import get_openai_callback
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain_openai import ChatOpenAI

_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

### Data Preparation
Let's prepare 15 train instances for the few shot learning & 10 test instances for validation.

In [2]:
# Load the training split. The encoding is given explicitly so decoding does
# not depend on the platform's default locale encoding.
with open('CONVFINQA/data/train.json', encoding='utf-8') as f:
    train_data = json.load(f)

# One record per dialogue, taken from the first 15 entries: the table, the
# earlier turns paired with their turn programs, the final question, and the
# final turn's program as the reference answer.
train_list = []
for entry in train_data[:15]:
    annotation = entry['annotation']
    table = annotation['amt_table']
    question_sequence = annotation['dialogue_break']
    turn_program = annotation['turn_program']

    # Every turn except the last is shown together with its program; the last
    # turn is held back as the question to be answered.
    follow_up_questions = ''.join(
        f'Intermediate Question: {question_sequence[i]}\n'
        f'Intermediate Solution/Turn Program: {turn_program[i]}\n'
        for i in range(len(question_sequence) - 1)
    )

    train_list.append({
        'context': table,
        'questions': follow_up_questions,
        'last_question': question_sequence[-1],
        'answer': turn_program[-1]
    })

In [3]:
# Load the private test split. The encoding is given explicitly so decoding
# does not depend on the platform's default locale encoding.
with open('CONVFINQA/data/test_private.json', encoding='utf-8') as f:
    test_data = json.load(f)

def generate_wiki_table(table_data):
    """Render rows of cells as a single-line HTML ``wikitable`` string.

    Each item of ``table_data`` becomes one ``<tr>`` and each element of that
    item one ``<td>``. Cell values are interpolated as-is, with no HTML
    escaping.
    """
    row_markup = []
    for row in table_data:
        cells = "".join(f"<td>{cell}</td>" for cell in row)
        row_markup.append(f"<tr>{cells}</tr>")

    return "<table class='wikitable'>" + "".join(row_markup) + "</table>"

# Build prompt inputs for the first 10 test dialogues. Only questions are
# collected here; no turn programs are read from the test entries.
test_list = []
for entry in test_data[:10]:
    table = generate_wiki_table(entry['table'])
    question_sequence = entry['annotation']['dialogue_break']

    # All turns but the final one become the conversational context.
    follow_up_questions = ''.join(
        f'Intermediate Question: {question}\n'
        for question in question_sequence[:-1]
    )
    last_question = question_sequence[-1]

    test_list.append(dict(
        context=table,
        questions=follow_up_questions,
        last_question=last_question,
    ))


### Prompt to Try the Base Model

In [7]:
# Zero-shot prompt: the instruction followed by the table, the earlier
# questions and the question to answer.
prompt_template = PromptTemplate(
    input_variables=["context", "questions", "last_question"],
    # The template is assembled from adjacent literals with explicit trailing
    # spaces. A backslash continuation inside a string literal joins the two
    # lines with nothing in between, which fuses the neighbouring words.
    template=(
        "Instruction: I am a highly intelligent bot. I can have conversations with the user to answer a series of questions. "
        "Later questions may depend on previous questions to answer. You need to provide me with the series of questions as "
        "the context and I will answer the last question with a multi-step mathematical solution which is a turn program. We use symbols, such as #0, "
        "#1. Output gives the turn program or steps to calculate the final value. Prompt format:context: {context} questions: {questions} last_question: {last_question}"
    ),
)

# Try the base model on the first training dialogue.
train_instance = train_list[0]
base_inputs = {
    'context': train_instance['context'],
    'questions': train_instance['questions'],
    'last_question': train_instance['last_question'],
}

# `model` and `output_parser` are module-level because few_shot_learning
# reuses them. temperature=0.0 asks for the least random sampling.
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.0)
output_parser = StrOutputParser()
chain = prompt_template | model | output_parser

# Last expression of the cell, so the notebook displays the model's answer.
chain.invoke(base_inputs)

'turn program: \n\nsubtract(206588, 181001) / 181001 * 100'

### Few shot prompt & In-Context Learning
Let's observe the zero-shot & few-shot learning of the model for different numbers of few-shot examples.

In [22]:
def few_shot_learning(few_shot_count, test_number):
    """Query the model for one instance's last question with k solved examples.

    Relies on the module-level ``train_list``, ``model`` and ``output_parser``.

    Input Arguments:
        few_shot_count: number of solved examples from ``train_list`` placed
            in the prompt before the query; 0 gives a zero-shot prompt.
        test_number: index into ``train_list`` of the instance to query.

    Output:
        The value returned by invoking the prompt | model | output_parser
        chain, i.e. the parsed model answer for the instance's last question.
    """
    test_instance = train_list[test_number]

    # The queried instance is skipped when picking examples, so its reference
    # answer can never appear in its own prompt. When test_number is not among
    # the first few_shot_count entries, this is the same as
    # train_list[:few_shot_count].
    few_shot_examples = [
        example for example in train_list if example is not test_instance
    ][:few_shot_count]

    example_prompt = PromptTemplate(
        input_variables=["context", "questions", "last_question", "answer"],
        template="context: {context} questions: {questions}last_question: {last_question} \n{answer}"
    )

    few_shot_prompt = FewShotPromptTemplate(
        examples=few_shot_examples,
        example_prompt=example_prompt,
        # The suffix is assembled from adjacent literals with explicit trailing
        # spaces. A backslash continuation inside a string literal joins the
        # two lines with nothing in between, which fuses the neighbouring words.
        suffix=(
            "Instruction: I am a highly intelligent bot. I can have conversations with the user to answer a series of questions. "
            "Later questions may depend on previous questions to answer. You need to provide me with the series of questions as "
            "the context and I will answer the last question with a multi-step mathematical solution which is a turn program. We use symbols, such as #0, "
            "#1. Output gives the turn program or steps to calculate the final value. Prompt format:context: {context} questions: {questions} last_question: {last_question}"
        ),
        input_variables=["context", "questions", "last_question"],
    )

    # Only the question fields are passed in; the reference answer stays out.
    query_inputs = {
        'context': test_instance['context'],
        'questions': test_instance['questions'],
        'last_question': test_instance['last_question'],
    }

    chain = few_shot_prompt | model | output_parser
    return chain.invoke(query_inputs)


### Prompting Based approach with GPT-3.5 Model
Let's iterate through 6 instances, applying the few-shot learning function to each.

In [27]:
# Results table: a header row, then one row per evaluated instance holding
# its index, last question, reference program and the model output for each
# shot count from 0 to 5.
answer_list = [[
    "test instance", "last question", "actual_answer", "with 0 shot",
    "with 1 shot", "with 2 shot", "with 3 shot",
    "with 4 shot", "with 5 shot"]]
for instance_num in range(5, 11):
    test_instance = train_list[instance_num]
    result_row = [
        instance_num,
        test_instance['last_question'],
        test_instance['answer'],
    ]
    # One model call per shot count, in increasing order.
    result_row.extend(
        few_shot_learning(shot_count, instance_num) for shot_count in range(6)
    )
    answer_list.append(result_row)

In [28]:
# The cells hold free text (questions and model output), so each one is
# HTML-escaped before being placed in the table. Otherwise a "<" or "&" in an
# answer would be parsed as markup and corrupt the rendered table.
escaped_rows = [[html.escape(str(cell)) for cell in row] for row in answer_list]
display(HTML(generate_wiki_table(escaped_rows)))

0,1,2,3,4,5,6,7,8
test instance,last question,actual_answer,with 0 shot,with 1 shot,with 2 shot,with 3 shot,with 4 shot,with 5 shot
5,what proportion does this represent?,"add(2530454, 5923147), divide(5923147, #0)","last_solution: divide(5923147, add(2530454, 5923147))","subtract(2530454, 5923147), divide(#0, 5923147)","add(2530454, 5923147), divide(#0, 5923147)","add(2530454, 5923147), divide(#0, 5923147)","add(2530454, 5923147), divide(#2, 5923147)","add(2530454, 5923147), divide(#2, 5923147)"
6,what was the percent change?,"subtract(3.7, 3.2), divide(#0, 3.2)","To calculate the percent change, you can use the following formula: percent change = ((new value - old value) / old value) * 100 First, calculate the new value by adding the net change in value of litigation reserves during 2012 to the value of litigation reserves at the start of 2012: new value = 3.2 + (3.7 - 3.2) new value = 3.2 + 0.5 new value = 3.7 Next, plug the values into the formula: percent change = ((3.7 - 3.2) / 3.2) * 100 percent change = (0.5 / 3.2) * 100 percent change = 0.15625 * 100 percent change = 15.625% Therefore, the percent change in the value of litigation reserves during 2012 was 15.625%.","subtract(3.7, 3.2), divide(subtract(3.7, 3.2), 3.2)","subtract(3.7, 3.2), divide(#0, 3.2)","subtract(3.7, 3.2), divide(#0, 3.2)","subtract(3.7, 3.2), divide(#0, 3.2)","subtract(3.7, 3.2), divide(#0, 3.2)"
7,and the percentage change of this value?,"subtract(118, 102), divide(#0, 102)","Intermediate Solution/Turn Program: #0: subtract(118, 102) #1: divide(subtract(118, 102), 102) #2: multiply(divide(subtract(118, 102), 102), 100) Therefore, the percentage change of the company's warranty liability between 2011 and 2012 is 15.686%.","subtract(118, 102), divide(#0, 102)","subtract(118, 102), divide(#0, 102)","subtract(118, 102), divide(#0, 102)","subtract(118, 102), divide(#0, 102), multiply(#1, 100)","subtract(118, 102), divide(#0, 102)"
8,what is the sum divided by total obligations due?,"add(27729, 45161), divide(#0, 317105)","Turn Program: add(27729, 45161) = 72890 divide(72890, 317105) = 0.2299","subtract(27729, 45161), divide(#0, 317105)","add(27729, 45161), divide(#0, 317105)","add(27729, 45161), divide(#0, 317105)","add(27729, 45161), divide(#0, 317105)","add(27729, 45161), divide(#2, 317105)"
9,"including the year of 2011, what would then be the total sum capitalized in the three years, in millions?","add(4.5, 4.1), add(#0, 3.4)","last_solution: add(add(4.5, 4.1), 1.7) = 10.3","add(#0, 4.5)","add(#0, 4.5)","add(4.5, 4.1), add(#0, 17.4)","add(4.5, 4.1, 4.5)","add(4.5, 4.1) + 3.4"
10,"and what was that change over the subsequent year, from 2015 to 2016?","subtract(-50, -31)","last_solution: subtract(-31, -34) = -31 - (-34) = -31 + 34 = 3 last_question: and what was that change over the subsequent year, from 2015 to 2016? last_solution: 3 + (-50) = -47","subtract(-31, -34)","subtract(-31, -34)","subtract(-31, -34)","subtract(-31, -34)","subtract(-31, -34)"


### Explanation

The paper mentions that **GPT-3.5 can do simple calculations by itself**. From the above table we can see that **the model calculates the answer itself for 3 of the 6 instances**.

The paper also explains **how the model often makes errors on questions that refer to the previous conversation context.** From the above table we can see such a scenario in the 10th test instance.


