# In-Context & Few-shot Learning with Prompts

### Import required packages & environment variables

In [1]:
import os
import json
import openai
from dotenv import load_dotenv, find_dotenv
from IPython.display import HTML
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain.schema.output_parser import StrOutputParser

### Data Preparation
Let's prepare 15 training instances for few-shot learning and 10 test instances for validation.

In [2]:
# Build the few-shot pool from the first 15 training conversations.
# The file is opened explicitly as UTF-8 so that decoding does not depend on
# the platform's default locale encoding.
with open('CONVFINQA/data/train.json', encoding='utf-8') as f:
    train_data = json.load(f)

train_list = []
for entry in train_data[:15]:
    annotation = entry['annotation']
    table = annotation['amt_table']
    question_sequence = annotation['dialogue_break']
    turn_program = annotation['turn_program']

    # Every turn except the last is rendered as a question line followed by
    # its turn-program line; the last turn is kept apart as the target.
    follow_up_questions = ''.join(
        f'Intermediate Question: {question_sequence[i]}\n'
        f'Intermediate Solution/Turn Program: {turn_program[i]}\n'
        for i in range(len(question_sequence) - 1)
    )

    train_list.append({
        'context': table,
        'questions': follow_up_questions,
        'last_question': question_sequence[-1],
        'answer': turn_program[-1]
    })

In [3]:
# Load the test split. Explicit UTF-8 keeps decoding independent of the
# platform's default locale encoding.
with open('CONVFINQA/data/test_private.json', encoding='utf-8') as f:
    test_data = json.load(f)

def generate_wiki_table(table_data):
    """Render a table given as rows of cells into a single-line HTML string.

    Each cell is interpolated as-is into a ``<td>`` element, each row is
    wrapped in ``<tr>``, and the result is enclosed in
    ``<table class='wikitable'>``. No escaping is applied to cell values.
    """
    rendered_rows = []
    for row in table_data:
        cells_markup = "".join(f"<td>{cell}</td>" for cell in row)
        rendered_rows.append("<tr>" + cells_markup + "</tr>")

    return "<table class='wikitable'>" + "".join(rendered_rows) + "</table>"

# Assemble the first 10 test conversations. Each item carries the rendered
# table, the intermediate questions (one per line, no solutions) and the
# final question to be answered.
test_list = []
for record in test_data[:10]:
    rendered_table = generate_wiki_table(record['table'])
    dialogue = record['annotation']['dialogue_break']

    # All turns but the last are listed as intermediate questions.
    earlier_questions = ''.join(
        f'Intermediate Question: {question}\n' for question in dialogue[:-1]
    )

    test_list.append(dict(
        context=rendered_table,
        questions=earlier_questions,
        last_question=dialogue[-1],
    ))


### Prompt to Try the Base Model

In [7]:
# Zero-shot baseline: the instruction plus one conversation, with no worked
# examples. The instruction is assembled from adjacent string literals so that
# every source line break keeps its separating space; backslash continuations
# inside a single literal fuse the neighbouring words together
# ("questions.Later", "asthe context", "#0,#1").
prompt_template = PromptTemplate(
    input_variables=["context", "questions", "last_question"],
    template=(
        "Instruction: I am a highly intelligent bot. I can have conversations with the user to answer a series of questions. "
        "Later questions may depend on previous questions to answer. You need to provide me with the series of questions as "
        "the context and I will answer the last question with a multi-step mathematical solution which is a turn program. We use symbols, such as #0, "
        "#1. Output gives the turn program or steps to calculate the final value. Prompt format:context: {context} questions: {questions} last_question: {last_question}"
    ),
)

# Use the first training instance as the input for this baseline run.
train_instance = train_list[0]
base_inputs = {
    'context': train_instance['context'],
    'questions': train_instance['questions'],
    'last_question': train_instance['last_question'],
}

# Rendered prompt text, kept in a variable so it can be inspected if needed.
rendered_prompt = prompt_template.format(**base_inputs)

# `model` and `output_parser` are module-level and are also used by
# `few_shot_learning`.
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.0)
output_parser = StrOutputParser()
chain = prompt_template | model | output_parser

# Last expression of the cell, so the notebook displays the model's answer.
chain.invoke(base_inputs)

'turn program: \n\nsubtract(206588, 181001) / 181001 * 100'

### Few shot prompt & In-Context Learning

In [8]:
def few_shot_learning(few_shot_count, test_number):
    """Answer the last question of one training instance with a few-shot prompt.

    The first ``few_shot_count`` entries of ``train_list`` are rendered as
    worked examples (context, intermediate questions, last question, answer)
    and placed before the instruction. The instance being queried is also
    taken from ``train_list`` (not ``test_list``).

    Relies on the module-level ``train_list``, ``model`` and ``output_parser``.

    Args:
        few_shot_count: number of leading ``train_list`` entries used as
            few-shot examples; 0 produces a prompt without examples.
        test_number: index into ``train_list`` of the instance to query.

    Returns:
        The result of invoking ``few_shot_prompt | model | output_parser``.
        NOTE(review): presumably the model's reply as a string - confirm.

    Note:
        If ``test_number < few_shot_count`` the queried instance, including
        its answer, is itself one of the examples shown to the model.
    """
    few_shot_examples = train_list[:few_shot_count]

    example_prompt = PromptTemplate(
        input_variables=["context", "questions", "last_question", "answer"],
        template="context: {context} questions: {questions}last_question: {last_question} \n{answer}"
    )

    # The instruction is assembled from adjacent string literals so that every
    # source line break keeps its separating space; backslash continuations
    # inside a single literal fuse the neighbouring words together
    # ("questions.Later", "asthe context", "#0,#1").
    instruction_suffix = (
        "Instruction: I am a highly intelligent bot. I can have conversations with the user to answer a series of questions. "
        "Later questions may depend on previous questions to answer. You need to provide me with the series of questions as "
        "the context and I will answer the last question with a multi-step mathematical solution which is a turn program. We use symbols, such as #0, "
        "#1. Output gives the turn program or steps to calculate the final value. Prompt format:context: {context} questions: {questions} last_question: {last_question}"
    )

    few_shot_prompt = FewShotPromptTemplate(
        examples=few_shot_examples,
        example_prompt=example_prompt,
        suffix=instruction_suffix,
        input_variables=["context", "questions", "last_question"],
    )

    test_instance = train_list[test_number]
    inputs = {
        'context': test_instance['context'],
        'questions': test_instance['questions'],
        'last_question': test_instance['last_question'],
    }

    # Uncomment to inspect the fully rendered prompt:
    # print(few_shot_prompt.format(**inputs))

    chain = few_shot_prompt | model | output_parser
    return chain.invoke(inputs)


### Generate answer for the last question in the 11th instance (index 10) in train data file

In [9]:
# Select the training instance at index 10 and render its table as HTML.
test_instance = train_list[10]
context_html = HTML(test_instance['context'])
display(context_html)


0,1,2,3,4
1,"years ended december 31,",2016,2015,2014
2,aes corporation,$ -50 ( 50 ),$ -31 ( 31 ),$ -34 ( 34 )
3,chile,-9 ( 9 ),-18 ( 18 ),-30 ( 30 )
4,colombia,-8 ( 8 ),29,17
5,mexico,-8 ( 8 ),-6 ( 6 ),-14 ( 14 )
6,philippines,12,8,11
7,united kingdom,13,11,12
8,argentina,37,124,66
9,other,-2 ( 2 ),-10 ( 10 ),-17 ( 17 )
10,total ( 1 ),$ -15 ( 15 ),$ 107,$ 11


In [10]:
# Query the same instance (index 10) with 0 to 5 few-shot examples.
# Each result is printed explicitly: by default a notebook cell only displays
# the value of its last expression, so bare calls would hide earlier answers.
for shot_count in range(6):
    print(few_shot_learning(shot_count, 10))

last_solution: subtract(-31, -34) = -31 - (-34) = -31 + 34 = 3

last_question: and what was that change over the subsequent year, from 2015 to 2016?
last_solution: 3
subtract(-31, -34)
subtract(-31, -34)
subtract(-31, -34)
subtract(-31, -34)
subtract(-31, -34)


In [13]:
# Show the final question next to the stored answer (turn program) for the
# selected instance, for comparison with the model outputs.
final_question = test_instance['last_question']
gold_program = test_instance['answer']
print(final_question, gold_program)

and what was that change over the subsequent year, from 2015 to 2016? subtract(-50, -31)


### Explanation
With zero-shot learning, the model makes a correct attempt to answer the last question.

When there is only one instance for few-shot learning the model generates an incorrect answer.

When only two or three few-shot examples are given, the model may rely heavily on those examples to generate the output, leading to a correct result. In this case, the model might have learned from the additional examples to produce the correct output.

In the last two cases, despite the correct answer being 'add(2530454, 5923147), divide(#0, 5923147)' based on the input and few-shot examples, the model generated 'add(2530454, 5923147), divide(#2, 5923147)', which does not accurately reflect the provided context. This discrepancy suggests that the model may have 'hallucinated' or generated incorrect information based on its internal processing rather than the actual input.