In [31]:
import os
from dotenv import load_dotenv

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

from raft_util import get_doc_chunk, generate_qa_pair, process_qa_pair

# Load variables from the local .env file into the process environment, then
# look up the OpenAI key (None when the variable is not defined).
load_dotenv(dotenv_path=".env")
openai_token = os.environ.get("openai_api_key")

In [32]:
# OpenAI chat model and embedding model.
# Both clients receive the same key read from .env so they authenticate
# consistently; when the key is None each client falls back to its own
# default credential lookup.
llm = OpenAI(temperature=0, n=1, model="gpt-3.5-turbo", api_key=openai_token)
embed_model = OpenAIEmbedding(api_key=openai_token)

In [33]:
# document chunks: loaded from the ./text/ directory through the raft_util helper.
# NOTE(review): presumably an indexable sequence of text chunks, since later
# cells read docs[i] and concatenate it with "\n" — confirm against
# raft_util.get_doc_chunk.
docs = get_doc_chunk("./text/")

In [34]:
# Out-of-context questions: one per line; anything after a "=====" separator
# on the line is discarded.
# The file is read as UTF-8 explicitly (assumes the file is UTF-8 encoded) so
# the result does not depend on the platform's default encoding.
with open("text/out_of_context_questions.txt", "r", encoding="utf-8") as f:
    raw_lines = f.readlines()

# Keep only the text before the separator, and skip lines that leave no
# question at all (e.g. a trailing blank line) so that no empty prompt is sent
# to the LLM further down.
all_questions = [
    question
    for question in (line.split("=====")[0].strip() for line in raw_lines)
    if question
]

all_questions, len(all_questions)

(['What is the capital of France?',
  'What is the capital of Japan?',
  'What is the capital of Canada?',
  'What is the capital of Australia?',
  'What is the capital of Azerbaijan?',
  'What is the capital of Italy?',
  'What is the capital of Bangladesh?',
  'What is the capital of Egypt?',
  'What is the capital of Russia?',
  'What is the starting square of the white king in standard chess?',
  'How many pieces does each player start with in a game of chess?',
  "What is the term for a move that puts the opponent's king in immediate threat of capture?",
  'What is the only piece that can jump over other pieces in chess?',
  'How many squares are in a chess board?',
  'What is the move called when a king and a rook move at the same time?',
  'Which piece is considered the most powerful on the chessboard?',
  'What is it called when a pawn reaches the opposite side of the board and is promoted to a higher piece?',
  'Who wrote the play "Romeo and Juliet"?',
  'Who painted the Mona 

In [35]:
# updating the cot answer prompt slightly to emphasize reasoning
from llama_index.core.llms import ChatMessage

def generate_cot_answer(llm:OpenAI, question:str, context:str) -> str:
    """Ask the LLM for a chain-of-thought answer to `question` given `context`.

    Args:
        llm: chat model used to produce the answer.
        question: the question text, interpolated into the prompt.
        context: the context text, interpolated into the prompt.

    Returns:
        The `str()` conversion of the object returned by `llm.chat`.
        NOTE(review): the exact text layout (e.g. any role prefix) is decided
        by the chat response's own string conversion — confirm before parsing.
    """
    system_text = "You are a helpful assistant who can provide an answer given a question and context."
    # The instructions ask for step-by-step reasoning, quoted evidence wrapped in
    # ##begin_quote## / ##end_quote##, and a final "<ANSWER>: ..." line.
    user_text = f"""
        Question: {question}\nContext: {context}\n
        Answer this question using the information given in the context above, or your own knowledge if context is irrelevant. 
        Here is things to pay attention to:
        - First provide step-by-step reasoning on how to answer the question from the context.
        - In the reasoning, if you need to copy paste some sentences from the context, include them in ##begin_quote## and ##end_quote##. This would mean that things outside of ##begin_quote## and ##end_quote## are not directly copy paste from the context.
        - Clealry mention if the context's not relevant to the question and which part is not relevant.
        - End your response with final answer in the form <ANSWER>: $answer, the answer should be succinct.
    """
    conversation = [
        ChatMessage(role="system", content=system_text),
        ChatMessage(role="user", content=user_text),
    ]
    chat_response = llm.chat(conversation)
    return str(chat_response)

In [36]:
# Generate a CoT answer for each out-of-context question and write the records
# to raft_o_c.txt. The question number is used as the chunk index, so question i
# is paired with docs[i] as its context.
# Each record is: question, answer, context chunk — each followed by a "=====" line.

# Fail before truncating the output file or spending any LLM calls if there are
# not enough chunks to pair one with every question.
if len(all_questions) > len(docs):
    raise ValueError(
        f"Need one document chunk per question: got {len(all_questions)} "
        f"questions but only {len(docs)} chunks."
    )

separator = "=" * 5 + "\n"

# Explicit UTF-8 so non-ASCII text in answers or chunks is written the same way
# on every platform.
with open("raft_o_c.txt", "w", encoding="utf-8") as raft_file:
    for i, question in enumerate(all_questions):
        chunk = docs[i]
        # Call the LLM before writing anything for this record, so a failed
        # request does not leave a half-written record in the file.
        answer = generate_cot_answer(llm, question, chunk)
        raft_file.write(question + "\n")
        raft_file.write(separator)
        raft_file.write(answer + "\n")
        raft_file.write(separator)
        raft_file.write(chunk + "\n")
        raft_file.write(separator)