# Generate Question-and-Answer Pairs
This notebook generates question-answer pairs from an existing ground-truth document.

**Attribution:** Code is taken and adapted from the LangChain Auto-Evaluator project.

In [31]:
import itertools
import random
from langchain.llms import Bedrock
from langchain.chains import QAGenerationChain
import boto3

def get_bedrock_client(region_name="us-east-1"):
    """
    Create a boto3 client for the Bedrock runtime service
    @param region_name: AWS region the client connects to (default "us-east-1")
    @return: boto3 "bedrock-runtime" client
    """
    return boto3.client("bedrock-runtime", region_name=region_name)

# Module-level LLM handle: the Cohere Command text model served through Bedrock
llm = Bedrock(model_id="cohere.command-text-v14", client=get_bedrock_client())

def generate_eval(text, chunk):
    """
    Generate question-answer pair from a randomly chosen slice of the input text
    @param text: text to generate eval set from
    @param chunk: chunk size (number of characters) to draw question from text
    @return: dict with keys "question" and "answer"
    @raise ValueError: if chunk is not positive or text is shorter than chunk
    """
    # Imported here so the except clause below resolves the name; the file's
    # top-level imports do not bring JSONDecodeError into scope.
    from json import JSONDecodeError

    # A non-positive chunk would yield an empty or meaningless slice
    if chunk <= 0:
        raise ValueError("Chunk size must be a positive integer")

    # Check if there are enough characters in the text
    if len(text) < chunk:
        raise ValueError("Input text is too short for the specified chunk size")

    # Set up QAGenerationChain chain using the module-level LLM
    chain = QAGenerationChain.from_llm(llm)

    # Largest starting index that still leaves a full chunk of characters
    last_start = len(text) - chunk

    # Re-try with a freshly sampled chunk until a QA pair is generated.
    # NOTE(review): the loop is unbounded; presumably the chain raises
    # JSONDecodeError when the model output is not valid JSON -- confirm.
    while True:
        starting_index = random.randint(0, last_start)
        sub_sequence = text[starting_index:starting_index + chunk]
        try:
            qa_pair = chain.run(sub_sequence)
        except JSONDecodeError:
            print("Error on question")
            continue
        break

    # Only the first pair returned by the chain is kept
    return {
        "question": qa_pair[0]["question"],
        "answer": qa_pair[0]["answer"],
    }

def generate_eval_pairs(texts, chunk_size, num_pairs):
    """
    Build a list of question-answer pairs by calling generate_eval repeatedly.
    @param texts: Input text the pairs are drawn from
    @param chunk_size: Chunk size passed to generate_eval for each pair
    @param num_pairs: How many pairs to produce
    @return: List of dictionaries with keys "question" and "answer"
    """
    # One independent generate_eval call per requested pair
    return [generate_eval(texts, chunk_size) for _ in range(num_pairs)]

# How many question-answer pairs to produce
num_pairs_to_generate = 10

# Build the pairs using a chunk size of 3000.
# NOTE(review): `text` is not defined in the visible code -- presumably an
# earlier cell loads the source document into it; confirm before running.
generated_pairs = generate_eval_pairs(text, 3000, num_pairs_to_generate)

# Show every pair, numbered from 1, with a blank line after each answer
for i, pair in enumerate(generated_pairs, start=1):
    print(
        f"Pair {i}:",
        f"Question: {pair['question']}",
        f"Answer: {pair['answer']}\n",
        sep="\n",
    )


Pair 1:
Question: What are the different ways to apply for the Feedback and Complaint mechanism?
Answer: [Using their Feedback and Complaints Hotline: +90 0543 329 61 90, SMS, Whatsapp, Telegram, Signal or Viber. Emailing either geribildirim-sikayet@mhd.org.tr or feedback-complaints@mhd.org.tr. Or filling out one of their online forms, found here: https://docs.google.com/forms/d/e/1FAIpQLScfDQVudLlDha6nMACBoTwDLE2YWNscWjd2OzWOOOnBXIUsTQ/viewform (Turkish), https://docs.google.com/forms/d/e/1FAIpQLSc76wSwaQCenUQD1yKIhRyrs565grkiiO3ndKfleHTuBAfRZg/viewform (English), https://docs.google.com/forms/d/e/1FAIpQLSdTRAMZgfgJ4lnPlMw2kN97U4Jb6ANN5mbphl1glrfFWMzZLQ/viewform (French), https://docs.google.com/forms/d/e/1FAIpQLSdu7Euqd2qeeCJwojRJ3KGQ3wZXUuZlugSe4FxAlcjgytey-Q/viewform (Arabic), or https://docs.google.com/forms/d/e/1FAIpQLSfHq0keZ24iFQwU3FYfDvr8U0d-Q7V3XKcB9f5MG7QUidsowQ/viewform (Farsi)]

Pair 2:
Question: Under what circumstances would I be eligible for free legal aid in Turkey?
An