In [None]:
import datasets
from datasets import load_from_disk, Dataset
from typing import *
def get_react_data(data_dir: str, prompt_prefix: str, cache_dir: Optional[str] = None, num_proc: int = 4) -> Dataset:
    """Load a dataset saved on disk and convert it to prompt/inference pairs.

    The dataset is read with ``load_from_disk(data_dir)`` and mapped in
    batches to the following structure:
    {
        'prompt': List[str],
        'inference': List[str]
    }

    Prompts are structured as follows:
      prompt_prefix + <failure> + "\n\n"

    where ``<failure>`` is the value of the input ``failure`` column. The
    ``inference`` column is passed through unchanged. All original columns
    are requested for removal, so the result is meant to expose only the
    two columns above.

    Args:
        data_dir: Path handed to ``datasets.load_from_disk``.
        prompt_prefix: Text prepended to every ``failure`` value.
        cache_dir: Currently unused; kept so existing callers that pass it
            keep working.
        num_proc: Number of worker processes handed to ``Dataset.map``.

    Returns:
        The mapped dataset.

    Raises:
        KeyError: If the loaded dataset lacks the ``failure`` or
            ``inference`` column.
    """
    dataset = load_from_disk(data_dir)

    original_columns = dataset.column_names

    # Fail fast with a readable message instead of a bare KeyError raised
    # from inside the map workers.
    required_columns = ("failure", "inference")
    missing_columns = [name for name in required_columns if name not in original_columns]
    if missing_columns:
        raise KeyError(
            f"Dataset at {data_dir!r} is missing required column(s) {missing_columns}; "
            f"available columns: {original_columns}"
        )

    def return_prompt_and_responses(samples) -> Dict[str, List[str]]:
        # `samples` is a batch (batched=True below): each column maps to a
        # list of values, so a list of prompts is built per batch.
        return {
            "prompt": [prompt_prefix + question + "\n\n" for question in samples["failure"]],
            "inference": samples["inference"],
        }

    return dataset.map(
        return_prompt_and_responses,
        batched=True,
        num_proc=num_proc,
        remove_columns=original_columns,
    )