In [1]:
!python --version

Python 3.10.12


In [None]:
import os

# Checkpoint to load and the (currently unused) transformers pipeline task name.
MODEL_NAME = 'Qwen/Qwen2.5-0.5B-Instruct-AWQ'
TASK = 'text-generation'

# The run covers iterations ITER_START .. ITER_START + ITER_TRIALS - 1.
ITER_START = 6
ITER_TRIALS = 5

# activate when feedback_results is saved in current iter
SKIP_FEEDBACK = False
# running test
RUN_LIMITED_TEST = False

# Per-iteration output directory prefix and the prefix of the input dataset
# holding the results of the iteration that precedes ITER_START.
FILEDIR_PREFIX = "/kaggle/working/qwen2-0.5B/iter_"
DATASET_FILEPATH_PREFIX = "/kaggle/input/flan-t5/qwen2-0.5B/iter_"

# Prefer a token already exported as the HF_TOKEN environment variable so the
# secret never has to be pasted into the notebook; the literal below is only a
# placeholder fallback and is not a usable credential.
HF_TOKEN = os.environ.get("HF_TOKEN") or "HF_TOKEN"

# Load Dependencies

In [3]:
from langchain_core.prompts import PromptTemplate
from tqdm import tqdm  # For progress bars

In [4]:
!pip install -qU transformers
!pip install -qU accelerate
!pip install -qU bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m75.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m481.4/481.4 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m354.7/354.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
!pip install autoawq

Collecting autoawq
  Downloading autoawq-0.2.8.tar.gz (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.6/71.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting triton (from autoawq)
  Downloading triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (1.5 kB)
Collecting transformers<=4.47.1,>=4.45.0 (from autoawq)
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting zstandard (from autoawq)
  Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Downloading transformers-4.47.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m72.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading triton-3.3.0-cp310-cp310

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline, BitsAndBytesConfig
from transformers.pipelines.pt_utils import KeyDataset
import torch
import numpy as np
import os
import math
from datasets import concatenate_datasets, Dataset
from tqdm import tqdm
import time

In [7]:
# setting env vars
# Fix the random seed through transformers' set_seed helper.
set_seed(1234)
# Option string for PyTorch's CUDA allocator.
# NOTE(review): presumably this has to be set before the first CUDA allocation
# to take effect -- confirm against the PyTorch memory-management notes.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [None]:
import os

# Mirror the notebook-level HF_TOKEN value into the process environment.
os.environ["HF_TOKEN"] = HF_TOKEN

In [9]:
!huggingface-cli login --token $HF_TOKEN

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
The token `Auth` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


# Prep Functions

In [10]:
def query_model(data, column_to_prompt, num_new_tokens = 200, batch_size=40):
    """Generate one completion for every chat prompt in ``data[column_to_prompt]``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        data: Mapping-like object; ``data[column_to_prompt]`` must yield a
            sliceable sequence of chat conversations accepted by
            ``tokenizer.apply_chat_template``.
        column_to_prompt: Name of the column holding the conversations.
        num_new_tokens: Passed to ``model.generate`` as ``max_new_tokens``.
        batch_size: Number of prompts tokenized and generated together.

    Returns:
        list[str]: Decoded continuations (prompt tokens removed), in input order.
    """
    print(f"...processing in batches of {batch_size}")
    # Read the column once: indexing a dataset column inside the loop would
    # rebuild the whole column for every batch.
    prompts = data[column_to_prompt]
    responses = []

    for i in range(0, len(prompts), batch_size):
        batch = prompts[i:i+batch_size]

        # Render each conversation to text, ending with the generation prompt.
        formatted_input = [
            tokenizer.apply_chat_template(
                convo,
                tokenize=False,
                add_generation_prompt=True
            )
            for convo in batch
        ]

        model_inputs = tokenizer(formatted_input, padding=True, truncation=True, return_tensors="pt").to(model.device)

        with torch.no_grad():
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=num_new_tokens,
            )

        # Each output row starts with its (padded) input row; keep only what
        # follows it.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        responses.extend(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))

        # cleaning up memory
        del model_inputs
        del generated_ids
        torch.cuda.empty_cache()

    return responses

In [11]:
def append_to_all_answers(example):
    """Record the row's current ``predicted_answer`` in its ``all_answers`` list.

    The list is extended in place and the same ``example`` mapping is returned.
    """
    history = example['all_answers']
    history.append(example['predicted_answer'])
    return example

## Feedback functions

In [12]:
def generate_feedback_prompts(example, template, instruction, fs):
    """Build the chat-style feedback prompt for one row and store it on the row.

    Args:
        example: Row mapping; it is rendered through ``template`` and receives
            the new ``'feedback_prompt'`` key.
        template: Object whose ``invoke(mapping)`` result exposes ``.text``.
        instruction: Content of the leading system message.
        fs: Iterable of few-shot mappings; each is rendered as a user message
            and answered with its ``'predicted_answer_feedback'`` value.

    Returns:
        The same ``example`` with ``'feedback_prompt'`` set to the message list.
    """
    def _turn(role, content):
        return {"role": role, "content": content}

    messages = [_turn("system", instruction)]

    # Every few-shot demonstration becomes one user/assistant exchange.
    for shot in fs:
        messages.extend([
            _turn("user", template.invoke(shot).text),
            _turn("assistant", shot['predicted_answer_feedback']),
        ])

    # The row itself goes last, as an unanswered user message.
    messages.append(_turn("user", template.invoke(dict(example)).text))

    example['feedback_prompt'] = messages
    return example

In [13]:
def prompt_for_feedback(ds):
    """Query the model with every ``feedback_prompt`` and store the replies.

    A leading slice of at most 100 rows is timed first so that an estimate for
    the whole dataset can be printed; the remaining rows are processed
    afterwards. Datasets with fewer than 100 rows (including empty ones) are
    handled without selecting out-of-range indices.

    Args:
        ds: Dataset-like object providing ``__len__``, ``select``,
            ``column_names``, ``remove_columns`` and ``add_column``, with a
            ``feedback_prompt`` column.

    Returns:
        The dataset with a fresh ``predicted_answer_feedback`` column holding
        the whitespace-stripped model outputs (any existing column of that
        name is replaced).
    """
    total = len(ds)
    n_timed = min(100, total)

    output_head = []
    if n_timed:
        start = time.time()
        output_head = query_model(ds.select(range(n_timed)), "feedback_prompt", 200)
        end = time.time()
        # Scale the timed slice up to the full dataset size.
        estimate = (total / n_timed) * (end - start)
        print(f"Processing {n_timed} records took {end-start} secs. Estimated completion: {estimate} secs [{estimate / 60} mins]")

    output_tail = []
    if total > n_timed:
        # Only select the remainder when there is one; an empty range starting
        # at len(ds) is not a valid selection.
        output_tail = query_model(ds.select(range(n_timed, total)), "feedback_prompt", 200)
    print('Done. Combining outputs')
    cleaned_output = [out.strip() for out in output_head + output_tail]

    if 'predicted_answer_feedback' in ds.column_names:
        ds = ds.remove_columns(['predicted_answer_feedback'])

    ds = ds.add_column('predicted_answer_feedback', cleaned_output)
    return ds

In [14]:
def generate_feedback(dataset, feedback_instruction, feedback_prompt_template, feedback_fs, filedir_prefix_saveto):
    """Run the feedback stage on ``dataset['train']``.

    Builds a feedback prompt for every row, writes the prompted split to
    ``<filedir_prefix_saveto>feedback_prompts.parquet``, then replaces the
    split with the output of ``prompt_for_feedback``.

    Returns:
        The same ``dataset`` object with its ``'train'`` entry updated.
    """
    prompt_kwargs = {
        'template': feedback_prompt_template,
        'instruction': feedback_instruction,
        'fs': feedback_fs,
    }

    # Step 1: attach prompts and persist them.
    train_with_prompts = dataset['train'].map(generate_feedback_prompts, fn_kwargs=prompt_kwargs)
    dataset['train'] = train_with_prompts
    train_with_prompts.to_parquet(f"{filedir_prefix_saveto}feedback_prompts.parquet")

    # Step 2: collect the model's feedback.
    dataset['train'] = prompt_for_feedback(train_with_prompts)
    return dataset

## Refine functions

In [15]:
def generate_refine_prompts(example, template, instruction, fs):
    """Build the chat-style refinement prompt for one row and store it on the row.

    Args:
        example: Row mapping; it is rendered through ``template`` and receives
            the new ``'refine_prompt'`` key.
        template: Object whose ``invoke(mapping)`` result exposes ``.text``.
        instruction: Content of the leading system message.
        fs: Iterable of few-shot mappings; each is rendered as a user message
            and answered with its ``'refined_answer'`` value.

    Returns:
        The same ``example`` with ``'refine_prompt'`` set to the message list.
    """
    conversation = [{"role": "system", "content": instruction}]

    # few-shot demonstrations: rendered example -> expected refined answer
    for demo in fs:
        conversation += [
            {"role": "user", "content": template.invoke(demo).text},
            {"role": "assistant", "content": demo['refined_answer']},
        ]

    # the row to refine is the final, still unanswered, user message
    conversation += [{"role": "user", "content": template.invoke(dict(example)).text}]

    example['refine_prompt'] = conversation
    return example

In [16]:
def prompt_for_refinement(ds):
    """Query the model with every ``refine_prompt`` and store the refined answers.

    A leading slice of at most 100 rows is timed first so that an estimate for
    the whole dataset can be printed; the remaining rows are processed
    afterwards. Datasets with fewer than 100 rows (including empty ones) are
    handled without selecting out-of-range indices.

    Args:
        ds: Dataset-like object providing ``__len__``, ``select``,
            ``column_names``, ``remove_columns`` and ``add_column``, with a
            ``refine_prompt`` column.

    Returns:
        The dataset with a fresh ``predicted_answer`` column holding the
        whitespace-stripped model outputs (any existing column of that name
        is replaced).
    """
    total = len(ds)
    n_timed = min(100, total)

    output_head = []
    if n_timed:
        start = time.time()
        output_head = query_model(ds.select(range(n_timed)), "refine_prompt", 50)
        end = time.time()
        # Scale the timed slice up to the full dataset size.
        estimate = (total / n_timed) * (end - start)
        print(f"Processing {n_timed} records took {end-start} secs. Estimated completion: {estimate} secs [{estimate / 60} mins]")

    output_tail = []
    if total > n_timed:
        # Only select the remainder when there is one; an empty range starting
        # at len(ds) is not a valid selection.
        output_tail = query_model(ds.select(range(n_timed, total)), "refine_prompt", 50)
    print('Done. Combining outputs')
    cleaned_output = [out.strip() for out in output_head + output_tail]

    if 'predicted_answer' in ds.column_names:
        ds = ds.remove_columns(['predicted_answer'])

    ds = ds.add_column('predicted_answer', cleaned_output)

    return ds

In [17]:
def generate_refined(dataset, refine_instruction, refine_prompt_template, refine_fs, filedir_prefix_saveto):
    """Run the refinement stage on ``dataset['train']``.

    Builds a refine prompt for every row, writes the prompted split to
    ``<filedir_prefix_saveto>refine_prompts.parquet``, then replaces the split
    with the output of ``prompt_for_refinement``.

    Returns:
        The same ``dataset`` object with its ``'train'`` entry updated.
    """
    prompt_kwargs = {
        'template': refine_prompt_template,
        'instruction': refine_instruction,
        'fs': refine_fs,
    }

    # Step 1: attach prompts and persist them.
    train_with_prompts = dataset['train'].map(generate_refine_prompts, fn_kwargs=prompt_kwargs)
    dataset['train'] = train_with_prompts
    train_with_prompts.to_parquet(f"{filedir_prefix_saveto}refine_prompts.parquet")

    # Step 2: collect the refined answers.
    dataset['train'] = prompt_for_refinement(train_with_prompts)
    return dataset

# Load Dataset

In [18]:
from datasets import load_dataset

In [19]:
# Few-shot demonstrations shared by the feedback and the refine stages.
# Each entry supplies the fields rendered by the prompt templates
# ('context', 'question', 'predicted_answer', and for the refine template also
# 'predicted_answer_feedback'), plus the expected assistant replies:
# 'predicted_answer_feedback' for the feedback stage and 'refined_answer' for
# the refine stage.
static_fs = [
    # Demonstration 1: answer contradicts the context -> refined to "Cannot be determined."
    {
        'context': "Dominic's education at Palencia gave him the knowledge he needed to overcome the Manicheans. With charity, the other concept that most defines the work and spirituality of the order, study became the method most used by the Dominicans in working to defend the Church against the perils that hounded it, and also of enlarging its authority over larger areas of the known world. In Dominic's thinking, it was impossible for men to preach what they did not or could not understand. When the brethren left Prouille, then, to begin their apostolic work, Dominic sent Matthew of Paris to establish a school near the University of Paris. This was the first of many Dominican schools established by the brethren, some near large universities throughout Europe.",
        'question': "What method was not used by the Dominicans in working to defend the Church against peril?",
        'predicted_answer': "Charity",
        'predicted_answer_feedback': """To assess the quality of the answer, we assign a score to each trait:
1. Relevancy - how well does the answer address the question: Very well. Charity is a method that feasibly could have been used to defend the Church against peril. 10/10
2. Consistency - does the answer match what is said in the context: No, the context explicitly states that charity and study were both used by the Dominicans in working to defend the Church against peril. The answer contradicts the context. 0/10
3. Brevity - does the answer only include the answer and nothing else: Yes, the answer is concise and to the point and only includes the answer to the question. 10/10""",
        'refined_answer': "Cannot be determined."
    },
    # Disabled demonstration (perfect-score answer returned unchanged); kept for reference.
#     {
#         'context': "In Canada, the traditional ceremony for granting assent in parliament was regularly used until the 21st century, long after it had been discontinued in the United Kingdom and other Commonwealth realms. One result, conceived as part of a string of royal duties intended to demonstrate Canada's status as an independent kingdom, was that King George VI personally assented to nine bills of the Canadian parliament during the 1939 royal tour of Canada\u201485 years after his great-grandmother, Queen Victoria, had last granted royal assent personally in the United Kingdom. Under the Royal Assent Act 2002, however, the alternative practice of granting assent in writing, with each house being notified separately ( the Speaker of the Senate or a representative reads to the senators the letters from the governor general regarding the written declaration of Royal Assent), was brought into force. As the act also provides, royal assent is to be signified\u2014by the governor general, or, more often, by a deputy, usually a Justice of the Supreme Court, at least twice each calendar year: for the first appropriation measure and for at least one other act, usually the first non-appropriation measure passed. However, the act provides that a grant of royal assent is not rendered invalid by a failure to employ the traditional ceremony where required.",
#         'question': "When did Canada finally cease to use the traditional ceremony for granting assent as regular practice?",
#         'predicted_answer': "21st century",
#         'predicted_answer_feedback': """To assess the quality of the answer, we assign a score to each trait:
# 1. Relevancy - how well does the answer address the question: Very well. The answer provides a date which would answer the question for when did Canada finally cease to use the traditional ceremony for granting assent as regular practice. 10/10
# 2. Consistency - does the answer match what is said in the context: Yes, the context explicitly states that the traditional ceremony was regularly used until the 21st century. 10/10
# 3. Brevity - does the answer only include the answer and nothing else: Yes, the answer is concise and to the point and only includes the answer to the question. 10/10""",
#         'refined_answer': "21st century"
#     },
    # Demonstration 2: correct but verbose answer -> refined to the bare answer.
    {
        'context': "Scholars in the United Kingdom and the United States developed somewhat different versions of cultural studies after the late 1970s. The British version of cultural studies had originated in the 1950s and 1960s, mainly under the influence first of Richard Hoggart, E. P. Thompson, and Raymond Williams, and later that of Stuart Hall and others at the Centre for Contemporary Cultural Studies at the University of Birmingham. This included overtly political, left-wing views, and criticisms of popular culture as \"capitalist\" mass culture; it absorbed some of the ideas of the Frankfurt School critique of the \"culture industry\" (i.e. mass culture). This emerges in the writings of early British cultural-studies scholars and their influences: see the work of (for example) Raymond Williams, Stuart Hall, Paul Willis, and Paul Gilroy.",
        'question': "When did the British version of cultural studies emerge?",
        'predicted_answer': "The British version of cultural studies emerged in the 1950s and 1960s.",
        'predicted_answer_feedback': """To assess the quality of the answer, we assign a score to each trait:
1. Relevancy - how well does the answer address the question: Very well. The answer provides dates which would answer the question for when the British version of cultural studies emerged. 10/10
2. Consistency - does the answer match what is said in the context: Yes, the context explicitly states that those "cultural studies had originated in the 1950s and 1960s". 10/10
3. Brevity - does the answer only include the answer and nothing else: No, the answer includes the response to the question and a part of the question. The latter part can be omitted for brevity. 5/10""",
        'refined_answer': "1950s and 1960s"
    },
    # Demonstration 3: answer merely restates the question -> refined to "Cannot be determined."
    {
        'context': "The very large and ornate School Hall and School Library (by L K. Hall) were erected in 1906-8 across the road from Upper School as the school's memorial to the Etonians who had died in the Boer War. Many tablets in the cloisters and chapel commemorate the large number of dead Etonians of the Great War. A bomb destroyed part of Upper School in World War Two and blew out many windows in the Chapel. The college commissioned replacements by Evie Hone (1949–52) and by John Piper and Patrick Reyntiens (1959 onwards).",
        'question': "Who was one of the prominent Etonians who died in the Great War?",
        'predicted_answer': "Etonians who die in the Great War",
        'predicted_answer_feedback': """To assess the quality of the answer, we assign a score to each trait:
1. Relevancy - how well does the answer address the question: Not well. The answer repeats the question without providing a clear response. Etonians who die in the Great War cannot be one of the prominent Etonians who died in the Great War. 0/10
2. Consistency - does the answer match what is said in the context: No. The context has no mention of the prominent Etonians who died in the Great War, only that Etonians died in the Great War. 0/10
3. Brevity - does the answer only include the answer and nothing else: It says Etonians who die in the Great War are one of the prominent Etonians who died in the Great War, which is redundant. The entirety of the answer can be omitted. 0/10""",
        'refined_answer': "Cannot be determined."
    },
    
]

In [20]:
# System message for the feedback stage: asks the model to score an answer on
# relevancy, consistency and brevity.
# NOTE(review): the text announces "a scale of 1 to 10" but then defines a
# 0-out-of-10 score, and contains the typo "unnecessarily details". Both are
# part of the runtime prompt, so they are left untouched here -- changing them
# would alter what the model sees.
feedback_instruction = """We want to iteratively improve the provided answers. To help improve, score each answer on a scale of 1 to 10 on the traits:
1) Relevancy - how well does the answer address the question. 10 out of 10 means the answer directly responds to the question. 5 out of 10 means the answer partially responds to the question but not entirely. 0 out of 10 means the answer does not respond to the question.
2) Consistency - does the answer match what is said in the context. 10 out of 10 means the answer of the question is explicitly stated in the context. 5 out of 10 means the answer to the question is not supported or proven by the context. 0 out of 10 means the answer to the question contradicts what is written in the context. 
3) Brevity - does the answer only include the answer and nothing else. 10 out of 10 means the answer only contains the response to the question. 5 out of 10 means the answer contains the response to the question and a portion of the question. 0 out of 10 means the answer contains unnecessarily details that can be omitted.

Note that if the answer to the question cannot be determined from the context, the answer should say 'Cannot be determined'."""

# User-message layout for the feedback stage; the placeholders are filled from
# the 'context', 'question' and 'predicted_answer' fields of a row.
feedback_template = """Context:
{context}

Question:
{question}

Answer:
{predicted_answer}

Scores:
"""

# LangChain template object whose .invoke(...).text yields the rendered message.
feedback_prompt_template = PromptTemplate.from_template(feedback_template)

In [21]:
# System message for the refine stage: asks the model to rewrite the answer
# using the feedback and to output nothing but the refined answer.
refine_instruction = "Use the feedback to improve the answer to the question in such a way that obtains a perfect score for each evaluated trait. Output only the refined answer and nothing else. If the answer achieved a perfect score, simply return the initial answer. Note that if the answer cannot be determined from the context, only say 'Cannot be determined' and nothing else."

# User-message layout for the refine stage; the placeholders are filled from
# the 'context', 'question', 'predicted_answer' and 'predicted_answer_feedback'
# fields of a row.
refine_template = """Context:
{context}

Question:
{question}

Answer:
{predicted_answer}

Feedback:
{predicted_answer_feedback}

Refined Answer:
"""

# LangChain template object whose .invoke(...).text yields the rendered message.
refine_prompt_template = PromptTemplate.from_template(refine_template)

In [22]:
# bitsandbytes 4-bit (NF4, double-quantized, bfloat16 compute) settings.
# NOTE(review): this object is not passed to from_pretrained -- the
# `quantization_config=` argument in the model-loading cell is commented out --
# so it currently has no effect on the loaded model.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16, 
    bnb_4bit_quant_type="nf4",             
    bnb_4bit_use_double_quant=True,       
)

In [23]:
# Load the causal LM in float16, letting accelerate place it on the available
# device(s), and cache the downloaded files under /kaggle/working/cache.
print(f"LOADING IN MODEL: {MODEL_NAME}")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # quantization_config=quantization_config,
    torch_dtype=torch.float16,
    cache_dir="/kaggle/working/cache",
    device_map="auto",
    # use_sliding_window=False,
    # max_memory={"cuda:0": "16GB", "cuda:1": "16GB"},
    trust_remote_code=True
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Keep the pad token the checkpoint ships with instead of forcing a hard-coded
# id: an id that is not this tokenizer's real pad token would turn an ordinary
# vocabulary entry into a "special" token, which `skip_special_tokens=True`
# then strips from decoded generations. Fall back to EOS only if no pad token
# is defined.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
# Decoder-only generation continues from the right edge, so pad on the left.
tokenizer.padding_side='left'

LOADING IN MODEL: Qwen/Qwen2.5-0.5B-Instruct-AWQ


config.json:   0%|          | 0.00/837 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/731M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [24]:
# Progress banner. The transformers `pipeline` wrapper below is left disabled.
print("STARTING PIPELINE")
# generator = pipeline(TASK, model=model, tokenizer = tokenizer)

STARTING PIPELINE


In [25]:
# Self-refine driver: every iteration loads the answers produced by the
# previous iteration, asks the model to score them (feedback), asks it to
# rewrite them (refine), appends the new answer to the history and saves
# everything under FILEDIR_PREFIX<iter>/.
for iter_i in range(ITER_START, ITER_START + ITER_TRIALS):
    print(f"RUNNING ITERATION {iter_i}")

    filedir_prefix_saveto = f"{FILEDIR_PREFIX}{iter_i}/"
    # Ensure the per-iteration output directory exists before anything is
    # written to it.
    # NOTE(review): Dataset.to_parquet may already create parent directories
    # depending on the installed datasets/fsspec versions -- harmless either way.
    os.makedirs(filedir_prefix_saveto, exist_ok=True)

    # Feedback can only be reused for the first iteration of this run, where a
    # previously saved results_feedback.parquet is expected in the input dataset.
    reuse_saved_feedback = iter_i == ITER_START and SKIP_FEEDBACK

    if reuse_saved_feedback:
        dataset_filepath = f"{DATASET_FILEPATH_PREFIX}{iter_i}/results_feedback.parquet"
    elif iter_i == ITER_START:
        # first iteration of this run: previous results come from the input dataset
        dataset_filepath = f"{DATASET_FILEPATH_PREFIX}{iter_i-1}/results.parquet"
    else:
        # later iterations continue from what this run wrote one iteration earlier
        dataset_filepath = f"{FILEDIR_PREFIX}{iter_i-1}/results.parquet"

    dataset = load_dataset("parquet", data_files=dataset_filepath)

    # Apply the test subset AFTER the final load so it is also honoured when
    # saved feedback is reused, and never select past the end of the data.
    if RUN_LIMITED_TEST:
        n_test = min(150, len(dataset['train']))
        dataset['train'] = dataset['train'].select(range(n_test))
        print(f"Running a test: computing {n_test} examples.")

    # feedback
    print("- Generating feedback...")
    if reuse_saved_feedback:
        print('SKIPPED: Retrieved feedback responses from dataset.')
    else:
        dataset = generate_feedback(dataset,
                                    feedback_instruction,
                                    feedback_prompt_template,
                                    static_fs,
                                    filedir_prefix_saveto)

        dataset['train'].to_parquet(f"{filedir_prefix_saveto}results_feedback.parquet")
    # refine
    print("- Refining answer...")
    dataset = generate_refined(dataset,
                              refine_instruction,
                              refine_prompt_template,
                              static_fs,
                              filedir_prefix_saveto)

    print("- Adding to all answers...")
    dataset["train"] = dataset["train"].map(append_to_all_answers)
    dataset['train'].to_parquet(f"{filedir_prefix_saveto}results.parquet")

    print('Done.')

RUNNING ITERATION 6


Generating train split: 0 examples [00:00, ? examples/s]

- Generating feedback...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 73.1451187133789 secs. Estimated completion: 3657.2559356689453 secs [60.95426559448242 mins]
...processing in batches of 40
Done. Combining outputs


Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

- Refining answer...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 19.00028705596924 secs. Estimated completion: 950.0143527984619 secs [15.833572546641031 mins]
...processing in batches of 40
Done. Combining outputs
- Adding to all answers...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Done.
RUNNING ITERATION 7


Generating train split: 0 examples [00:00, ? examples/s]

- Generating feedback...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 73.2113311290741 secs. Estimated completion: 3660.566556453705 secs [61.009442607561745 mins]
...processing in batches of 40
Done. Combining outputs


Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

- Refining answer...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 18.01622748374939 secs. Estimated completion: 900.8113741874695 secs [15.01352290312449 mins]
...processing in batches of 40
Done. Combining outputs
- Adding to all answers...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Done.
RUNNING ITERATION 8


Generating train split: 0 examples [00:00, ? examples/s]

- Generating feedback...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 73.60628414154053 secs. Estimated completion: 3680.3142070770264 secs [61.33857011795044 mins]
...processing in batches of 40
Done. Combining outputs


Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

- Refining answer...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 19.226601600646973 secs. Estimated completion: 961.3300800323486 secs [16.022168000539143 mins]
...processing in batches of 40
Done. Combining outputs
- Adding to all answers...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Done.
RUNNING ITERATION 9


Generating train split: 0 examples [00:00, ? examples/s]

- Generating feedback...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 71.8057062625885 secs. Estimated completion: 3590.285313129425 secs [59.83808855215708 mins]
...processing in batches of 40
Done. Combining outputs


Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

- Refining answer...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 22.834271669387817 secs. Estimated completion: 1141.7135834693909 secs [19.02855972448985 mins]
...processing in batches of 40
Done. Combining outputs
- Adding to all answers...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Done.
RUNNING ITERATION 10


Generating train split: 0 examples [00:00, ? examples/s]

- Generating feedback...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 73.05705785751343 secs. Estimated completion: 3652.8528928756714 secs [60.880881547927856 mins]
...processing in batches of 40
Done. Combining outputs


Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

- Refining answer...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

...processing in batches of 40
Processing 100 records took 20.50690770149231 secs. Estimated completion: 1025.3453850746155 secs [17.08908975124359 mins]
...processing in batches of 40
Done. Combining outputs
- Adding to all answers...


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Done.
