In [1]:
import sys
import os
import json

from langchain_community.llms import DeepInfra
import datasets

# Make the repository root (three directory levels above the current working
# directory) importable by placing it at the front of sys.path, once.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir)
)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from src.prompts.llama_prompts import MathQAPrompt, ContextualQAPrompt
from src.utils.data_generation import generate_responses_concurrently_api, nested_split_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Passed to nested_split_dataset as `num_minor_chunks` in the data-loading cells.
PROC_NUM = 4 # number of processes to use for data generation
# Selects which raw-data file is read and names the output directory and file.
DATA_SPLIT = "train" # "train" or "test"

### Model setup

In [3]:
# Short model label (stored in the response metadata) and the DeepInfra client
# pointing at the corresponding hosted model id.
model_name = "Meta-Llama-3.1-8B-Instruct"
llm = DeepInfra(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct")
# Generation settings are attached to the client after construction.
llm.model_kwargs = dict(temperature=0.7, max_new_tokens=512)

### UMWP

In [4]:
# Dataset to process and the number of major chunks (the outer level of the
# nested split) it is cut into.
dataset_name = "UMWP"
num_major_chunks = 8
path = f"../../../dataset/raw_data/{dataset_name}/{DATA_SPLIT}.json"

# A bare `data_files` path is exposed under the "train" key whatever DATA_SPLIT
# is, so that single split is taken out of the returned mapping right away.
data = datasets.load_dataset("json", data_files=path)["train"]

# data = data.shuffle(seed=42).select(range(100)) # take smaller sample for testing
nested_data_split = nested_split_dataset(
    data,
    num_major_chunks=num_major_chunks,
    num_minor_chunks=PROC_NUM,
)


Generating train split: 4420 examples [00:00, 402877.77 examples/s]


In [5]:
# Static fields handed to the generation helper as `response_dict_format`:
# a description of the task plus metadata about the run.
response_dict = dict(
    task_info=dict(type="QA", dataset=dataset_name),
    additional_info=dict(model=model_name, domain="Math"),
)

In [6]:
def process_data_chunk(data_chunk: dict) -> tuple[list[dict], list[dict]]:
    """Turn one column-oriented UMWP chunk into prompt inputs and item metadata.

    Args:
        data_chunk: Mapping from column name to a sequence of values. The
            "question", "answer", "answerable" and "source" columns are read
            and matched up position by position.

    Returns:
        A ``(model_input, additional_info)`` pair of equally long lists with
        one entry per question. Each ``model_input`` entry holds the question
        under "query"; each ``additional_info`` entry holds the question with
        its answer, answerable flag and source.
    """
    questions = data_chunk["question"]
    model_input = [{"query": question} for question in questions]
    additional_info = [
        {
            "question": question,
            "answer": data_chunk["answer"][row],
            "answerable": data_chunk["answerable"][row],
            "source": data_chunk["source"][row],
        }
        for row, question in enumerate(questions)
    ]
    return model_input, additional_info

In [None]:
final_results = []
for i, data_chunks in enumerate(nested_data_split):
        print(f"Processing chunk {i+1} of {len(nested_data_split)}")
        all_results = await generate_responses_concurrently_api(
                llm=llm,
                prompt_class=MathQAPrompt,
                data_chunks=data_chunks,
                response_dict_format=response_dict,
                data_processing_function=process_data_chunk,
                prompt_repetitions=10,
        )
        final_results.extend(all_results)

Processing chunk 1 of 8


[32m2025-08-04 13:58:59.894[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 45 in chunk: DeepInfra Server: Rate limit exceeded[0m
[32m2025-08-04 13:59:01.222[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 44 in chunk: DeepInfra Server: Rate limit exceeded[0m
[32m2025-08-04 14:00:56.929[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 46 in chunk: DeepInfra Server: Rate limit exceeded[0m
[32m2025-08-04 14:01:00.868[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 45 in chunk: DeepInfra Server: Rate limit exceeded[0m


Processing chunk 2 of 8


[32m2025-08-04 14:57:49.710[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 75 in chunk: [0m
[32m2025-08-04 14:57:56.711[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 75 in chunk: [0m


Processing chunk 3 of 8
Processing chunk 4 of 8
Processing chunk 5 of 8
Processing chunk 6 of 8
Processing chunk 7 of 8


[32m2025-08-04 18:24:02.439[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 38 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [502 Bad Gateway]>
<CIMultiDictProxy('Server': 'nginx/1.24.0 (Ubuntu)', 'Date': 'Mon, 04 Aug 2025 15:24:02 GMT', 'Content-Type': 'text/html', 'Content-Length': '166', 'Connection': 'keep-alive')>
>[0m


Processing chunk 8 of 8


In [8]:
# Peek at the first five generated records as a quick sanity check.
final_results[:5]

[{'task_info': {'type': 'QA', 'dataset': 'UMWP'},
  'additional_info': {'model': 'Meta-Llama-3.1-8B-Instruct',
   'domain': 'Math',
   'question': " A waiter at 'The Greasy Spoon' restaurant had 39 customers to wait on. During the lunch rush he added another 12 customers. If 49 of the customers didn't leave him a tip, how many customers did leave a tip? ",
   'answer': [2.0],
   'answerable': True,
   'source': 'MultiArith'},
  'input': "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a meticulous AI mathematician. Your task is to solve the following math problem.\n\nFollow these steps carefully:\n1. **Analyze the problem:** First, understand the given information and what is being asked.\n2. **Assess solvability:** Determine if the problem is solvable. A problem might be unsolvable if it's illogical, contains contradictions, or lacks necessary information.\n3. **Solve or Explain:**\n   - **If solvable:** Provide a step-by-step solution, showing all your reasonin

In [None]:
# Persist every generated response for this dataset as pretty-printed JSON.
output_path = f"../../../dataset/raw_model_responses/{DATA_SPLIT}/{DATA_SPLIT}_{dataset_name}.json"
# Create the target directory first so the write cannot fail with
# FileNotFoundError once a long generation run has finished.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Explicit encoding keeps the file identical across platforms.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(final_results, f, indent=4)

### SQUAD

In [None]:
# Dataset to process and the number of major chunks (the outer level of the
# nested split) it is cut into.
dataset_name = "rajpurkar_squad"
num_major_chunks = 8
path = f"../../../dataset/raw_data/{dataset_name}/{DATA_SPLIT}.parquet"

# A bare `data_files` path is exposed under the "train" key whatever DATA_SPLIT
# is, so that single split is taken out of the returned mapping right away.
data = datasets.load_dataset("parquet", data_files=path)["train"]

# data = data.shuffle(seed=42).select(range(100)) # take smaller sample for testing
nested_data_split = nested_split_dataset(
    data,
    num_major_chunks=num_major_chunks,
    num_minor_chunks=PROC_NUM,
)

In [5]:
# Static fields handed to the generation helper as `response_dict_format`:
# a description of the task plus metadata about the run.
response_dict = dict(
    task_info=dict(type="Contextual QA", dataset=dataset_name),
    additional_info=dict(model=model_name),
)

In [6]:
def process_data_chunk(data_chunk: dict) -> tuple[list[dict], list[dict]]:
    """Turn one column-oriented SQuAD chunk into prompt inputs and item metadata.

    Note: this redefines the ``process_data_chunk`` declared for the UMWP
    section earlier in the notebook; from here on the name refers to this
    contextual-QA version.

    Args:
        data_chunk: Mapping from column name to a sequence of values. The
            "question", "context", "answers" and "title" columns are read and
            matched up position by position; each "answers" entry is indexed
            with the "text" key.

    Returns:
        A ``(model_input, additional_info)`` pair of equally long lists with
        one entry per question. Each ``model_input`` entry holds the question
        under "query" plus its "context"; each ``additional_info`` entry holds
        the question, context, answer text and title.
    """
    questions = data_chunk["question"]
    model_input = [
        {"query": question, "context": data_chunk["context"][row]}
        for row, question in enumerate(questions)
    ]
    additional_info = [
        {
            "question": question,
            "context": data_chunk["context"][row],
            "answer": data_chunk["answers"][row]["text"],
            "title": data_chunk["title"][row],
        }
        for row, question in enumerate(questions)
    ]
    return model_input, additional_info

In [None]:
final_results = []
for i, data_chunks in enumerate(nested_data_split):
        print(f"Processing chunk {i+1} of {len(nested_data_split)}")
        all_results = await generate_responses_concurrently_api(
                llm=llm,
                prompt_class=ContextualQAPrompt,
                data_chunks=data_chunks,
                response_dict_format=response_dict,
                data_processing_function=process_data_chunk,
                prompt_repetitions=10,
        )

        final_results.extend(all_results)

Processing chunk 1 of 8


[32m2025-08-05 07:46:27.781[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 460 in chunk: [Errno 60] Operation timed out[0m
[32m2025-08-05 07:46:47.294[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 460 in chunk: [0m
[32m2025-08-05 08:47:22.372[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 776 in chunk: [0m
[32m2025-08-05 08:47:25.368[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 794 in chunk: [0m
[32m2025-08-05 09:38:56.378[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 1586 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientR

Processing chunk 2 of 8


[32m2025-08-05 12:43:35.077[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 1566 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 09:43:35 GMT', 'Content-Type': 'application/json', 'Content-Length': '28', 'Connection': 'keep-alive', 'Server': 'uvicorn', 'x-robots-tag': 'noindex')>
>[0m
[32m2025-08-05 13:23:08.849[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 2204 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 10:23:08 GMT', 'Content-Type': '

Processing chunk 3 of 8


[32m2025-08-05 14:21:14.345[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 277 in chunk: Cannot connect to host api.deepinfra.com:443 ssl:default [nodename nor servname provided, or not known][0m
[32m2025-08-05 15:16:17.669[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 1116 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 12:16:17 GMT', 'Content-Type': 'application/json', 'Content-Length': '28', 'Connection': 'keep-alive', 'Server': 'uvicorn', 'x-robots-tag': 'noindex')>
>[0m
[32m2025-08-05 15:16:17.675[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError process

Processing chunk 4 of 8


[32m2025-08-05 18:57:35.595[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 1406 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 15:57:35 GMT', 'Content-Type': 'application/json', 'Content-Length': '28', 'Connection': 'keep-alive', 'Server': 'uvicorn', 'x-robots-tag': 'noindex')>
>[0m
[32m2025-08-05 18:57:35.643[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 1399 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 15:57:35 GMT', 'Content-Type': '

Processing chunk 5 of 8
Processing chunk 6 of 8


[32m2025-08-06 00:14:43.325[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 160 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 21:14:43 GMT', 'Content-Type': 'application/json', 'Content-Length': '28', 'Connection': 'keep-alive', 'Server': 'uvicorn', 'x-robots-tag': 'noindex')>
>[0m
[32m2025-08-06 00:14:43.347[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 160 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Tue, 05 Aug 2025 21:14:43 GMT', 'Content-Type': 'ap

Processing chunk 7 of 8


[32m2025-08-06 03:43:28.147[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 410 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Wed, 06 Aug 2025 00:43:28 GMT', 'Content-Type': 'application/json', 'Content-Length': '35', 'Connection': 'keep-alive', 'Server': 'uvicorn')>
>[0m
[32m2025-08-06 05:42:05.748[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 2192 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Wed, 06 Aug 2025 02:42:05 GMT', 'Content-Type': 'application/json', 'Content-

Processing chunk 8 of 8


[32m2025-08-06 06:42:07.748[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 238 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Wed, 06 Aug 2025 03:42:07 GMT', 'Content-Type': 'application/json', 'Content-Length': '28', 'Connection': 'keep-alive', 'Server': 'uvicorn', 'x-robots-tag': 'noindex')>
>[0m
[32m2025-08-06 06:42:08.095[0m | [31m[1mERROR   [0m | [36msrc.utils.data_generation[0m:[36mprocess_item[0m:[36m270[0m - [31m[1mError processing item 242 in chunk: DeepInfra Server: Error <bound method ClientResponse.text of <ClientResponse(https://api.deepinfra.com/v1/inference/meta-llama/Meta-Llama-3.1-8B-Instruct) [500 Internal Server Error]>
<CIMultiDictProxy('Date': 'Wed, 06 Aug 2025 03:42:08 GMT', 'Content-Type': 'ap

In [8]:
# Peek at the first five generated records as a quick sanity check.
final_results[:5]

[{'task_info': {'type': 'Contextual QA', 'dataset': 'rajpurkar_squad'},
  'additional_info': {'model': 'Meta-Llama-3.1-8B-Instruct',
   'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
   'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
   'answer': ['Saint Bernadette Soubirous'],
   'title': 'Unive

In [None]:
# Persist every generated response for this dataset as pretty-printed JSON.
output_path = f"../../../dataset/raw_model_responses/{DATA_SPLIT}/{DATA_SPLIT}_{dataset_name}.json"
# Create the target directory first so the write cannot fail with
# FileNotFoundError once a long generation run has finished.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Explicit encoding keeps the file identical across platforms.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(final_results, f, indent=4)