In [6]:
import ollama

In [7]:
def classify_english_question(english_question):
    """Classify an English question as single-hop or multi-hop with llama3:8b.

    A few-shot prompt is sent through ``ollama.generate`` and the free-text
    reply is mapped onto a label.

    Args:
        english_question: The question text to classify.

    Returns:
        ``'multihop'`` or ``'singlehop'`` when a label is recognised in the
        reply, ``'unknown'`` when the reply contains neither label, and
        ``'error'`` when the model call (or reading its response) raises.
    """
    prompt = f"""You are given a question in English. Your task is to classify it as either SINGLEHOP or MULTIHOP.

Definitions:
- A question is **SINGLEHOP** if it can be answered using a single fact, sentence, or document. The question may be long, but if it doesn't require combining or reasoning over multiple distinct pieces of information, it's SINGLEHOP.
- A question is **MULTIHOP** if answering it requires combining multiple facts, reasoning over several steps, or connecting pieces of information from different sources.

⚠️ Important:
- A question's length does not determine its type. A long, descriptive question can still be SINGLEHOP.
- MULTIHOP questions typically require you to first find one piece of information, then use that to find the next.

Examples:

🔹 Example 1 (SINGLEHOP - factual entity lookup):
Question: "Who wrote Pride and Prejudice?"
Answer: SINGLEHOP

🔹 Example 2 (SINGLEHOP - simple date):
Question: "When did the Berlin Wall fall?"
Answer: SINGLEHOP

🔹 Example 3 (SINGLEHOP - location fact):
Question: "Where is the Eiffel Tower located?"
Answer: SINGLEHOP

🔹 Example 4 (SINGLEHOP - biography-style):
Question: "What is the profession of Elon Musk?"
Answer: SINGLEHOP

🔹 Example 5 (SINGLEHOP - short explanation):
Question: "What is photosynthesis?"
Answer: SINGLEHOP

🔸 Example 6 (MULTIHOP - indirect relationship):
Question: "Which country was ruled by the emperor who built the Taj Mahal?"
Answer: MULTIHOP

🔸 Example 7 (MULTIHOP - event inference):
Question: "Which city hosted the Olympics where Michael Phelps won 8 gold medals?"
Answer: MULTIHOP

🔸 Example 8 (MULTIHOP - entity resolution):
Question: "Who is the father of the current King of the United Kingdom?"
Answer: MULTIHOP

🔸 Example 9 (MULTIHOP - causal historical link):
Question: "What war led to the independence of the United States?"
Answer: MULTIHOP

🔸 Example 10 (MULTIHOP - layered facts):
Question: "Which university did the author of 'The Selfish Gene' attend, and what subject did he study there?"
Answer: MULTIHOP

---

Now read the following English question and classify it:

Question: "{english_question}"
Answer:"""

    try:
        response = ollama.generate(
            model='llama3:8b',
            prompt=prompt
        )
        reply = response['response'].strip().lower()

        # The model frequently spells the labels "Multi-hop" / "single hop";
        # drop hyphens, underscores and spaces so those variants still match.
        compact = reply.replace('-', '').replace('_', '').replace(' ', '')
        multi_at = compact.find('multihop')
        single_at = compact.find('singlehop')

        # When the reply mentions both labels (e.g. "SINGLEHOP, not MULTIHOP")
        # the label that appears first is taken as the verdict.
        if multi_at != -1 and (single_at == -1 or multi_at < single_at):
            return 'multihop'
        # 'simple' is kept as a legacy synonym, used only as a fallback.
        if single_at != -1 or 'simple' in reply:
            return 'singlehop'

        print(f"⚠️ Unexpected response: {reply}")
        return "unknown"

    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do.
        print(f"❌ Error processing question: {english_question}\n↪ {e}")
        return "error"


In [8]:
def decompose_english_query(english_query: str) -> dict:
    """Split a complex question into two ordered sub-questions via llama3:8b.

    Args:
        english_query: The multi-hop question to decompose.

    Returns:
        ``{'q1': ..., 'q2': ...}`` when both sub-questions were found in the
        model output; an empty dict when either is missing or empty, or when
        the model call raises.
    """
    refined_prompt = f"""
You are an expert in breaking down complex English questions into two logically ordered sub-questions.

Rules:
- Extract exactly 2 sub-questions.
- q1 should logically precede q2.
- Use clear and grammatically correct English.
- Output only in this format:
  q1: [First sub-question]
  q2: [Second sub-question]

Input: {english_query}
Output:
"""

    try:
        response = ollama.generate(
            model='llama3:8b',
            prompt=refined_prompt,
            options={
                'temperature': 0.5,
                'num_ctx': 2048  # context window size (tokens) requested from Ollama
            }
        )

        output = response['response'].strip()

        result = {}
        for line in output.split('\n'):
            # Tolerate list bullets and markdown emphasis around the label
            # ("- q1: ...", "**Q1:** ...") instead of requiring an exact prefix.
            cleaned = line.strip().lstrip('-*•> \t').replace('**', '')
            label, separator, text = cleaned.partition(':')
            key = label.strip().lower()
            text = text.strip()
            # The prompt's format template shows the answer in [brackets];
            # unwrap them when the model copies that literally.
            if text.startswith('[') and text.endswith(']'):
                text = text[1:-1].strip()
            if separator and key in ('q1', 'q2') and text:
                result[key] = text

        return result if len(result) == 2 else {}

    except Exception as e:
        print(f"Decomposition error: {str(e)}")
        return {}


In [9]:
def query_context_relevance_check_en(query: str, context: str) -> bool:
    """Ask llama3:8b whether ``context`` is relevant to ``query``.

    Args:
        query: The question being answered.
        context: A retrieved text chunk to judge.

    Returns:
        ``True`` when the first true/false word in the model reply is "true";
        ``False`` otherwise, including when the reply contains neither word or
        the model call raises (the error is printed).
    """
    prompt = f"""
You are a binary classifier.

Your task is to decide whether the following *context* is relevant to the *question*. You must answer ONLY with **True** or **False** — no explanation, no commentary, just one word: True or False.

Criteria:
- If the context helps answer the question directly or indirectly, reply: True
- If the context is unrelated, confusing, or insufficient, reply: False

IMPORTANT:
- Do NOT explain your answer.
- Do NOT include any additional comments.
- Just respond with: True or False

---

Question: {query}

Context: {context}

Answer (True/False):
"""

    try:
        response = ollama.chat(
            model="llama3:8b",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response['message']['content'].strip().lower()

        # The model does not always obey "one word only": replies such as
        # "True.", "**True**" or "Answer: True" must still count as True.
        # Reduce the reply to alphabetic words and take the first verdict word.
        words = ''.join(ch if ch.isalpha() else ' ' for ch in answer).split()
        verdict = next((word for word in words if word in ('true', 'false')), None)
        return verdict == 'true'
    except Exception as e:
        print(f"Error during relevance check: {e}")
        return False


In [10]:
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import torch

def load_retriever(
    index_path: str,
    chunks_path: str
):
    """Load everything needed for dense retrieval.

    Args:
        index_path: Path of the FAISS index file to read.
        chunks_path: Path of the pickle file holding the stored chunks.

    Returns:
        A 4-tuple ``(model, index, chunks_list, device)``: the MiniLM
        SentenceTransformer, the FAISS index, the unpickled chunks and the
        device string ("cuda" or "cpu") the model was created on.
    """
    # Prefer the GPU whenever torch reports one.
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"

    encoder = SentenceTransformer(
        "sentence-transformers/all-MiniLM-L6-v2",
        device=device,
    )
    # Encoder settings applied after construction.
    encoder.max_seq_length = 512
    encoder.tokenizer.do_lower_case = False

    faiss_index = faiss.read_index(index_path)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point chunks_path at files produced by this project.
    with open(chunks_path, "rb") as chunk_file:
        stored_chunks = pickle.load(chunk_file)

    return encoder, faiss_index, stored_chunks, device


  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Build the retrieval stack once at module level. The functions defined in the
# following cells read `model`, `index` and `chunks_list` as globals or as
# default argument values, so this cell must run before they are defined.
model, index, chunks_list, device = load_retriever(
    index_path="../../vector_db/paragraphs/5884_paras/5884_paras_index.faiss",
    chunks_path="../../data_storage/Paragraph_chunks/5884_paragraphs/5884_parachunks.pkl"
)

In [13]:
def retrieve_documents(query, k=3):
    """Return up to ``k`` stored chunks nearest to ``query`` in the FAISS index.

    Uses the module-level ``model``, ``index`` and ``chunks_list``.

    Args:
        query: Text to embed and search for.
        k: Number of neighbours requested from the index.

    Returns:
        A list of chunks in the order returned by the index. It can be shorter
        than ``k`` when the index holds fewer than ``k`` vectors.
    """
    # Encode the query using MiniLM model
    query_embedding = model.encode([query])

    # Search the FAISS index
    _, indices = index.search(query_embedding, k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours.
    # Indexing chunks_list with -1 would silently return the LAST chunk as if
    # it were a hit, so padded slots are dropped.
    return [chunks_list[i] for i in indices[0] if i >= 0]

In [14]:
def generate_using_llama3(context, query):
    """Generate an answer to ``query`` from ``context`` with llama3:8b.

    Args:
        context: Retrieved text the answer must be based on.
        query: The user question.

    Returns:
        The stripped model response, or the literal string
        "Error generating answer." when the call raises (the error is printed).
    """
    instruction = (
        'Answer the query using only the information present in the retrieved context. '
        'If the answer is not directly stated, make the best possible inference from the available context. '
        'Do not say "no information available", "cannot answer", or provide disclaimers. '
        'Only return a clear and direct answer — no introductions, no explanations, and no repetition of the query.'
    )
    llm_prompt = (
        'You are a helpful assistant designed to generate precise and informative answers '
        'based strictly on the given context.\n\n'
        f'Query:\n{query}\n\n'
        f'Retrieved Context:\n{context}\n\n'
        f'Instruction:\n{instruction}'
    )

    try:
        completion = ollama.generate(model='llama3:8b', prompt=llm_prompt)
        answer_text = completion['response']
        return answer_text.strip()
    except Exception as e:
        print("Error during generation:", e)
        return "Error generating answer."


In [15]:
def get_context_of_multihop_without_parallel(query,model=model,index=index,chunks_list=chunks_list,k=3):
    """Collect retrieval context for ``query`` sequentially (no thread pools).

    The question is classified first. Multi-hop questions are decomposed into
    two sub-questions: contexts for the first are retrieved, each is combined
    with both sub-questions to retrieve second-hop candidates, and candidates
    that pass the LLM relevance check are added to the result.

    Args:
        query: The user question.
        model, index, chunks_list: Accepted for interface compatibility; they
            are not used here because ``retrieve_documents`` reads the
            module-level objects.
        k: Number of chunks requested per retrieval.

    Returns:
        A list of retrieved chunks. Direct retrieval for ``query`` is used for
        single-hop questions, for "unknown"/"error" classifications and when
        decomposition does not yield two sub-questions.
    """
    classification = classify_english_question(query)

    # Everything except an explicit "multihop" verdict takes the direct route.
    # Previously "unknown"/"error" fell through both branches and returned None.
    if classification != "multihop":
        return retrieve_documents(query, k)

    decomposition = decompose_english_query(query)
    q1 = decomposition.get("q1", "")
    q2 = decomposition.get("q2", "")

    # A failed decomposition yields {}; searching with an empty q1 would return
    # arbitrary chunks, so fall back to the original question instead.
    if not q1 or not q2:
        return retrieve_documents(query, k)

    first_hop_contexts = retrieve_documents(q1, k)
    main_context = list(first_hop_contexts)

    for intermediate_ctx in first_hop_contexts[:k]:
        # Separate the parts so words at the boundaries are not fused together.
        combined_query = f"{q1} {intermediate_ctx} {q2}"

        for ctx in retrieve_documents(combined_query, k):
            # Skip chunks already collected: avoids duplicate context and a
            # redundant LLM relevance call.
            if ctx in main_context:
                continue
            if query_context_relevance_check_en(query, ctx):
                main_context.append(ctx)

    return main_context
 


In [16]:

import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def expand_multihop_context(intermediate_ctx, query, q1, q2, k):
    """Retrieve second-hop contexts for one first-hop context and filter them.

    Args:
        intermediate_ctx: A chunk retrieved for the first sub-question.
        query: The original question, used for the relevance check.
        q1: First sub-question.
        q2: Second sub-question.
        k: Number of chunks requested from ``retrieve_documents``.

    Returns:
        The second-hop contexts judged relevant by
        ``query_context_relevance_check_en``, in retrieval order. An empty list
        is returned when retrieval or the pool itself fails (error printed).
    """
    try:
        # Separate the parts so words at the boundaries are not fused together.
        combined_query = f"{q1} {intermediate_ctx} {q2}"
        second_hop_contexts = retrieve_documents(combined_query, k)

        relevant_contexts = []

        with ThreadPoolExecutor() as inner_executor:
            futures = [
                inner_executor.submit(query_context_relevance_check_en, query, ctx)
                for ctx in second_hop_contexts
            ]

            # Pair every future with the context it was submitted for.
            # Enumerating as_completed() yields futures in COMPLETION order, so
            # its counter does not identify the context that was checked.
            for ctx, future in zip(second_hop_contexts, futures):
                try:
                    if future.result():
                        relevant_contexts.append(ctx)
                except Exception as e:
                    print("Error during relevance check:", e)

        return relevant_contexts

    except Exception as e:
        print("Error in expand_multihop_context:", e)
        return []


def get_context_of_multihop(query, type, model=model, index=index, chunks_list=chunks_list, k=3):
    """Retrieve context for ``query`` and time each pipeline stage.

    The LLM classification is always computed and returned, but routing is
    decided by ``type``: "easy" uses direct retrieval, anything else goes
    through decomposition and parallel second-hop expansion.

    Args:
        query: The user question.
        type: Difficulty label supplied by the caller (shadows the builtin;
            the name is kept for interface compatibility).
        model, index, chunks_list: Accepted for interface compatibility; not
            used here because ``retrieve_documents`` reads the module-level
            objects.
        k: Number of chunks requested per retrieval.

    Returns:
        ``(context, classification, classification_time, decomposition_time,
        retrieval_time)`` where ``context`` is a list of chunks and the times
        are wall-clock seconds.
    """
    # Measure classification time
    start_classification = time.time()
    classification = classify_english_question(query)
    classification_time = time.time() - start_classification

    if type == "easy":
        decomposition_time = 0.0
        start_retrieval = time.time()
        context = retrieve_documents(query, k)
        retrieval_time = time.time() - start_retrieval
        return context, classification, classification_time, decomposition_time, retrieval_time

    start_decomposition = time.time()
    decomposition = decompose_english_query(query)
    q1 = decomposition.get("q1", "")
    q2 = decomposition.get("q2", "")
    decomposition_time = time.time() - start_decomposition

    start_retrieval = time.time()

    # A failed decomposition yields {}; searching with an empty q1 would return
    # arbitrary chunks, so fall back to retrieving with the original question.
    if not q1 or not q2:
        context = retrieve_documents(query, k)
        retrieval_time = time.time() - start_retrieval
        return context, classification, classification_time, decomposition_time, retrieval_time

    main_context = retrieve_documents(q1, k)

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(expand_multihop_context, ctx, query, q1, q2, k)
            for ctx in main_context[:k]
        ]

        # Collect in submission order (not completion order) so the merged
        # context is the same from run to run, and drop duplicate chunks.
        for future in futures:
            for ctx in future.result():
                if ctx not in main_context:
                    main_context.append(ctx)

    retrieval_time = time.time() - start_retrieval

    return main_context, classification, classification_time, decomposition_time, retrieval_time


In [17]:
import time

def multihop_handling_LQR(query, type, model=model, index=index, chunks_list=chunks_list, k=3):
    """Run retrieval plus answer generation for one question and report timings.

    Args:
        query: The user question.
        type: Difficulty label forwarded to ``get_context_of_multihop``.
        model, index, chunks_list, k: Forwarded unchanged to
            ``get_context_of_multihop``.

    Returns:
        A dict with keys "classification", "retrieved_context",
        "final_answer" and "timings"; "timings" holds the per-stage durations
        in seconds and their sum under "total_time".
    """
    # Stage 1: context retrieval, which also reports its own stage timings.
    (
        context,
        classification,
        classification_time,
        decomposition_time,
        retrieval_time,
    ) = get_context_of_multihop(
        query, type, model=model, index=index, chunks_list=chunks_list, k=k
    )

    # A list of chunks is merged into one newline-separated string.
    combined_context = "\n".join(context) if isinstance(context, list) else context

    # Stage 2: answer generation, timed separately.
    generation_started = time.time()
    final_answer = generate_using_llama3(combined_context,query)
    generation_time = time.time() - generation_started

    # Stage 3: assemble the timing report; total is the sum of all stages.
    timings = {
        "classification_time": classification_time,
        "decomposition_time": decomposition_time,
        "retrieval_time": retrieval_time,
        "generation_time": generation_time,
    }
    timings["total_time"] = classification_time + decomposition_time + retrieval_time + generation_time

    return {
        "classification": classification,
        "retrieved_context": context,
        "final_answer": final_answer,
        "timings": timings,
    }


In [18]:
import pandas as pd
from tqdm import tqdm
import time

import os

# Load your source CSV
df = pd.read_csv("../../../Dataset_code_csvs/hotpotQA/hotpotQA_dataset_versions/5884paras_598queries/English/598_QnAs.csv")

# Output CSV path
output_path = "../../results/pipeline results/5884paras_598qna/LQR_processed_results_en.csv"

# Create the results directory up front so a long run cannot fail at the first save.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Number of processed queries between two saves to disk.
SAVE_EVERY = 100


def _flush_results(rows, first_write):
    """Write `rows` to `output_path`.

    The first write of a run truncates the file and emits the header row;
    later writes append without a header. Re-running the cell therefore
    replaces the previous output instead of appending duplicate rows to it.
    """
    pd.DataFrame(rows).to_csv(
        output_path,
        mode='w' if first_write else 'a',
        header=first_write,
        index=False,
        encoding="utf-8-sig",
    )


# Initialize variables
results = []
batch_times = []
header_written = False
total_start = time.time()

try:
    # `position` is the 1-based row count; `idx` is the DataFrame index label
    # and is only used for reporting.
    for position, (idx, row) in enumerate(tqdm(df.iterrows(), total=len(df)), start=1):
        query = row["question"]
        answer = row["answer"]
        question_type = row["level"]

        start_time = time.time()

        try:
            # Run the pipeline
            result = multihop_handling_LQR(query, question_type)

            classification = result["classification"]
            retrieved_context = result["retrieved_context"]
            final_answer = result["final_answer"]
            timings = result["timings"]

        except Exception as e:
            # Keep the run going; the failed query is recorded with sentinel values.
            print(f"Error processing query {idx}: {e}")
            classification = "Error"
            retrieved_context = "Error"
            final_answer = "Error"
            timings = {
                "classification_time": 0,
                "decomposition_time": 0,
                "retrieval_time": 0,
                "generation_time": 0,
                "total_time": 0
            }

        elapsed = time.time() - start_time
        batch_times.append(elapsed)

        results.append({
            "question": query,
            "answer": answer,
            "classification": classification,
            "retrieved_context": retrieved_context,
            "final_answer": final_answer,
            "classification_time": timings["classification_time"],
            "decomposition_time": timings["decomposition_time"],
            "retrieval_time": timings["retrieval_time"],
            "generation_time": timings["generation_time"],
            "total_time": timings["total_time"],
            "level": question_type
        })

        print(f"Processed record {position}/{len(df)} in {elapsed:.2f} seconds.")

        # Save and report every SAVE_EVERY queries
        if position % SAVE_EVERY == 0:
            _flush_results(results, first_write=not header_written)
            header_written = True
            avg_batch_time = sum(batch_times) / len(batch_times)
            print(f"\n--- Saved batch up to record {position}")
            print(f"Average time for last {SAVE_EVERY} records: {avg_batch_time:.2f} seconds\n")
            results = []
            batch_times = []
finally:
    # Save any remaining results. This also runs when the loop is interrupted
    # (e.g. KeyboardInterrupt), so a partially filled batch is not lost.
    if results:
        _flush_results(results, first_write=not header_written)
        header_written = True

# Final stats (reached only when every record was processed)
total_elapsed = time.time() - total_start
avg_total_time = total_elapsed / len(df) if len(df) else 0.0
print(f"\n✅ All records processed.")
print(f"Total processing time: {total_elapsed:.2f} seconds.")
print(f"Average time per record: {avg_total_time:.2f} seconds.")


  0%|          | 0/598 [00:00<?, ?it/s]

  0%|          | 1/598 [00:19<3:14:38, 19.56s/it]

Processed record 1/598 in 19.56 seconds.


  0%|          | 2/598 [00:39<3:14:12, 19.55s/it]

Processed record 2/598 in 19.54 seconds.


  1%|          | 3/598 [00:52<2:46:21, 16.78s/it]

Processed record 3/598 in 13.47 seconds.


  1%|          | 4/598 [01:02<2:17:33, 13.89s/it]

Processed record 4/598 in 9.48 seconds.


  1%|          | 5/598 [01:16<2:17:48, 13.94s/it]

Processed record 5/598 in 14.03 seconds.


  1%|          | 6/598 [01:35<2:36:32, 15.87s/it]

Processed record 6/598 in 19.59 seconds.


  1%|          | 7/598 [01:53<2:43:14, 16.57s/it]

Processed record 7/598 in 18.03 seconds.


  1%|▏         | 8/598 [02:05<2:26:48, 14.93s/it]

Processed record 8/598 in 11.41 seconds.


  2%|▏         | 9/598 [02:20<2:27:39, 15.04s/it]

Processed record 9/598 in 15.27 seconds.


  2%|▏         | 10/598 [02:39<2:40:20, 16.36s/it]

Processed record 10/598 in 19.31 seconds.


  2%|▏         | 11/598 [02:53<2:32:05, 15.55s/it]

Processed record 11/598 in 13.70 seconds.


  2%|▏         | 12/598 [03:04<2:18:44, 14.21s/it]

Processed record 12/598 in 11.14 seconds.


  2%|▏         | 13/598 [03:16<2:12:46, 13.62s/it]

Processed record 13/598 in 12.27 seconds.


  2%|▏         | 14/598 [03:27<2:04:50, 12.83s/it]

Processed record 14/598 in 11.00 seconds.


  3%|▎         | 15/598 [03:48<2:27:34, 15.19s/it]

Processed record 15/598 in 20.66 seconds.


  3%|▎         | 16/598 [04:03<2:25:50, 15.04s/it]

Processed record 16/598 in 14.68 seconds.


  3%|▎         | 17/598 [04:18<2:26:55, 15.17s/it]

Processed record 17/598 in 15.49 seconds.


  3%|▎         | 18/598 [04:25<2:01:46, 12.60s/it]

Processed record 18/598 in 6.60 seconds.


  3%|▎         | 19/598 [04:42<2:13:35, 13.84s/it]

Processed record 19/598 in 16.74 seconds.


  3%|▎         | 20/598 [04:52<2:03:03, 12.77s/it]

Processed record 20/598 in 10.28 seconds.


  4%|▎         | 21/598 [05:03<1:59:09, 12.39s/it]

Processed record 21/598 in 11.49 seconds.


  4%|▎         | 22/598 [05:22<2:17:52, 14.36s/it]

Processed record 22/598 in 18.96 seconds.


  4%|▍         | 23/598 [05:35<2:12:02, 13.78s/it]

Processed record 23/598 in 12.42 seconds.


  4%|▍         | 24/598 [05:44<1:58:58, 12.44s/it]

Processed record 24/598 in 9.30 seconds.


  4%|▍         | 25/598 [05:53<1:50:12, 11.54s/it]

Processed record 25/598 in 9.45 seconds.


  4%|▍         | 26/598 [06:03<1:43:29, 10.86s/it]

Processed record 26/598 in 9.26 seconds.


  5%|▍         | 27/598 [06:12<1:39:05, 10.41s/it]

Processed record 27/598 in 9.38 seconds.


  5%|▍         | 28/598 [06:23<1:39:36, 10.49s/it]

Processed record 28/598 in 10.66 seconds.


  5%|▍         | 29/598 [06:35<1:43:26, 10.91s/it]

Processed record 29/598 in 11.89 seconds.


  5%|▌         | 30/598 [06:55<2:11:31, 13.89s/it]

Processed record 30/598 in 20.86 seconds.


  5%|▌         | 31/598 [07:06<2:02:01, 12.91s/it]

Processed record 31/598 in 10.62 seconds.


  5%|▌         | 32/598 [07:16<1:54:17, 12.12s/it]

Processed record 32/598 in 10.26 seconds.


  6%|▌         | 33/598 [07:32<2:03:09, 13.08s/it]

Processed record 33/598 in 15.33 seconds.


  6%|▌         | 34/598 [07:45<2:04:06, 13.20s/it]

Processed record 34/598 in 13.49 seconds.


  6%|▌         | 35/598 [07:56<1:56:57, 12.46s/it]

Processed record 35/598 in 10.74 seconds.


  6%|▌         | 36/598 [08:07<1:52:31, 12.01s/it]

Processed record 36/598 in 10.96 seconds.


  6%|▌         | 37/598 [08:22<2:01:25, 12.99s/it]

Processed record 37/598 in 15.26 seconds.


  6%|▋         | 38/598 [08:32<1:53:33, 12.17s/it]

Processed record 38/598 in 10.25 seconds.


  7%|▋         | 39/598 [08:44<1:50:56, 11.91s/it]

Processed record 39/598 in 11.30 seconds.


  7%|▋         | 40/598 [09:01<2:05:56, 13.54s/it]

Processed record 40/598 in 17.35 seconds.


  7%|▋         | 41/598 [09:14<2:04:58, 13.46s/it]

Processed record 41/598 in 13.27 seconds.


  7%|▋         | 42/598 [09:25<1:56:30, 12.57s/it]

Processed record 42/598 in 10.50 seconds.


  7%|▋         | 43/598 [09:46<2:19:49, 15.12s/it]

Processed record 43/598 in 21.05 seconds.


  7%|▋         | 44/598 [09:56<2:06:32, 13.71s/it]

Processed record 44/598 in 10.41 seconds.


  8%|▊         | 45/598 [10:06<1:55:05, 12.49s/it]

Processed record 45/598 in 9.65 seconds.


  8%|▊         | 46/598 [10:19<1:55:35, 12.56s/it]

Processed record 46/598 in 12.74 seconds.


  8%|▊         | 47/598 [10:28<1:45:39, 11.51s/it]

Processed record 47/598 in 9.03 seconds.


  8%|▊         | 48/598 [10:41<1:49:39, 11.96s/it]

Processed record 48/598 in 13.03 seconds.


  8%|▊         | 49/598 [10:50<1:42:33, 11.21s/it]

Processed record 49/598 in 9.45 seconds.


  8%|▊         | 50/598 [11:00<1:39:40, 10.91s/it]

Processed record 50/598 in 10.22 seconds.


  9%|▊         | 51/598 [11:11<1:38:58, 10.86s/it]

Processed record 51/598 in 10.72 seconds.


  9%|▊         | 52/598 [11:21<1:34:54, 10.43s/it]

Processed record 52/598 in 9.43 seconds.


  9%|▉         | 53/598 [11:39<1:55:49, 12.75s/it]

Processed record 53/598 in 18.17 seconds.


  9%|▉         | 54/598 [11:51<1:54:18, 12.61s/it]

Processed record 54/598 in 12.27 seconds.


  9%|▉         | 55/598 [12:03<1:51:17, 12.30s/it]

Processed record 55/598 in 11.58 seconds.


  9%|▉         | 56/598 [12:12<1:43:55, 11.50s/it]

Processed record 56/598 in 9.65 seconds.


 10%|▉         | 57/598 [12:22<1:38:06, 10.88s/it]

Processed record 57/598 in 9.42 seconds.


 10%|▉         | 58/598 [12:33<1:39:43, 11.08s/it]

Processed record 58/598 in 11.54 seconds.


 10%|▉         | 59/598 [12:42<1:33:57, 10.46s/it]

Processed record 59/598 in 9.01 seconds.


 10%|█         | 60/598 [12:57<1:46:38, 11.89s/it]

Processed record 60/598 in 15.24 seconds.


 10%|█         | 61/598 [13:08<1:43:34, 11.57s/it]

Processed record 61/598 in 10.82 seconds.


 10%|█         | 62/598 [13:25<1:57:00, 13.10s/it]

Processed record 62/598 in 16.66 seconds.


 11%|█         | 63/598 [13:36<1:51:28, 12.50s/it]

Processed record 63/598 in 11.11 seconds.


 11%|█         | 64/598 [13:48<1:48:37, 12.20s/it]

Processed record 64/598 in 11.51 seconds.


 11%|█         | 65/598 [13:59<1:46:30, 11.99s/it]

Processed record 65/598 in 11.48 seconds.


 11%|█         | 66/598 [14:11<1:47:15, 12.10s/it]

Processed record 66/598 in 12.35 seconds.


 11%|█         | 67/598 [14:21<1:40:59, 11.41s/it]

Processed record 67/598 in 9.81 seconds.


 11%|█▏        | 68/598 [14:31<1:37:03, 10.99s/it]

Processed record 68/598 in 10.00 seconds.


 12%|█▏        | 69/598 [14:41<1:33:17, 10.58s/it]

Processed record 69/598 in 9.63 seconds.


 12%|█▏        | 70/598 [14:57<1:47:05, 12.17s/it]

Processed record 70/598 in 15.87 seconds.


 12%|█▏        | 71/598 [15:08<1:43:55, 11.83s/it]

Processed record 71/598 in 11.05 seconds.


 12%|█▏        | 72/598 [15:17<1:37:34, 11.13s/it]

Processed record 72/598 in 9.49 seconds.


 12%|█▏        | 73/598 [15:32<1:46:09, 12.13s/it]

Processed record 73/598 in 14.47 seconds.


 12%|█▏        | 74/598 [15:41<1:39:40, 11.41s/it]

Processed record 74/598 in 9.74 seconds.


 13%|█▎        | 75/598 [15:54<1:43:31, 11.88s/it]

Processed record 75/598 in 12.95 seconds.


 13%|█▎        | 76/598 [16:13<2:02:00, 14.02s/it]

Processed record 76/598 in 19.04 seconds.


 13%|█▎        | 77/598 [16:24<1:52:26, 12.95s/it]

Processed record 77/598 in 10.44 seconds.


 13%|█▎        | 78/598 [16:34<1:45:38, 12.19s/it]

Processed record 78/598 in 10.41 seconds.


 13%|█▎        | 79/598 [16:45<1:42:43, 11.88s/it]

Processed record 79/598 in 11.14 seconds.


 13%|█▎        | 80/598 [16:57<1:40:28, 11.64s/it]

Processed record 80/598 in 11.08 seconds.


 14%|█▎        | 81/598 [17:08<1:38:58, 11.49s/it]

Processed record 81/598 in 11.13 seconds.


 14%|█▎        | 82/598 [17:18<1:36:57, 11.27s/it]

Processed record 82/598 in 10.78 seconds.


 14%|█▍        | 83/598 [17:27<1:29:44, 10.46s/it]

Processed record 83/598 in 8.55 seconds.


 14%|█▍        | 84/598 [17:49<2:00:32, 14.07s/it]

Processed record 84/598 in 22.51 seconds.


 14%|█▍        | 85/598 [18:03<2:00:08, 14.05s/it]

Processed record 85/598 in 14.00 seconds.


 14%|█▍        | 86/598 [18:15<1:53:37, 13.32s/it]

Processed record 86/598 in 11.60 seconds.


 15%|█▍        | 87/598 [18:26<1:46:53, 12.55s/it]

Processed record 87/598 in 10.77 seconds.


 15%|█▍        | 88/598 [18:44<2:00:24, 14.17s/it]

Processed record 88/598 in 17.93 seconds.


 15%|█▍        | 89/598 [18:50<1:40:40, 11.87s/it]

Processed record 89/598 in 6.50 seconds.


 15%|█▌        | 90/598 [18:59<1:32:38, 10.94s/it]

Processed record 90/598 in 8.78 seconds.


 15%|█▌        | 91/598 [19:17<1:50:51, 13.12s/it]

Processed record 91/598 in 18.20 seconds.


 15%|█▌        | 92/598 [19:31<1:51:57, 13.28s/it]

Processed record 92/598 in 13.64 seconds.


 16%|█▌        | 93/598 [19:40<1:41:56, 12.11s/it]

Processed record 93/598 in 9.40 seconds.


 16%|█▌        | 94/598 [19:51<1:37:33, 11.61s/it]

Processed record 94/598 in 10.45 seconds.


 16%|█▌        | 95/598 [20:00<1:31:09, 10.87s/it]

Processed record 95/598 in 9.14 seconds.


 16%|█▌        | 96/598 [20:13<1:37:12, 11.62s/it]

Processed record 96/598 in 13.35 seconds.


 16%|█▌        | 97/598 [20:30<1:49:33, 13.12s/it]

Processed record 97/598 in 16.63 seconds.


 16%|█▋        | 98/598 [20:41<1:45:02, 12.61s/it]

Processed record 98/598 in 11.40 seconds.


 17%|█▋        | 99/598 [21:17<2:41:46, 19.45s/it]

Processed record 99/598 in 35.42 seconds.


 17%|█▋        | 100/598 [21:43<2:59:19, 21.60s/it]

Processed record 100/598 in 26.60 seconds.

--- Saved batch up to record 100
Average time for last 100 records: 13.04 seconds



 17%|█▋        | 101/598 [22:18<3:32:13, 25.62s/it]

Processed record 101/598 in 34.99 seconds.


 17%|█▋        | 102/598 [22:51<3:49:22, 27.75s/it]

Processed record 102/598 in 32.71 seconds.


 17%|█▋        | 103/598 [23:18<3:47:04, 27.53s/it]

Processed record 103/598 in 27.01 seconds.


 17%|█▋        | 104/598 [23:43<3:40:13, 26.75s/it]

Processed record 104/598 in 24.94 seconds.


 18%|█▊        | 105/598 [24:14<3:50:22, 28.04s/it]

Processed record 105/598 in 31.04 seconds.


 18%|█▊        | 106/598 [24:41<3:47:41, 27.77s/it]

Processed record 106/598 in 27.14 seconds.


 18%|█▊        | 107/598 [25:12<3:54:35, 28.67s/it]

Processed record 107/598 in 30.77 seconds.


 18%|█▊        | 108/598 [25:37<3:45:46, 27.65s/it]

Processed record 108/598 in 25.26 seconds.


 18%|█▊        | 109/598 [26:03<3:40:11, 27.02s/it]

Processed record 109/598 in 25.55 seconds.


 18%|█▊        | 110/598 [26:30<3:40:29, 27.11s/it]

Processed record 110/598 in 27.32 seconds.


 19%|█▊        | 111/598 [27:04<3:56:50, 29.18s/it]

Processed record 111/598 in 34.01 seconds.


 19%|█▊        | 112/598 [27:31<3:50:53, 28.51s/it]

Processed record 112/598 in 26.93 seconds.


 19%|█▉        | 113/598 [28:09<4:13:19, 31.34s/it]

Processed record 113/598 in 37.95 seconds.


 19%|█▉        | 114/598 [28:36<4:02:31, 30.07s/it]

Processed record 114/598 in 27.09 seconds.


 19%|█▉        | 115/598 [29:05<3:59:08, 29.71s/it]

Processed record 115/598 in 28.87 seconds.


 19%|█▉        | 116/598 [29:34<3:56:53, 29.49s/it]

Processed record 116/598 in 28.98 seconds.


 20%|█▉        | 117/598 [30:09<4:10:03, 31.19s/it]

Processed record 117/598 in 35.16 seconds.


 20%|█▉        | 117/598 [30:29<2:05:22, 15.64s/it]


KeyboardInterrupt: 