In [15]:
import ollama


In [16]:
# --- Classification Function ---
def classify_urdu_question(urdu_question):
    prompt = f"""You are given a question in Urdu. Your task is to classify it as either SINGLEHOP or MULTIHOP.

Definitions:
- A question is **SINGLEHOP** if it can be answered using a single fact, sentence, or document. The question may be long, but if it doesn't require combining or reasoning over multiple distinct pieces of information, it's SINGLEHOP.
- A question is **MULTIHOP** if answering it requires combining multiple facts, reasoning over several steps, or connecting pieces of information from different sources.

⚠️ Important:
- A question's length does not determine its type. A long, descriptive question can still be SINGLEHOP.
- MULTIHOP questions typically require you to first find one piece of information, then use that to find the next.

Examples:

🔹 Example 1 (SINGLEHOP - short and factual):
سوال: "پاکستان کے وزیر اعظم کا نام کیا ہے؟"
جواب: SINGLEHOP

🔹 Example 2 (SINGLEHOP - long, descriptive):
سوال: "میں بلیک ہیٹ پہنتا ہوں: بدمعاشوں کے ساتھ جدوجہد (حقیقی اور خیالی) ایک کتاب ہے جو چاک کلوسٹر مین نے لکھی ہے ، جو پہلی بار اسکربر نے شائع کی تھی۔"
جواب: SINGLEHOP

🔹 Example 3 (SINGLEHOP - event description):
سوال: "یہ فلم سنہ 2010 میں ریلیز ہوئی تھی اور اس میں 16 قتلوں کی کہانی بیان کی گئی ہے جو سنہ 1828 میں سکاٹ لینڈ کے شہر ایڈنبرا میں ہوئے تھے۔"
جواب: SINGLEHOP

🔹 Example 4 (SINGLEHOP - company and product):
سوال: "والگریریا فرانزی ایک اطالوی چمڑے کے بیگ اور سامان کی کمپنی تھی جس کی بنیاد 1864 میں روکو فرانزی نے رکھی تھی۔ گچی نے اس کمپنی کے لئے کام کیا جب تک کہ اس نے گچی کو قائم نہیں کیا۔"
جواب: SINGLEHOP

🔸 Example 5 (MULTIHOP - indirect question):
سوال: "جارج واشنگٹن کی پیدائش کے وقت امریکہ کا صدر کون تھا؟"
جواب: MULTIHOP

🔸 Example 6 (MULTIHOP - location relation):
سوال: "نلسن منڈیلا کو کس جیل میں رکھا گیا اور وہ جیل کس جزیرے پر تھی؟"
جواب: MULTIHOP

🔸 Example 7 (MULTIHOP - historical causality):
سوال: "انگریزوں کے خلاف 1857 کی جنگ کے بعد ہندوستان میں کونسا سیاسی نظام قائم ہوا؟"
جواب: MULTIHOP

🔸 Example 8 (MULTIHOP - lengthy, layered facts):
سوال: "ایپل کمپنی کے شریک بانی اسٹیو جابز نے کس یونیورسٹی سے تعلیم حاصل کی، اور اس یونیورسٹی کے سب سے معروف گریجویٹس میں سے ایک کون ہے؟"
جواب: MULTIHOP

🔸 Example 9 (MULTIHOP - lengthy with connections):
سوال: "شیکسپیئر کے وہ ڈرامے جو بادشاہوں پر مبنی ہیں، ان میں سے ایک کے مرکزی کردار کی موت کس جنگ میں ہوئی تھی؟"
جواب: MULTIHOP

---

Now read the following Urdu question and classify it:

سوال: "{urdu_question}"
جواب:"""
    try:
        response = ollama.chat(
            model='llama3:8b',
            messages=[{"role": "user", "content": prompt}]
        )
        reply = response['message']['content'].strip().lower()

        # Normalize output
        if 'multihop' in reply:
            return 'multihop'
        elif 'singlehop' in reply or 'simple' in reply:
            return 'singlehop'

        print(f"⚠️ Unexpected response: {reply}")
        return "unknown"

    except Exception as e:
        print(f"❌ Error processing question: {urdu_question}\n↪ {e}")
        return "error"


In [17]:
def classify_urdu_question_gemma(urdu_question):
    prompt = f"""You are given a question in Urdu. Classify it as SINGLEHOP or MULTIHOP.

Definitions:
- SINGLEHOP: Can be answered using one fact, sentence, or document.
- MULTIHOP: Requires combining multiple facts or reasoning over multiple steps.

Examples:

سوال: "پاکستان کے وزیر اعظم کا نام کیا ہے؟"
جواب: SINGLEHOP

سوال: "جارج واشنگٹن کی پیدائش کے وقت امریکہ کا صدر کون تھا؟"
جواب: MULTIHOP

سوال: "ایپل کمپنی کے شریک بانی نے کہاں تعلیم حاصل کی، اور اس یونیورسٹی کا مشہور فارغ التحصیل کون ہے؟"
جواب: MULTIHOP

سوال: "یہ فلم 2010 میں ریلیز ہوئی اور سکاٹ لینڈ میں 1828 کے قتلوں پر مبنی ہے۔"
جواب: SINGLEHOP

---

Now classify the following:

سوال: "{urdu_question}"
جواب:"""


    try:
        response = ollama.chat(
            model='gemma3:4b',
            messages=[{"role": "user", "content": prompt}]
        )
        reply = response['message']['content'].strip().lower()

        if 'multihop' in reply:
            return 'multihop'
        elif 'singlehop' in reply or 'simple' in reply:
            return 'singlehop'

        print(f"⚠️ Unexpected response: {reply}")
        return "unknown"

    except Exception as e:
        print(f"❌ Error processing question: {urdu_question}\n↪ {e}")
        return "error"


In [18]:
def decompose_urdu_query(urdu_query: str) -> dict:
    """Returns dictionary with q1 and q2 keys containing sub-questions"""
    refined_prompt = f"""
**Role**: You are an expert Urdu linguistic analyst specializing in question decomposition. Your task is to break down complex Urdu questions into their fundamental components.

**Task Instructions**:
1. Carefully analyze the given Urdu question to identify its core components
2. Extract exactly 2 sub-questions that:
   - Are necessary to answer the main question
   - Cover distinct aspects of the problem
   - Have clear logical progression (answer to q1 helps answer q2)
3. Both sub-questions must:
   - Be in proper Urdu language
   - Be grammatically correct
   - Be clear and concise
   - Use relevant domain terminology

**Output Format Requirements**:
- Use EXACTLY this format:
  q1: [پہلا ذیلی سوال]
  q2: [دوسرا ذیلی سوال]
- Each sub-question must be on a new line
- Do not include any additional commentary or explanation
- Do not number the questions (use only q1:/q2: prefixes)

**Example 1**:
Input: اگر لاہور میں فضائی آلودگی کی سطح دہلی سے زیادہ ہے اور فضائی آلودگی پھیپھڑوں کے کینسر کا سبب بن سکتی ہے، تو لاہور کے رہائشیوں کو کس قسم کے طبی چیک اپ کروانے چاہئیں؟
Output:
q1: لاہور اور دہلی میں فضائی آلودگی کی سطح کا موازنہ کیا ہے؟
q2: فضائی آلودگی پھیپھڑوں کے کینسر کا سبب کیسے بنتی ہے؟

**Example 2**:
Input: اگر کراچی میں بجلی کے نرخ 30% بڑھ گئے ہیں اور یہ صنعتوں کو متاثر کر رہا ہے، تو حکومت کو کون سی سبسڈیاں دینی چاہئیں؟
Output:
q1: کراچی میں بجلی کے نرخوں میں اضافے کی موجودہ شرح کیا ہے؟
q2: بجلی کے مہنگے ہونے سے صنعتوں پر کس قسم کے اثرات مرتب ہو رہے ہیں؟

**Current Task**:
Input: {urdu_query}
Output:
"""
    
    try:
        response = ollama.generate(
            model='llama3:8b',
            prompt=refined_prompt,
            options={
                'temperature': 0.5,
                'num_ctx': 2048
            }
        )
        
        output = response['response'].strip()
        
        result = {}
        for line in output.split('\n'):
            line = line.strip()
            if line.startswith('q1:'):
                result['q1'] = line[3:].strip()
            elif line.startswith('q2:'):
                result['q2'] = line[3:].strip()
        
        return result if len(result) == 2 else {}
    
    except Exception as e:
        print(f"Decomposition error: {str(e)}")
        return {}


In [19]:
def decompose_urdu_query_gemma(urdu_query: str) -> dict:
    """Decomposes an Urdu query into two sub-questions using Gemma model."""
    prompt = f"""
آپ ایک ماہر اردو زبان کے تجزیہ کار ہیں جو پیچیدہ سوالات کو آسان سب سوالات میں تقسیم کرتا ہے۔

ہدایت:
- دیے گئے سوال کو غور سے پڑھیں۔
- بالکل دو سب سوال نکالیں جو سوال کے جواب کے لیے ضروری ہوں۔
- سب سوال واضح، مختصر، اور درست اردو میں ہوں۔
- ہر سب سوال نئی لائن پر لکھیں اور یوں شروع ہوں:
  q1: [پہلا سب سوال]
  q2: [دوسرا سب سوال]
- کوئی اضافی تبصرہ نہ کریں۔

مثال:

Input: اگر لاہور میں فضائی آلودگی دہلی سے زیادہ ہے تو لاہور کے رہائشیوں کو کون سے طبی معائنے کروانے چاہئیں؟
Output:
q1: لاہور اور دہلی میں فضائی آلودگی کا موازنہ کیا ہے؟
q2: فضائی آلودگی کس طرح طبی مسائل پیدا کرتی ہے؟

Input: {urdu_query}
Output:
"""

    try:
        response = ollama.generate(
            model='gemma3:4b',
            prompt=prompt,
            options={
                'temperature': 0.5,
                'num_ctx': 2048
            }
        )
        output = response['response'].strip()
        
        result = {}
        for line in output.split('\n'):
            line = line.strip()
            if line.startswith('q1:'):
                result['q1'] = line[3:].strip()
            elif line.startswith('q2:'):
                result['q2'] = line[3:].strip()
        
        return result if len(result) == 2 else {}

    except Exception as e:
        print(f"Decomposition error: {str(e)}")
        return {}


In [20]:
def query_context_relevance_check(query_urdu: str, context_urdu: str) -> bool:
    prompt = f"""
You are a binary classifier.

Your task is to decide whether the following Urdu *context* is relevant to the Urdu *question*. You must answer ONLY with **True** or **False** — no explanation, no commentary, just one word: True or False.

Criteria:
- If the context helps answer the question directly or indirectly, reply: True
- If the context is unrelated, confusing, or insufficient, reply: False

IMPORTANT:
- Do NOT explain your answer.
- Do NOT include any additional comments.
- Just respond with: True or False

---

Question (Urdu): {query_urdu}

Context (Urdu): {context_urdu}

Answer (True/False):
"""

    try:
        response = ollama.chat(
            model="llama3:8b",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response['message']['content'].strip().lower()
        return answer == 'true'
    except Exception as e:
        print(f"Error during relevance check: {e}")
        return False


In [21]:
def query_context_relevance_check_gemma(query_urdu: str, context_urdu: str) -> bool:
    prompt = f"""
Decide if the following Urdu *context* is relevant to the Urdu *question*. 

If it helps answer the question directly or indirectly, reply: True  
If it’s unrelated, unclear, or insufficient, reply: False  

Important:  
- Reply only with one word: True or False  
- No explanation or extra text

Question: {query_urdu}  
Context: {context_urdu}  

Answer:
"""
    try:
        response = ollama.chat(
            model="gemma3:4b",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response['message']['content'].strip().lower()
        return answer == 'true'
    except Exception as e:
        print(f"Error during relevance check (Gemma): {e}")
        return False


In [22]:
from sentence_transformers import SentenceTransformer
import pickle
import faiss
import os
import torch


def load_retriever(
    index_path: str,
    chunks_path: str
):
    # Initialize device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Load SentenceTransformer model (E5-large)
    model = SentenceTransformer("intfloat/e5-large", device=device)
    
    # Configure for Urdu text
    model.max_seq_length = 512  # Set based on your earlier analysis
    model.tokenizer.do_lower_case = False  # Preserve Urdu characters
    
    # Load FAISS index
    index = faiss.read_index(index_path)
    
    # Load stored chunks
    with open(chunks_path, "rb") as f:
        chunks_list = pickle.load(f)
    
    return model, index, chunks_list, device


In [23]:
print(model.device)

cuda:0


In [9]:
model, index, chunks_list, device = load_retriever(
    index_path="../../vector_db/paragraphs/5884_paras/5884_paras_faiss_index.index",
    chunks_path="../../data_storage/Paragraph_chunks/5884_paragraphs/5884_chunks.pkl"
)

In [10]:
def retrieve_documents(query,k=3):
   
    query_embedding = model.encode(
        [query],
        convert_to_tensor=False,  # Return numpy array for FAISS
        normalize_embeddings=True,
        show_progress_bar=False
    )
    query_embedding = model.encode(
        [query],
        convert_to_tensor=False,
        normalize_embeddings=True,
        show_progress_bar=False
    )
    
    # Search FAISS index
    _, indices = index.search(query_embedding, k)  # Dummy variable _ for distances
    
    # Return only the chunks
    return [chunks_list[i] for i in indices[0]]


In [11]:
def get_context_of_multihop_without_parallel(query,model=model,index=index,chunks_list=chunks_list,k=3):


    classification = classify_urdu_question_gemma(query)


    if classification == "singlehop":
        retrieved_context = retrieve_documents(query,k)
        return retrieved_context
        

    if classification == "multihop":
        decomposition = decompose_urdu_query(query)
        q1 = decomposition.get("q1", "")
        q2 = decomposition.get("q2", "")

        main_context = retrieve_documents(q1, k)

        for i in range(min(len(main_context), k)):
            intermediate_ctx = main_context[i]
            
            combined_query = q1 + intermediate_ctx + q2
            
            second_hop_contexts = retrieve_documents(combined_query, k)
            
            for ctx in second_hop_contexts:
                if query_context_relevance_check(query, ctx):
                    main_context.append(ctx)
        
        return main_context    
 


In [12]:

import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def expand_multihop_context(intermediate_ctx, query, q1, q2, k):
    try:
        combined_query = q1 + intermediate_ctx + q2
        second_hop_contexts = retrieve_documents(combined_query, k)

        relevant_contexts = []

        with ThreadPoolExecutor() as inner_executor:
            futures = [
                inner_executor.submit(query_context_relevance_check, query, ctx)
                for ctx in second_hop_contexts
            ]

            for i, future in enumerate(as_completed(futures)):
                try:
                    if future.result():
                        relevant_contexts.append(second_hop_contexts[i])
                except Exception as e:
                    print("Error during relevance check:", e)

        return relevant_contexts

    except Exception as e:
        print("Error in expand_multihop_context:", e)
        return []


def get_context_of_multihop(query, type, model=model, index=index, chunks_list=chunks_list, k=3):
    # Measure classification time
    start_classification = time.time()
    classification = classify_urdu_question(query)
    classification_time = time.time() - start_classification

    if type == "easy":
        decomposition_time = 0.0
        start_retrieval = time.time()
        context = retrieve_documents(query, k)
        retrieval_time = time.time() - start_retrieval
        return context, classification, classification_time, decomposition_time, retrieval_time

    else:
        start_decomposition = time.time()
        decomposition = decompose_urdu_query(query)
        q1 = decomposition.get("q1", "")
        q2 = decomposition.get("q2", "")
        decomposition_time = time.time() - start_decomposition

        start_retrieval = time.time()
        main_context = retrieve_documents(q1, k)
        additional_contexts = []

        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(expand_multihop_context, ctx, query, q1, q2, k)
                for ctx in main_context[:k]
            ]

            for future in as_completed(futures):
                result = future.result()
                additional_contexts.extend(result)

        main_context.extend(additional_contexts)
        retrieval_time = time.time() - start_retrieval

        return main_context, classification, classification_time, decomposition_time, retrieval_time


In [13]:
import ollama

def generate_using_llama3(context, query):
    prompt = f"""You are a helpful assistant. You will be given a context and a question, both in Urdu.
Your task is to answer the question using the context only. 
Your answer should be **clear, concise, and entirely in Urdu**.
Context:
{context}

Question:
{query}

Answer in Urdu:"""

    try:
        response = ollama.chat(
            model='llama3:8b',
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        return response['message']['content'].strip()
    except Exception as e:
        print("Error during generation:", e)
        return "جواب پیدا کرنے میں خرابی ہوئی۔"

import ollama

def generate_using_alif(context, query, alif_model='hf.co/large-traversaal/Alif-1.0-8B-Instruct:f16'):


    prompt = f"""آپ کو ایک سوال اور اس سے متعلق ایک سیاق و سباق دیا گیا ہے۔ براہ کرم سیاق و سباق کا بغور مطالعہ کریں اور اسی کی بنیاد پر درست، مختصر اور جامع جواب دیں۔

### سوال:
{query}

### سیاق و سباق:
{context}

### جواب:
"""

    response = ollama.chat(
        model=alif_model,
        messages=[
            {"role": "user", "content": prompt}
        ],
        stream=False
    )

    return response['message']['content']

def prompt_llama_gemma_balanced_english(query, context):
    return f"""
You are an expert assistant. Please answer the following question in fluent and clear Urdu. If possible, avoid using English words. Do not use bullet points or lists. Write your response in concise paragraphs.

### Question:
{query}

### Context:
{context}

### Answer:
"""

def generate_using_gemma(query, context):
    model_name = "gemma3:4b"
    prompt = prompt_llama_gemma_balanced_english(query, context)

    try:
        response = ollama.chat(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=False
        )
        output = response['message']['content'].strip()
    except Exception as e:
        output = f"Error: {e}"

    return output



In [14]:
import time

def multihop_handling_LQR(query, type, model=model, index=index, chunks_list=chunks_list, k=3):
    # Step 1: Get context and timings
    context, classification, classification_time, decomposition_time, retrieval_time = get_context_of_multihop(
        query, type, model=model, index=index, chunks_list=chunks_list, k=k
    )

    # Flatten context if it's a list of strings
    if isinstance(context, list):
        combined_context = "\n".join(context)
    else:
        combined_context = context

    # Step 2: Generate answer and measure time
    start_gen = time.time()
    final_answer = generate_using_gemma(combined_context,query)
    generation_time = time.time() - start_gen

    # Step 3: Compute total time
    total_time = classification_time + decomposition_time + retrieval_time + generation_time

    return {
        "classification": classification,
        "retrieved_context": context,
        "final_answer": final_answer,
        "timings": {
            "classification_time": classification_time,
            "decomposition_time": decomposition_time,
            "retrieval_time": retrieval_time,
            "generation_time": generation_time,
            "total_time": total_time
        }
    }


In [None]:
import pandas as pd
from tqdm import tqdm
import time

# Load your source CSV
df = pd.read_csv("../../../Dataset_code_csvs/hotpotQA/hotpotQA_dataset_versions/5884paras_598queries/Urdu/598_QnAs_translated.csv")

# Output CSV path
output_path = "../../results/pipeline results/5884paras_598qna/LQR_processed_results.csv"

# Initialize variables
results = []
batch_times = []
total_start = time.time()

# Loop over each question in the DataFrame
for idx, row in tqdm(df.iterrows(), total=len(df)):
    query = row["translated_question"]
    answer = row["translated_answer"]
    question_type = row["level"]

    start_time = time.time()

    try:
        # Run the pipeline
        result = multihop_handling_LQR(query, question_type)

        classification = result["classification"]
        retrieved_context = result["retrieved_context"]
        final_answer = result["final_answer"]
        timings = result["timings"]
        total_time_one = timings["total_time"]

    except Exception as e:
        print(f"Error processing query {idx}: {e}")
        classification = "Error"
        retrieved_context = "Error"
        final_answer = "Error"
        timings = {
            "classification_time": 0,
            "decomposition_time": 0,
            "retrieval_time": 0,
            "generation_time": 0,
            "total_time": 0
        }
        total_time_one = 0

    elapsed = time.time() - start_time
    batch_times.append(elapsed)

    results.append({
        "translated_question": query,
        "translated_answer": answer,
        "classification": classification,
        "retrieved_context": retrieved_context,
        "final_answer": final_answer,
        "classification_time": timings["classification_time"],
        "decomposition_time": timings["decomposition_time"],
        "retrieval_time": timings["retrieval_time"],
        "generation_time": timings["generation_time"],
        "total_time": timings["total_time"],
        "level": question_type
    })

    print(f"Processed record {idx+1}/{len(df)} in {elapsed:.2f} seconds.")

    # Save and report every 100 queries
    if (idx + 1) % 100 == 0:
        pd.DataFrame(results).to_csv(output_path, mode='a', header=not bool(idx), index=False, encoding="utf-8-sig")
        avg_batch_time = sum(batch_times) / len(batch_times)
        print(f"\n--- Saved batch up to record {idx+1}")
        print(f"Average time for last 100 records: {avg_batch_time:.2f} seconds\n")
        results = []
        batch_times = []

# Save any remaining results at the end
if results:
    pd.DataFrame(results).to_csv(output_path, mode='a', header=not bool(len(df) % 100), index=False, encoding="utf-8-sig")

# Final stats
total_elapsed = time.time() - total_start
avg_total_time = total_elapsed / len(df)
print(f"\n✅ All records processed.")
print(f"Total processing time: {total_elapsed:.2f} seconds.")
print(f"Average time per record: {avg_total_time:.2f} seconds.")


  0%|          | 1/598 [01:23<13:49:48, 83.40s/it]

Processed record 1/598 in 83.40 seconds.


  0%|          | 2/598 [02:18<11:05:48, 67.03s/it]

Processed record 2/598 in 55.57 seconds.


  1%|          | 3/598 [03:26<11:08:09, 67.38s/it]

Processed record 3/598 in 67.79 seconds.


  1%|          | 4/598 [04:17<10:03:33, 60.96s/it]

Processed record 4/598 in 51.13 seconds.


  1%|          | 5/598 [05:15<9:50:34, 59.75s/it] 

Processed record 5/598 in 57.61 seconds.


  1%|          | 6/598 [06:22<10:15:06, 62.34s/it]

Processed record 6/598 in 67.37 seconds.


  1%|          | 7/598 [07:38<10:55:23, 66.54s/it]

Processed record 7/598 in 75.17 seconds.


  1%|▏         | 8/598 [08:54<11:24:56, 69.66s/it]

Processed record 8/598 in 76.33 seconds.


  2%|▏         | 9/598 [10:04<11:26:24, 69.92s/it]

Processed record 9/598 in 70.51 seconds.


  2%|▏         | 10/598 [11:09<11:09:09, 68.28s/it]

Processed record 10/598 in 64.61 seconds.


  2%|▏         | 11/598 [12:06<10:34:51, 64.89s/it]

Processed record 11/598 in 57.21 seconds.


  2%|▏         | 12/598 [13:05<10:15:45, 63.05s/it]

Processed record 12/598 in 58.83 seconds.


  2%|▏         | 13/598 [13:46<9:10:36, 56.47s/it] 

Processed record 13/598 in 41.34 seconds.


  2%|▏         | 14/598 [14:46<9:18:45, 57.41s/it]

Processed record 14/598 in 59.56 seconds.


  3%|▎         | 15/598 [15:43<9:17:48, 57.41s/it]

Processed record 15/598 in 57.41 seconds.


  3%|▎         | 16/598 [17:04<10:25:03, 64.44s/it]

Processed record 16/598 in 80.77 seconds.


  3%|▎         | 17/598 [18:02<10:05:50, 62.57s/it]

Processed record 17/598 in 58.21 seconds.


  3%|▎         | 18/598 [18:52<9:27:25, 58.70s/it] 

Processed record 18/598 in 49.70 seconds.


  3%|▎         | 19/598 [20:13<10:30:30, 65.34s/it]

Processed record 19/598 in 80.80 seconds.


  3%|▎         | 20/598 [21:25<10:48:56, 67.36s/it]

Processed record 20/598 in 72.09 seconds.


  4%|▎         | 21/598 [22:37<11:00:16, 68.66s/it]

Processed record 21/598 in 71.68 seconds.


  4%|▎         | 22/598 [23:43<10:52:21, 67.95s/it]

Processed record 22/598 in 66.31 seconds.


  4%|▍         | 23/598 [24:41<10:22:25, 64.95s/it]

Processed record 23/598 in 57.94 seconds.


  4%|▍         | 24/598 [25:34<9:46:22, 61.29s/it] 

Processed record 24/598 in 52.77 seconds.


  4%|▍         | 25/598 [26:40<10:00:40, 62.90s/it]

Processed record 25/598 in 66.64 seconds.


  4%|▍         | 26/598 [27:35<9:37:24, 60.57s/it] 

Processed record 26/598 in 55.13 seconds.


  5%|▍         | 27/598 [28:38<9:41:43, 61.13s/it]

Processed record 27/598 in 62.43 seconds.


  5%|▍         | 28/598 [30:13<11:17:57, 71.36s/it]

Processed record 28/598 in 95.25 seconds.


  5%|▍         | 29/598 [31:13<10:44:29, 67.96s/it]

Processed record 29/598 in 60.02 seconds.


  5%|▌         | 30/598 [32:09<10:10:15, 64.46s/it]

Processed record 30/598 in 56.30 seconds.


  5%|▌         | 31/598 [33:02<9:35:39, 60.92s/it] 

Processed record 31/598 in 52.64 seconds.


  5%|▌         | 32/598 [33:52<9:03:33, 57.62s/it]

Processed record 32/598 in 49.93 seconds.


  6%|▌         | 33/598 [35:05<9:46:52, 62.32s/it]

Processed record 33/598 in 73.30 seconds.


  6%|▌         | 34/598 [35:54<9:07:55, 58.29s/it]

Processed record 34/598 in 48.88 seconds.


  6%|▌         | 35/598 [36:46<8:47:50, 56.25s/it]

Processed record 35/598 in 51.50 seconds.


  6%|▌         | 36/598 [37:53<9:17:01, 59.47s/it]

Processed record 36/598 in 66.97 seconds.


  6%|▌         | 37/598 [38:37<8:34:09, 54.99s/it]

Processed record 37/598 in 44.54 seconds.


  6%|▋         | 38/598 [39:27<8:18:06, 53.37s/it]

Processed record 38/598 in 49.59 seconds.


  7%|▋         | 39/598 [40:26<8:32:20, 54.99s/it]

Processed record 39/598 in 58.78 seconds.


  7%|▋         | 40/598 [41:23<8:37:41, 55.67s/it]

Processed record 40/598 in 57.24 seconds.


  7%|▋         | 41/598 [42:35<9:24:00, 60.76s/it]

Processed record 41/598 in 72.63 seconds.


  7%|▋         | 42/598 [43:23<8:47:08, 56.89s/it]

Processed record 42/598 in 47.86 seconds.


  7%|▋         | 43/598 [44:30<9:14:02, 59.90s/it]

Processed record 43/598 in 66.92 seconds.


  7%|▋         | 44/598 [45:24<8:55:26, 57.99s/it]

Processed record 44/598 in 53.54 seconds.


  8%|▊         | 45/598 [46:22<8:56:38, 58.22s/it]

Processed record 45/598 in 58.77 seconds.


  8%|▊         | 46/598 [47:33<9:29:29, 61.90s/it]

Processed record 46/598 in 70.48 seconds.


  8%|▊         | 47/598 [48:41<9:45:27, 63.75s/it]

Processed record 47/598 in 68.07 seconds.


  8%|▊         | 48/598 [50:08<10:47:53, 70.68s/it]

Processed record 48/598 in 86.85 seconds.


  8%|▊         | 49/598 [50:51<9:30:36, 62.36s/it] 

Processed record 49/598 in 42.95 seconds.


  8%|▊         | 50/598 [51:46<9:09:21, 60.15s/it]

Processed record 50/598 in 54.98 seconds.


  9%|▊         | 51/598 [52:47<9:09:55, 60.32s/it]

Processed record 51/598 in 60.73 seconds.


  9%|▊         | 52/598 [53:59<9:41:15, 63.87s/it]

Processed record 52/598 in 72.16 seconds.


  9%|▉         | 53/598 [55:01<9:36:48, 63.50s/it]

Processed record 53/598 in 62.63 seconds.


  9%|▉         | 54/598 [56:09<9:46:47, 64.72s/it]

Processed record 54/598 in 67.56 seconds.


  9%|▉         | 55/598 [57:10<9:35:52, 63.63s/it]

Processed record 55/598 in 61.10 seconds.


  9%|▉         | 56/598 [58:14<9:35:27, 63.70s/it]

Processed record 56/598 in 63.87 seconds.


 10%|▉         | 57/598 [59:06<9:02:48, 60.20s/it]

Processed record 57/598 in 52.02 seconds.


 10%|▉         | 58/598 [1:00:20<9:40:09, 64.46s/it]

Processed record 58/598 in 74.41 seconds.


 10%|▉         | 59/598 [1:01:09<8:56:49, 59.76s/it]

Processed record 59/598 in 48.78 seconds.


 10%|█         | 60/598 [1:02:18<9:19:46, 62.43s/it]

Processed record 60/598 in 68.66 seconds.


 10%|█         | 61/598 [1:03:37<10:03:46, 67.46s/it]

Processed record 61/598 in 79.20 seconds.


 10%|█         | 62/598 [1:04:26<9:12:38, 61.86s/it] 

Processed record 62/598 in 48.80 seconds.


 11%|█         | 63/598 [1:05:46<10:00:59, 67.40s/it]

Processed record 63/598 in 80.32 seconds.


 11%|█         | 64/598 [1:06:43<9:33:10, 64.40s/it] 

Processed record 64/598 in 57.40 seconds.


 11%|█         | 65/598 [1:08:01<10:07:44, 68.41s/it]

Processed record 65/598 in 77.78 seconds.


 11%|█         | 66/598 [1:09:02<9:47:13, 66.23s/it] 

Processed record 66/598 in 61.13 seconds.


 11%|█         | 67/598 [1:09:53<9:03:51, 61.45s/it]

Processed record 67/598 in 50.31 seconds.


 11%|█▏        | 68/598 [1:10:42<8:29:44, 57.71s/it]

Processed record 68/598 in 48.97 seconds.


 12%|█▏        | 69/598 [1:11:26<7:54:44, 53.85s/it]

Processed record 69/598 in 44.83 seconds.


 12%|█▏        | 70/598 [1:12:43<8:55:01, 60.80s/it]

Processed record 70/598 in 77.02 seconds.


 12%|█▏        | 71/598 [1:13:54<9:18:57, 63.64s/it]

Processed record 71/598 in 70.27 seconds.


 12%|█▏        | 72/598 [1:14:54<9:09:54, 62.73s/it]

Processed record 72/598 in 60.60 seconds.


 12%|█▏        | 73/598 [1:15:42<8:29:32, 58.23s/it]

Processed record 73/598 in 47.75 seconds.


 12%|█▏        | 74/598 [1:16:59<9:16:44, 63.75s/it]

Processed record 74/598 in 76.62 seconds.


 13%|█▎        | 75/598 [1:17:45<8:29:36, 58.46s/it]

Processed record 75/598 in 46.13 seconds.


 13%|█▎        | 76/598 [1:19:25<10:16:15, 70.83s/it]

Processed record 76/598 in 99.70 seconds.


 13%|█▎        | 77/598 [1:20:08<9:04:27, 62.70s/it] 

Processed record 77/598 in 43.73 seconds.


 13%|█▎        | 78/598 [1:21:10<9:00:00, 62.31s/it]

Processed record 78/598 in 61.39 seconds.


 13%|█▎        | 79/598 [1:21:54<8:11:58, 56.88s/it]

Processed record 79/598 in 44.20 seconds.


 13%|█▎        | 80/598 [1:22:57<8:26:35, 58.68s/it]

Processed record 80/598 in 62.89 seconds.


 14%|█▎        | 81/598 [1:23:55<8:23:24, 58.42s/it]

Processed record 81/598 in 57.82 seconds.


 14%|█▎        | 82/598 [1:24:40<7:48:41, 54.50s/it]

Processed record 82/598 in 45.34 seconds.


 14%|█▍        | 83/598 [1:25:45<8:13:52, 57.54s/it]

Processed record 83/598 in 64.64 seconds.


 14%|█▍        | 84/598 [1:26:44<8:17:33, 58.08s/it]

Processed record 84/598 in 59.34 seconds.


 14%|█▍        | 85/598 [1:27:42<8:15:37, 57.97s/it]

Processed record 85/598 in 57.70 seconds.


 14%|█▍        | 86/598 [1:28:45<8:29:20, 59.69s/it]

Processed record 86/598 in 63.70 seconds.


 15%|█▍        | 87/598 [1:29:44<8:25:31, 59.36s/it]

Processed record 87/598 in 58.58 seconds.


 15%|█▍        | 88/598 [1:31:02<9:12:22, 64.98s/it]

Processed record 88/598 in 78.12 seconds.


 15%|█▍        | 89/598 [1:32:08<9:15:06, 65.43s/it]

Processed record 89/598 in 66.48 seconds.


 15%|█▌        | 90/598 [1:32:51<8:16:52, 58.69s/it]

Processed record 90/598 in 42.94 seconds.


 15%|█▌        | 91/598 [1:34:00<8:39:57, 61.53s/it]

Processed record 91/598 in 68.18 seconds.


 15%|█▌        | 92/598 [1:35:21<9:29:09, 67.49s/it]

Processed record 92/598 in 81.39 seconds.


 16%|█▌        | 93/598 [1:36:12<8:46:24, 62.54s/it]

Processed record 93/598 in 51.01 seconds.


 16%|█▌        | 94/598 [1:37:20<8:59:18, 64.20s/it]

Processed record 94/598 in 68.07 seconds.


 16%|█▌        | 95/598 [1:38:21<8:50:39, 63.30s/it]

Processed record 95/598 in 61.19 seconds.


 16%|█▌        | 96/598 [1:39:44<9:37:46, 69.06s/it]

Processed record 96/598 in 82.49 seconds.


 16%|█▌        | 97/598 [1:40:29<8:36:14, 61.82s/it]

Processed record 97/598 in 44.95 seconds.


 16%|█▋        | 98/598 [1:41:18<8:04:51, 58.18s/it]

Processed record 98/598 in 49.69 seconds.


 17%|█▋        | 99/598 [1:42:58<9:47:43, 70.67s/it]

Processed record 99/598 in 99.80 seconds.


 17%|█▋        | 100/598 [1:44:37<10:55:23, 78.96s/it]

Processed record 100/598 in 98.31 seconds.

--- Saved batch up to record 100
Average time for last 100 records: 62.77 seconds



 17%|█▋        | 101/598 [1:46:33<12:26:35, 90.13s/it]

Processed record 101/598 in 116.19 seconds.


 17%|█▋        | 102/598 [1:48:47<14:13:32, 103.25s/it]

Processed record 102/598 in 133.86 seconds.


 17%|█▋        | 103/598 [1:50:25<14:00:10, 101.84s/it]

Processed record 103/598 in 98.55 seconds.


 17%|█▋        | 104/598 [1:51:59<13:37:41, 99.32s/it] 

Processed record 104/598 in 93.43 seconds.


 18%|█▊        | 105/598 [1:53:23<12:58:20, 94.73s/it]

Processed record 105/598 in 84.02 seconds.


 18%|█▊        | 106/598 [1:54:57<12:54:52, 94.50s/it]

Processed record 106/598 in 93.96 seconds.


 18%|█▊        | 107/598 [1:56:19<12:24:43, 91.00s/it]

Processed record 107/598 in 82.85 seconds.


 18%|█▊        | 108/598 [1:57:36<11:48:38, 86.77s/it]

Processed record 108/598 in 76.90 seconds.


 18%|█▊        | 109/598 [1:59:13<12:12:45, 89.91s/it]

Processed record 109/598 in 97.23 seconds.


 18%|█▊        | 110/598 [2:01:06<13:05:14, 96.55s/it]

Processed record 110/598 in 112.03 seconds.


 19%|█▊        | 111/598 [2:03:03<13:55:18, 102.91s/it]

Processed record 111/598 in 117.77 seconds.


 19%|█▊        | 112/598 [2:04:32<13:18:34, 98.59s/it] 

Processed record 112/598 in 88.50 seconds.


 19%|█▉        | 113/598 [2:06:16<13:30:08, 100.22s/it]

Processed record 113/598 in 104.04 seconds.


 19%|█▉        | 114/598 [2:07:54<13:23:21, 99.59s/it] 

Processed record 114/598 in 98.11 seconds.


 19%|█▉        | 115/598 [2:09:35<13:26:03, 100.13s/it]

Processed record 115/598 in 101.40 seconds.


 19%|█▉        | 116/598 [2:11:17<13:28:22, 100.63s/it]

Processed record 116/598 in 101.79 seconds.


 20%|█▉        | 117/598 [2:12:54<13:17:37, 99.50s/it] 

Processed record 117/598 in 96.85 seconds.


 20%|█▉        | 118/598 [2:14:29<13:04:49, 98.10s/it]

Processed record 118/598 in 94.85 seconds.


 20%|█▉        | 119/598 [2:16:40<14:23:10, 108.12s/it]

Processed record 119/598 in 131.50 seconds.


 20%|██        | 120/598 [2:18:16<13:52:17, 104.47s/it]

Processed record 120/598 in 95.96 seconds.


 20%|██        | 121/598 [2:19:46<13:15:50, 100.11s/it]

Processed record 121/598 in 89.92 seconds.


 20%|██        | 122/598 [2:21:47<14:03:13, 106.29s/it]

Processed record 122/598 in 120.72 seconds.


 21%|██        | 123/598 [2:23:23<13:37:41, 103.29s/it]

Processed record 123/598 in 96.28 seconds.


 21%|██        | 124/598 [2:24:54<13:05:23, 99.42s/it] 

Processed record 124/598 in 90.39 seconds.


 21%|██        | 125/598 [2:26:31<12:59:05, 98.83s/it]

Processed record 125/598 in 97.45 seconds.


 21%|██        | 126/598 [2:27:57<12:28:00, 95.09s/it]

Processed record 126/598 in 86.36 seconds.


 21%|██        | 127/598 [2:29:33<12:27:52, 95.27s/it]

Processed record 127/598 in 95.70 seconds.


 21%|██▏       | 128/598 [2:31:06<12:20:47, 94.57s/it]

Processed record 128/598 in 92.94 seconds.


 22%|██▏       | 129/598 [2:32:51<12:44:21, 97.78s/it]

Processed record 129/598 in 105.29 seconds.


 22%|██▏       | 130/598 [2:34:38<13:03:19, 100.43s/it]

Processed record 130/598 in 106.59 seconds.


 22%|██▏       | 131/598 [2:36:16<12:56:34, 99.77s/it] 

Processed record 131/598 in 98.25 seconds.


 22%|██▏       | 132/598 [2:38:20<13:51:17, 107.03s/it]

Processed record 132/598 in 123.97 seconds.


 22%|██▏       | 133/598 [2:39:57<13:24:50, 103.85s/it]

Processed record 133/598 in 96.42 seconds.


 22%|██▏       | 134/598 [2:41:43<13:28:50, 104.59s/it]

Processed record 134/598 in 106.32 seconds.


 23%|██▎       | 135/598 [2:43:51<14:21:57, 111.70s/it]

Processed record 135/598 in 128.29 seconds.


 23%|██▎       | 136/598 [2:45:44<14:23:19, 112.12s/it]

Processed record 136/598 in 113.09 seconds.


 23%|██▎       | 137/598 [2:47:02<13:02:25, 101.83s/it]

Processed record 137/598 in 77.83 seconds.


 23%|██▎       | 138/598 [2:49:04<13:46:39, 107.83s/it]

Processed record 138/598 in 121.81 seconds.


 23%|██▎       | 139/598 [2:51:10<14:26:08, 113.22s/it]

Processed record 139/598 in 125.81 seconds.


 23%|██▎       | 140/598 [2:52:52<13:59:21, 109.96s/it]

Processed record 140/598 in 102.35 seconds.


 24%|██▎       | 141/598 [2:54:33<13:37:41, 107.36s/it]

Processed record 141/598 in 101.28 seconds.


 24%|██▎       | 142/598 [2:56:24<13:44:31, 108.49s/it]

Processed record 142/598 in 111.14 seconds.


 24%|██▍       | 143/598 [2:57:49<12:47:18, 101.18s/it]

Processed record 143/598 in 84.14 seconds.


 24%|██▍       | 144/598 [2:59:15<12:12:05, 96.75s/it] 

Processed record 144/598 in 86.41 seconds.


 24%|██▍       | 145/598 [3:00:56<12:20:17, 98.05s/it]

Processed record 145/598 in 101.08 seconds.


 24%|██▍       | 146/598 [3:02:26<12:00:23, 95.63s/it]

Processed record 146/598 in 89.97 seconds.


 25%|██▍       | 147/598 [3:04:12<12:22:01, 98.72s/it]

Processed record 147/598 in 105.93 seconds.


 25%|██▍       | 148/598 [3:05:56<12:32:07, 100.28s/it]

Processed record 148/598 in 103.94 seconds.


 25%|██▍       | 149/598 [3:07:43<12:46:23, 102.41s/it]

Processed record 149/598 in 107.38 seconds.


 25%|██▌       | 150/598 [3:09:11<12:12:16, 98.07s/it] 

Processed record 150/598 in 87.94 seconds.


 25%|██▌       | 151/598 [3:10:36<11:41:01, 94.10s/it]

Processed record 151/598 in 84.82 seconds.


 25%|██▌       | 152/598 [3:11:54<11:02:50, 89.17s/it]

Processed record 152/598 in 77.67 seconds.


 26%|██▌       | 153/598 [3:13:22<10:59:13, 88.88s/it]

Processed record 153/598 in 88.22 seconds.


 26%|██▌       | 154/598 [3:15:08<11:34:49, 93.89s/it]

Processed record 154/598 in 105.58 seconds.


 26%|██▌       | 155/598 [3:16:23<10:53:04, 88.45s/it]

Processed record 155/598 in 75.75 seconds.


 26%|██▌       | 156/598 [3:17:50<10:47:38, 87.91s/it]

Processed record 156/598 in 86.66 seconds.


 26%|██▋       | 157/598 [3:19:52<12:01:31, 98.17s/it]

Processed record 157/598 in 122.09 seconds.


 26%|██▋       | 158/598 [3:21:08<11:11:02, 91.50s/it]

Processed record 158/598 in 75.96 seconds.


 27%|██▋       | 159/598 [3:22:23<10:32:40, 86.47s/it]

Processed record 159/598 in 74.73 seconds.


 27%|██▋       | 160/598 [3:23:51<10:35:34, 87.07s/it]

Processed record 160/598 in 88.45 seconds.


 27%|██▋       | 161/598 [3:25:15<10:26:23, 86.00s/it]

Processed record 161/598 in 83.52 seconds.


 27%|██▋       | 162/598 [3:27:02<11:12:20, 92.52s/it]

Processed record 162/598 in 107.74 seconds.


 27%|██▋       | 163/598 [3:28:54<11:52:53, 98.33s/it]

Processed record 163/598 in 111.88 seconds.


 27%|██▋       | 164/598 [3:30:58<12:46:03, 105.91s/it]

Processed record 164/598 in 123.58 seconds.


 28%|██▊       | 165/598 [3:32:18<11:47:22, 98.02s/it] 

Processed record 165/598 in 79.62 seconds.


 28%|██▊       | 166/598 [3:34:25<12:48:15, 106.70s/it]

Processed record 166/598 in 126.96 seconds.


 28%|██▊       | 167/598 [3:36:00<12:22:25, 103.35s/it]

Processed record 167/598 in 95.54 seconds.


 28%|██▊       | 168/598 [3:37:32<11:56:20, 99.95s/it] 

Processed record 168/598 in 92.02 seconds.


 28%|██▊       | 169/598 [3:38:57<11:23:07, 95.54s/it]

Processed record 169/598 in 85.24 seconds.


 28%|██▊       | 170/598 [3:40:48<11:53:58, 100.09s/it]

Processed record 170/598 in 110.70 seconds.


 29%|██▊       | 171/598 [3:42:33<12:02:40, 101.55s/it]

Processed record 171/598 in 104.94 seconds.


 29%|██▉       | 172/598 [3:44:05<11:41:32, 98.81s/it] 

Processed record 172/598 in 92.42 seconds.


 29%|██▉       | 173/598 [3:45:48<11:48:15, 99.99s/it]

Processed record 173/598 in 102.74 seconds.


 29%|██▉       | 174/598 [3:47:21<11:32:25, 97.98s/it]

Processed record 174/598 in 93.31 seconds.


 29%|██▉       | 175/598 [3:48:57<11:26:22, 97.36s/it]

Processed record 175/598 in 95.89 seconds.


 29%|██▉       | 176/598 [3:50:56<12:09:23, 103.71s/it]

Processed record 176/598 in 118.52 seconds.


 30%|██▉       | 177/598 [3:52:34<11:56:42, 102.14s/it]

Processed record 177/598 in 98.50 seconds.


 30%|██▉       | 178/598 [3:54:17<11:55:02, 102.15s/it]

Processed record 178/598 in 102.16 seconds.


 30%|██▉       | 179/598 [3:55:59<11:53:24, 102.16s/it]

Processed record 179/598 in 102.18 seconds.


 30%|███       | 180/598 [3:58:07<12:45:26, 109.87s/it]

Processed record 180/598 in 127.87 seconds.


 30%|███       | 181/598 [4:00:00<12:50:11, 110.82s/it]

Processed record 181/598 in 113.03 seconds.


 30%|███       | 182/598 [4:01:47<12:41:55, 109.89s/it]

Processed record 182/598 in 107.73 seconds.


 31%|███       | 183/598 [4:03:47<13:00:22, 112.83s/it]

Processed record 183/598 in 119.67 seconds.


 31%|███       | 184/598 [4:05:47<13:12:22, 114.84s/it]

Processed record 184/598 in 119.53 seconds.


 31%|███       | 185/598 [4:07:18<12:21:15, 107.69s/it]

Processed record 185/598 in 91.01 seconds.


 31%|███       | 186/598 [4:09:00<12:08:30, 106.09s/it]

Processed record 186/598 in 102.37 seconds.


 31%|███▏      | 187/598 [4:10:37<11:47:48, 103.33s/it]

Processed record 187/598 in 96.88 seconds.


 31%|███▏      | 188/598 [4:12:07<11:18:43, 99.33s/it] 

Processed record 188/598 in 89.98 seconds.


 32%|███▏      | 189/598 [4:14:02<11:49:22, 104.06s/it]

Processed record 189/598 in 115.12 seconds.


 32%|███▏      | 190/598 [4:15:55<12:06:00, 106.77s/it]

Processed record 190/598 in 113.07 seconds.


 32%|███▏      | 191/598 [4:17:33<11:47:02, 104.23s/it]

Processed record 191/598 in 98.32 seconds.


 32%|███▏      | 192/598 [4:19:32<12:15:31, 108.70s/it]

Processed record 192/598 in 119.12 seconds.


 32%|███▏      | 193/598 [4:21:12<11:56:09, 106.10s/it]

Processed record 193/598 in 100.02 seconds.


 32%|███▏      | 194/598 [4:22:56<11:49:27, 105.37s/it]

Processed record 194/598 in 103.66 seconds.


 33%|███▎      | 195/598 [4:24:42<11:48:38, 105.50s/it]

Processed record 195/598 in 105.83 seconds.


 33%|███▎      | 196/598 [4:26:36<12:04:12, 108.09s/it]

Processed record 196/598 in 114.12 seconds.


 33%|███▎      | 197/598 [4:28:08<11:29:51, 103.22s/it]

Processed record 197/598 in 91.86 seconds.


 33%|███▎      | 198/598 [4:29:52<11:29:53, 103.48s/it]

Processed record 198/598 in 104.09 seconds.


 33%|███▎      | 199/598 [4:31:29<11:15:10, 101.53s/it]

Processed record 199/598 in 96.97 seconds.


 33%|███▎      | 200/598 [4:33:33<11:58:07, 108.26s/it]

Processed record 200/598 in 123.96 seconds.

--- Saved batch up to record 200
Average time for last 100 records: 101.36 seconds



 34%|███▎      | 201/598 [4:35:24<12:02:02, 109.13s/it]

Processed record 201/598 in 111.15 seconds.


 34%|███▍      | 202/598 [4:37:31<12:34:50, 114.37s/it]

Processed record 202/598 in 126.60 seconds.


 34%|███▍      | 203/598 [4:38:58<11:38:51, 106.15s/it]

Processed record 203/598 in 86.99 seconds.


 34%|███▍      | 204/598 [4:40:44<11:37:59, 106.29s/it]

Processed record 204/598 in 106.62 seconds.


 34%|███▍      | 205/598 [4:42:22<11:19:28, 103.74s/it]

Processed record 205/598 in 97.77 seconds.


 34%|███▍      | 206/598 [4:44:47<12:38:25, 116.09s/it]

Processed record 206/598 in 144.90 seconds.


 35%|███▍      | 207/598 [4:46:27<12:05:47, 111.37s/it]

Processed record 207/598 in 100.38 seconds.


 35%|███▍      | 208/598 [4:48:03<11:32:35, 106.55s/it]

Processed record 208/598 in 95.30 seconds.


 35%|███▍      | 209/598 [4:49:45<11:22:44, 105.31s/it]

Processed record 209/598 in 102.40 seconds.


 35%|███▌      | 210/598 [4:51:28<11:16:13, 104.57s/it]

Processed record 210/598 in 102.86 seconds.


 35%|███▌      | 211/598 [4:53:21<11:31:33, 107.22s/it]

Processed record 211/598 in 113.40 seconds.


 35%|███▌      | 212/598 [4:55:24<11:58:41, 111.71s/it]

Processed record 212/598 in 122.20 seconds.


 36%|███▌      | 213/598 [4:57:03<11:33:08, 108.02s/it]

Processed record 213/598 in 99.40 seconds.


 36%|███▌      | 214/598 [4:59:12<12:12:39, 114.48s/it]

Processed record 214/598 in 129.54 seconds.


 36%|███▌      | 215/598 [5:00:50<11:38:46, 109.47s/it]

Processed record 215/598 in 97.79 seconds.


 36%|███▌      | 216/598 [5:02:47<11:50:49, 111.65s/it]

Processed record 216/598 in 116.73 seconds.


 36%|███▋      | 217/598 [5:04:17<11:08:38, 105.30s/it]

Processed record 217/598 in 90.48 seconds.


 36%|███▋      | 218/598 [5:06:06<11:13:32, 106.35s/it]

Processed record 218/598 in 108.80 seconds.


 37%|███▋      | 219/598 [5:07:53<11:12:31, 106.47s/it]

Processed record 219/598 in 106.75 seconds.


 37%|███▋      | 220/598 [5:09:53<11:35:57, 110.47s/it]

Processed record 220/598 in 119.81 seconds.


 37%|███▋      | 221/598 [5:11:19<10:47:58, 103.12s/it]

Processed record 221/598 in 85.99 seconds.


 37%|███▋      | 222/598 [5:12:58<10:38:38, 101.91s/it]

Processed record 222/598 in 99.08 seconds.


 37%|███▋      | 223/598 [5:40:05<58:16:09, 559.39s/it]

Processed record 223/598 in 1626.83 seconds.


 37%|███▋      | 224/598 [5:42:15<44:45:01, 430.75s/it]

Processed record 224/598 in 130.61 seconds.


 38%|███▊      | 225/598 [5:44:33<35:31:10, 342.82s/it]

Processed record 225/598 in 137.63 seconds.


 38%|███▊      | 226/598 [5:46:13<27:53:10, 269.87s/it]

Processed record 226/598 in 99.65 seconds.


 38%|███▊      | 227/598 [5:47:43<22:16:29, 216.14s/it]

Processed record 227/598 in 90.79 seconds.


 38%|███▊      | 228/598 [5:49:36<19:01:43, 185.14s/it]

Processed record 228/598 in 112.81 seconds.


 38%|███▊      | 229/598 [5:51:06<16:02:18, 156.47s/it]

Processed record 229/598 in 89.57 seconds.


 38%|███▊      | 230/598 [5:52:48<14:20:07, 140.24s/it]

Processed record 230/598 in 102.36 seconds.


 39%|███▊      | 231/598 [5:54:21<12:51:37, 126.15s/it]

Processed record 231/598 in 93.28 seconds.


 39%|███▉      | 232/598 [5:55:57<11:52:54, 116.87s/it]

Processed record 232/598 in 95.21 seconds.


 39%|███▉      | 233/598 [5:57:51<11:46:55, 116.21s/it]

Processed record 233/598 in 114.66 seconds.


 39%|███▉      | 234/598 [5:59:10<10:37:41, 105.11s/it]

Processed record 234/598 in 79.23 seconds.


 39%|███▉      | 235/598 [6:00:42<10:11:06, 101.01s/it]

Processed record 235/598 in 91.44 seconds.


 39%|███▉      | 236/598 [6:02:22<10:08:07, 100.79s/it]

Processed record 236/598 in 100.29 seconds.


 40%|███▉      | 237/598 [6:03:57<9:55:50, 99.03s/it]  

Processed record 237/598 in 94.92 seconds.


 40%|███▉      | 238/598 [6:05:31<9:44:59, 97.50s/it]

Processed record 238/598 in 93.92 seconds.


 40%|███▉      | 239/598 [6:07:48<10:54:43, 109.43s/it]

Processed record 239/598 in 137.26 seconds.


 40%|████      | 240/598 [6:09:52<11:18:57, 113.79s/it]

Processed record 240/598 in 123.98 seconds.


 40%|████      | 241/598 [6:11:53<11:28:51, 115.78s/it]

Processed record 241/598 in 120.40 seconds.


 40%|████      | 242/598 [6:13:47<11:24:54, 115.43s/it]

Processed record 242/598 in 114.64 seconds.


 41%|████      | 243/598 [6:15:38<11:14:40, 114.03s/it]

Processed record 243/598 in 110.75 seconds.


 41%|████      | 244/598 [6:17:31<11:11:16, 113.77s/it]

Processed record 244/598 in 113.18 seconds.


 41%|████      | 245/598 [6:19:25<11:10:08, 113.90s/it]

Processed record 245/598 in 114.21 seconds.


 41%|████      | 246/598 [6:21:00<10:34:51, 108.22s/it]

Processed record 246/598 in 94.94 seconds.


 41%|████▏     | 247/598 [6:22:50<10:35:36, 108.65s/it]

Processed record 247/598 in 109.67 seconds.


 41%|████▏     | 248/598 [6:25:07<11:23:24, 117.16s/it]

Processed record 248/598 in 137.00 seconds.


 42%|████▏     | 249/598 [6:26:59<11:12:58, 115.70s/it]

Processed record 249/598 in 112.30 seconds.


 42%|████▏     | 250/598 [6:28:31<10:29:09, 108.47s/it]

Processed record 250/598 in 91.62 seconds.


 42%|████▏     | 251/598 [6:30:02<9:56:21, 103.12s/it] 

Processed record 251/598 in 90.61 seconds.


 42%|████▏     | 252/598 [6:32:00<10:20:41, 107.64s/it]

Processed record 252/598 in 118.18 seconds.


 42%|████▏     | 253/598 [8:04:20<166:30:00, 1737.39s/it]

Processed record 253/598 in 5540.16 seconds.


 42%|████▏     | 254/598 [8:06:31<119:57:20, 1255.35s/it]

Processed record 254/598 in 130.59 seconds.


 43%|████▎     | 255/598 [8:08:07<86:29:14, 907.74s/it]  

Processed record 255/598 in 96.65 seconds.


 43%|████▎     | 256/598 [8:10:21<64:10:38, 675.55s/it]

Processed record 256/598 in 133.78 seconds.


 43%|████▎     | 257/598 [8:12:21<48:12:42, 508.98s/it]

Processed record 257/598 in 120.32 seconds.


 43%|████▎     | 258/598 [8:14:15<36:51:26, 390.25s/it]

Processed record 258/598 in 113.23 seconds.


 43%|████▎     | 259/598 [8:15:50<28:25:27, 301.85s/it]

Processed record 259/598 in 95.57 seconds.


 43%|████▎     | 260/598 [8:17:29<22:36:54, 240.87s/it]

Processed record 260/598 in 98.59 seconds.


 44%|████▎     | 261/598 [8:19:11<18:38:44, 199.18s/it]

Processed record 261/598 in 101.91 seconds.


 44%|████▍     | 262/598 [8:20:58<16:00:39, 171.55s/it]

Processed record 262/598 in 107.06 seconds.


 44%|████▍     | 263/598 [8:22:51<14:19:59, 154.03s/it]

Processed record 263/598 in 113.15 seconds.


 44%|████▍     | 264/598 [8:24:56<13:28:34, 145.25s/it]

Processed record 264/598 in 124.77 seconds.


 44%|████▍     | 265/598 [8:27:05<13:00:05, 140.56s/it]

Processed record 265/598 in 129.61 seconds.


 44%|████▍     | 266/598 [8:29:03<12:20:42, 133.86s/it]

Processed record 266/598 in 118.24 seconds.


 45%|████▍     | 267/598 [8:30:39<11:15:56, 122.53s/it]

Processed record 267/598 in 96.08 seconds.


In [None]:
import pandas as pd
from tqdm import tqdm
import time

# Load your source CSV
df = pd.read_csv("../../../Dataset_code_csvs/hotpotQA/hotpotQA_dataset_versions/5884paras_598queries/Urdu/598_QnAs_translated.csv")

# Output CSV path
output_path = "../../results/pipeline results/5884paras_598qna/LQR_processed_results.csv"

# Start processing after the first 200 records
start_index = 200

# Initialize variables
results = []
batch_times = []
total_start = time.time()

# Loop over each question starting from index 200
for idx, row in tqdm(df.iloc[start_index:].iterrows(), total=len(df) - start_index):
    actual_idx = idx + start_index  # Since df.iloc resets index
    query = row["translated_question"]
    answer = row["translated_answer"]
    question_type = row["level"]

    start_time = time.time()

    try:
        # Run the pipeline
        result = multihop_handling_LQR(query, question_type)

        classification = result["classification"]
        retrieved_context = result["retrieved_context"]
        final_answer = result["final_answer"]
        timings = result["timings"]
        total_time_one = timings["total_time"]

    except Exception as e:
        print(f"Error processing query {actual_idx}: {e}")
        classification = "Error"
        retrieved_context = "Error"
        final_answer = "Error"
        timings = {
            "classification_time": 0,
            "decomposition_time": 0,
            "retrieval_time": 0,
            "generation_time": 0,
            "total_time": 0
        }
        total_time_one = 0

    elapsed = time.time() - start_time
    batch_times.append(elapsed)

    results.append({
        "translated_question": query,
        "translated_answer": answer,
        "classification": classification,
        "retrieved_context": retrieved_context,
        "final_answer": final_answer,
        "classification_time": timings["classification_time"],
        "decomposition_time": timings["decomposition_time"],
        "retrieval_time": timings["retrieval_time"],
        "generation_time": timings["generation_time"],
        "total_time": timings["total_time"],
        "level": question_type
    })

    print(f"Processed record {actual_idx+1}/{len(df)} in {elapsed:.2f} seconds.")

    # Save and report every 100 records
    if (actual_idx + 1) % 100 == 0:
        pd.DataFrame(results).to_csv(output_path, mode='a', header=False, index=False, encoding="utf-8-sig")
        avg_batch_time = sum(batch_times) / len(batch_times)
        print(f"\n--- Saved batch up to record {actual_idx+1}")
        print(f"Average time for last 100 records: {avg_batch_time:.2f} seconds\n")
        results = []
        batch_times = []

# Save any remaining results
if results:
    pd.DataFrame(results).to_csv(output_path, mode='a', header=False, index=False, encoding="utf-8-sig")

# Final stats
total_elapsed = time.time() - total_start
avg_total_time = total_elapsed / (len(df) - start_index)
print(f"\n✅ Finished remaining records.")
print(f"Total time: {total_elapsed:.2f} seconds.")
print(f"Average time per record: {avg_total_time:.2f} seconds.")


  0%|          | 1/398 [01:41<11:13:18, 101.76s/it]

Processed record 401/598 in 101.76 seconds.
