In [None]:
import asyncio
import json
import logging
import os 
import time 
from datetime import datetime, timedelta 
from collections import defaultdict 
from typing import List, Optional 
from dataclasses import dataclass 
import backoff 
from dotenv import load_dotenv  
from groq import Groq 
from tqdm import tqdm  
from tqdm.asyncio import tqdm_asyncio 
import nest_asyncio 
from openai import AzureOpenAI 

In [3]:
# Load passages from Irish dataset
passages = {}

with open("../../all_data.json", encoding='utf-8') as f:
    all_data = json.load(f)

seen = set()
for doc in all_data: 
    for psg in doc['Passages']:
        psg_id = f"{psg['DocumentID']}-{psg['PassageID']}"
        if psg_id not in seen:
            passage_text = psg['PassageID'] + " " + psg['Passage']
            if len(passage_text) > 100:
                passages[psg_id] = psg['Passage']
                seen.add(psg_id)

print(f"Loaded {len(passages)} passages")

Loaded 32810 passages


In [None]:
rankings_dict = defaultdict(list) 

# Load the rankings file in memory
with open('data/rankings_hybrid_rerank.trec', 'r') as f:
    for line in f:
        parts = line.strip().split()
        question_id = parts[0]
        document_id = parts[2]
        rank = int(parts[3])
        score = float(parts[4])
        rankings_dict[question_id].append({
            'doc': document_id,
            'score': score
        })

In [None]:
import pickle

with open('./data/corpus.pkl', 'rb') as f:
    corpus = pickle.load(f)

print(f"Corpus size: {len(corpus)}")
print(f"Passages dict size: {len(passages)}")
print(f"Match: {len(corpus) == len(passages)}")

# Check for missing passages in rankings
missing_count = 0
for qid in rankings_dict:
    for ranking in rankings_dict[qid][:10]:
        if ranking['doc'] not in passages:
            missing_count += 1
            print(f"Missing: {ranking['doc']}")
            if missing_count >= 5: 
                break
    if missing_count >= 5:
        break

Corpus size: 32810
Passages dict size: 32810
Match: True


In [6]:
def extract_passages(question_id: str, rankings_dict: dict = rankings_dict) -> list[str]:

    retrieved_passages = []
    should_stop = False
    
    for i in range(len(rankings_dict[question_id])):
        if should_stop or len(retrieved_passages) == 10:
            break
            
        if len(retrieved_passages) == 0:
            retrieved_passages.append(rankings_dict[question_id][i]["doc"])
            continue
                
        if i < len(rankings_dict[question_id]) - 1 and rankings_dict[question_id][i]["score"] - rankings_dict[question_id][i+1]["score"] > 0.1:
                should_stop = True

        if rankings_dict[question_id][i]["score"] < 0.72:
            break

        retrieved_passages.append(rankings_dict[question_id][i]["doc"])
        
    retrieved_passages = [passages[doc] for doc in retrieved_passages if doc in passages]
    
    return retrieved_passages

In [7]:
def build_prompt(question: str, relevant_passages: list[str], system_prompt: str = None) -> tuple[str, str]:
    # Default system prompt if none is provided
    default_system_prompt = ("You are a regulatory compliance assistant. Provide a **complete**, **coherent**, and"
    "**correct** response to the given question by synthesizing the information from the provided passages. "
    "Your answer should **fully integrate all relevant obligations, practices, and insights**, and directly"
    "address the question. The passages are presented in order of relevance, so **prioritize the information"
    "accordingly** and ensure consistency in your response, avoiding any contradictions. Additionally, reference"
    "**specific regulations and key compliance requirements** outlined in the regulatory content to support your"
    "answer. **Do not use any extraneous or external knowledge** outside of the provided passages when crafting"
    "your response.")
    
    
    # Use provided system prompt or fall back to default
    system_prompt = system_prompt if system_prompt is not None else default_system_prompt

    user_prompt = f"Question: {question}\n\n"
    for passage in relevant_passages:
        user_prompt += f"Passage: {passage}\n\n"
        
    return (system_prompt, user_prompt)

build_prompt("question", ["passage"])

('You are a regulatory compliance assistant. Provide a **complete**, **coherent**, and**correct** response to the given question by synthesizing the information from the provided passages. Your answer should **fully integrate all relevant obligations, practices, and insights**, and directlyaddress the question. The passages are presented in order of relevance, so **prioritize the informationaccordingly** and ensure consistency in your response, avoiding any contradictions. Additionally, reference**specific regulations and key compliance requirements** outlined in the regulatory content to support youranswer. **Do not use any extraneous or external knowledge** outside of the provided passages when craftingyour response.',
 'Question: question\n\nPassage: passage\n\n')

In [10]:
system_prompt_few_shot = ("As a regulatory compliance assistant for Irish statutory instruments. Provide a **complete**, **coherent**, and "
"**correct** response to the given question by synthesizing the information from the provided passages. "
"Your answer should **fully integrate all relevant obligations, practices, and insights**, and directly "
"address the question. The passages are presented in order of relevance, so **prioritize the information "
"accordingly** and ensure consistency in your response, avoiding any contradictions. Additionally, reference "
"**specific regulations and statutory instrument numbers** outlined in the regulatory content to support your "
"answer. **Do not use any extraneous or external knowledge** outside of the provided passages when crafting "
"your response."
"\n\nHere are a few examples."
"\n\nQuestion: When does a person who purchases a dwelling on or after the commencement of these Regulations cease to qualify as a 'relevant owner' for the scheme?"
"\n\nPassage: (2) Where a person purchases a dwelling on or after the date of the coming into operation of these Regulations, he or she shall not be a relevant owner for the purposes of these Regulations where he or she knew, or ought to have known, that defective concrete blocks were used in the construction of the dwelling."
"\n\nYour response should read:"
"\n\nA person who purchases a dwelling on or after the commencement of these Regulations ceases to qualify as a 'relevant owner' for the scheme if they knew, or ought to have known, that defective concrete blocks were used in the construction of the dwelling at the time of purchase."
"\n\nQuestion: What is the maximum grant amount available under the Dwelling Grant Scheme?"
"\n\nPassage: The maximum grant available under the Dwelling Grant Scheme is €420,000 per dwelling."
"\n\nYour response should read:"
"\n\nThe maximum grant amount available under the Dwelling Grant Scheme is €420,000 per dwelling."
"\n\nQuestion: Who must submit evidence of defective concrete blocks to qualify for the scheme?"
"\n\nPassage: An applicant must submit evidence from a qualified engineer confirming the presence of defective concrete blocks in the dwelling to qualify for the scheme."
"\n\nYour response should read:"
"\n\nTo qualify for the scheme, an applicant must submit evidence from a qualified engineer that confirms the presence of defective concrete blocks in the dwelling.")

In [13]:
# Check which passage IDs are missing
with open("./data/rankings_hybrid_rerank.trec", 'r') as f:
    ranking_passage_ids = set()
    for line in f:
        parts = line.strip().split()
        doc_id = parts[2]
        ranking_passage_ids.add(doc_id)

print(f"Unique passage IDs in rankings: {len(ranking_passage_ids)}")
print(f"Passage IDs in passages dict: {len(passages)}")

missing = ranking_passage_ids - set(passages.keys())
print(f"Missing passage IDs: {len(missing)}")
print("First 10 missing:")
for pid in list(missing)[:10]:
    print(f"  {pid}")

Unique passage IDs in rankings: 7557
Passage IDs in passages dict: 32810
Missing passage IDs: 0
First 10 missing:


In [None]:
# Check if all questions from QnA_complete.json are in rankings_hybrid_rerank.trec
with open("./QnA_complete.json", encoding='utf-8') as f:
    test_questions = json.load(f)

question_ids_in_test = {q["QuestionID"] for q in test_questions}
question_ids_in_rankings = set(rankings_dict.keys())

print(f"Questions in test file: {len(question_ids_in_test)}")
print(f"Questions in rankings: {len(question_ids_in_rankings)}")

missing_from_rankings = question_ids_in_test - question_ids_in_rankings
print(f"Questions missing from rankings: {len(missing_from_rankings)}")
print("First 10 missing question IDs:")
for qid in list(missing_from_rankings)[:10]:
    print(f"  {qid}")

Questions in test file: 240
Questions in rankings: 240
Questions missing from rankings: 0
First 10 missing question IDs:


## Groq API - Regular deployment

Use Groq's API to synthesize the retrieved passages for each question using `llama-3.1-8b-instant`. We leverage Groq's high-performance infrastructure to process queries with minimal latency.

In [None]:
import logging
import asyncio
import os
import json
from datetime import datetime, timedelta
from dataclasses import dataclass
from typing import List, Optional
from collections import defaultdict
from groq import Groq
from tqdm.asyncio import tqdm as tqdm_asyncio
from dotenv import load_dotenv
import nest_asyncio

# Configure minimal logging
logging.basicConfig(
    level=logging.WARNING,
    format='%(message)s'
)
logger = logging.getLogger(__name__)
logging.getLogger("httpx").setLevel(logging.WARNING)

@dataclass
class Question:
    id: str
    text: str

@dataclass
class ProcessedAnswer:
    question_id: str
    retrieved_passages: List[str]
    answer: str
    error: Optional[str] = None

class GroqRateLimiter:
    def __init__(self):
        # Track when each key was last rate-limited
        self.key_cooldowns = {} 
        self.cooldown_duration = timedelta(minutes=2)  
        self.lock = asyncio.Lock()
    
    async def check_key_available(self, key_index: int) -> bool:
        async with self.lock:
            if key_index not in self.key_cooldowns:
                return True
            
            now = datetime.now()
            cooldown_end = self.key_cooldowns[key_index] + self.cooldown_duration
            
            if now >= cooldown_end:
                # Cooldown expired, remove it
                del self.key_cooldowns[key_index]
                return True
            
            return False
    
    async def mark_key_rate_limited(self, key_index: int):
        async with self.lock:
            self.key_cooldowns[key_index] = datetime.now()
            print(f"Key {key_index + 1} marked as rate-limited until {datetime.now() + self.cooldown_duration}")
    
    async def wait_for_available_key(self, current_key: int, total_keys: int) -> int:
        while True:
            for i in range(total_keys):
                next_key = (current_key + i + 1) % total_keys
                if await self.check_key_available(next_key):
                    return next_key
            
            async with self.lock:
                if self.key_cooldowns:
                    earliest_key = min(self.key_cooldowns.items(), key=lambda x: x[1])
                    wait_time = (earliest_key[1] + self.cooldown_duration - datetime.now()).total_seconds()
                    wait_time = max(1, wait_time)
                    print(f"All keys in cooldown. Waiting {wait_time:.1f}s for key {earliest_key[0] + 1} to be available...")
                    await asyncio.sleep(wait_time)
                else:
                    await asyncio.sleep(1)

class GroqProcessor:
    
    def __init__(self):
        # Load all API keys
        self.api_keys = [
            os.getenv('GROQ_API_KEY_1'),
            os.getenv('GROQ_API_KEY_2'),
            os.getenv('GROQ_API_KEY_3'),
            os.getenv('GROQ_API_KEY_4')
        ]
        self.api_keys = [key for key in self.api_keys if key]
        if not self.api_keys:
            raise ValueError("No GROQ_API_KEY environment variables are defined")
        
        self.current_key_index = 0
        self.groq_client = self._initialize_groq()
        self.rate_limiter = GroqRateLimiter()
        
    def _initialize_groq(self) -> Groq:
        return Groq(api_key=self.api_keys[self.current_key_index])
    
    async def rotate_key(self):
        await self.rate_limiter.mark_key_rate_limited(self.current_key_index)
        
        next_key = await self.rate_limiter.wait_for_available_key(
            self.current_key_index, 
            len(self.api_keys)
        )
        
        self.current_key_index = next_key
        self.groq_client = self._initialize_groq()
        print(f"Rotated to API key {self.current_key_index + 1}/{len(self.api_keys)}")
        
        await asyncio.sleep(5)

    @staticmethod
    def extract_passages(question_id: str, passages: dict, rankings_dict: dict) -> list[str]:
        retrieved_passages = []
        should_stop = False
        
        for i in range(len(rankings_dict[question_id])):
            if should_stop or len(retrieved_passages) == 10:
                break
                
            if len(retrieved_passages) == 0:
                retrieved_passages.append(rankings_dict[question_id][i]["doc"])
                continue
                    
            if i < len(rankings_dict[question_id]) - 1 and rankings_dict[question_id][i]["score"] - rankings_dict[question_id][i+1]["score"] > 0.1:
                    should_stop = True

            if rankings_dict[question_id][i]["score"] < 0.72:
                break

            retrieved_passages.append(rankings_dict[question_id][i]["doc"])
            
        return [passages[doc] for doc in retrieved_passages if doc in passages]

    async def process_question(self, question: str, passages: List[str], max_passages: int = None) -> str:
        if max_passages:
            passages = passages[:max_passages]
        
        if not passages:
            return "Unable to answer: No relevant passages found in the corpus."
            
        (system_prompt, user_prompt) = build_prompt(question, passages, system_prompt_few_shot)
        
        await asyncio.sleep(4)
        
        try:
            completion = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.25,
                max_tokens=800,
                top_p=1,
                stream=False
            )
            return completion.choices[0].message.content
            
        except Exception as e:
            error_str = str(e).lower()
            
            if "request too large" in error_str or "tokens per minute" in error_str:
                if max_passages is None or max_passages > 1:
                    new_max = 3 if max_passages is None else max(1, max_passages // 2)
                    print(f"Prompt too large, retrying with {new_max} passage(s)...")
                    return await self.process_question(question, passages, max_passages=new_max)
                else:
                    raise Exception(f"Cannot reduce passages further. Prompt still too large with {max_passages} passage(s).")
            
            # Handle rate limit errors by rotating keys
            if "rate limit" in error_str:
                print(f"Rate limit hit, rotating to next available key...")
                await self.rotate_key()
                return await self.process_question(question, passages, max_passages=max_passages)
            
            raise

async def main():
    # Load environment variables
    load_dotenv()
    
    # Initialize processor
    processor = GroqProcessor()
    
    output_file = "data/answers-llama3.1.json"
    processed_ids = set()
    answers = []
    
    # Track statistics
    questions_with_missing_passages = []
    questions_with_no_passages = []
    
    if os.path.exists(output_file):
        print(f"Found existing output file. Loading to resume...")
        with open(output_file, 'r', encoding='utf-8') as f:
            answers = json.load(f)
            processed_ids = {ans["QuestionID"] for ans in answers}
        print(f"Resuming from question {len(answers) + 1} (already processed {len(answers)} questions)")
    
    try:
        # Load necessary data
        print("Loading passages...")
        with open('../../all_data.json', 'r', encoding='utf-8') as f:  # ← CHANGED
            all_data = json.load(f)

        passages = {}
        for doc in all_data:  # ← CHANGED from 'q' to 'doc'
            for psg in doc['Passages']:  # ← Changed
                psg_id = f"{psg['DocumentID']}-{psg['PassageID']}"
                passage_text = psg['PassageID'] + " " + psg['Passage']
                if len(passage_text) > 100:  # ← Add same filter
                    passages[psg_id] = psg['Passage']

        print("Loading rankings...")
        rankings_dict = defaultdict(list)
        with open('data/rankings_hybrid_rerank.trec', 'r') as f:
            for line in f:
                parts = line.strip().split()
                rankings_dict[parts[0]].append({
                    'doc': parts[2],
                    'score': float(parts[4])
                })

        # Process questions
        print("Processing questions...")
        with open("./QnA_complete.json", encoding='utf-8') as f:
            questions = json.load(f)
            
            # Filter out already processed questions
            questions_to_process = [q for q in questions if q["QuestionID"] not in processed_ids]
            print(f"Total questions: {len(questions)}")
            print(f"Questions remaining to process: {len(questions_to_process)}")
            
            # Create progress bar for only unprocessed questions
            for q in tqdm_asyncio(questions_to_process, desc="Processing"):
                try:
                    # Extract passages with the fixed method
                    question_id = q["QuestionID"]
                    
                    # Get document IDs from rankings
                    doc_ids_from_rankings = [r["doc"] for r in rankings_dict[question_id][:10]]
                    
                    # Extract only available passages
                    retrieved_passages = processor.extract_passages(
                        question_id, 
                        passages, 
                        rankings_dict
                    )
                    
                    # Track missing passages
                    missing_docs = [doc for doc in doc_ids_from_rankings if doc not in passages]
                    if missing_docs:
                        questions_with_missing_passages.append({
                            "question_id": question_id,
                            "missing_count": len(missing_docs),
                            "missing_docs": missing_docs[:3]  # Just store first 3
                        })
                    
                    if not retrieved_passages:
                        questions_with_no_passages.append(question_id)
                    
                    # Process question even if some or all passages are missing
                    answer = await processor.process_question(
                        q["Question"], 
                        retrieved_passages
                    )
                    
                    answers.append({
                        "QuestionID": question_id,
                        "RetrievedPassages": retrieved_passages,
                        "Answer": answer,
                        "MissingPassagesCount": len(missing_docs) if missing_docs else 0
                    })
                    
                    # Save progress every 10 questions
                    if len(answers) % 10 == 0:
                        with open(output_file, "w", encoding='utf-8') as f:
                            json.dump(answers, f, indent=2, ensure_ascii=False)
                        print(f"\nProgress saved: {len(answers)}/{len(questions)} questions completed")
                            
                except Exception as e:
                    print(f"\nError processing question {q['QuestionID']}: {e}")
                    # Still save the question with an error message
                    answers.append({
                        "QuestionID": q["QuestionID"],
                        "RetrievedPassages": [],
                        "Answer": f"ERROR: {str(e)}",
                        "Error": str(e)
                    })
                    # Save progress on error
                    if answers:
                        with open(output_file, "w", encoding='utf-8') as f:
                            json.dump(answers, f, indent=2, ensure_ascii=False)

        # Save final results
        print("\nSaving final results...")
        with open(output_file, "w", encoding='utf-8') as f:
            json.dump(answers, f, indent=2, ensure_ascii=False)
        
        # Save statistics
        stats_file = "data/processing_statistics.json"
        statistics = {
            "total_questions": len(questions),
            "successfully_processed": len([a for a in answers if "Error" not in a]),
            "questions_with_errors": len([a for a in answers if "Error" in a]),
            "questions_with_missing_passages": len(questions_with_missing_passages),
            "questions_with_no_passages": len(questions_with_no_passages),
            "missing_passage_details": questions_with_missing_passages[:10],  # First 10
            "questions_with_no_passages_list": questions_with_no_passages
        }
        
        with open(stats_file, "w", encoding='utf-8') as f:
            json.dump(statistics, f, indent=2)
            
        print("\n" + "="*60)
        print("PROCESSING COMPLETE!")
        print("="*60)
        print(f"Total questions processed: {len(answers)}/{len(questions)}")
        print(f"Successfully processed: {statistics['successfully_processed']}")
        print(f"Questions with errors: {statistics['questions_with_errors']}")
        print(f"\nResults saved to: {output_file}")
        print(f"Statistics saved to: {stats_file}")
        print("="*60)
        
    except Exception as e:
        print(f"\nFatal error during processing: {e}")
        # Save partial results if available
        if answers:
            with open(output_file, "w", encoding='utf-8') as f:
                json.dump(answers, f, indent=2, ensure_ascii=False)
            print(f"Partial results saved: {len(answers)} questions")
        raise

# Run the processor
nest_asyncio.apply()
await main()

Loading passages...
Loading rankings...
Processing questions...
Total questions: 240
Questions remaining to process: 240


Processing:   4%|▍         | 10/240 [01:04<34:48,  9.08s/it]


Progress saved: 10/240 questions completed


Processing:   8%|▊         | 20/240 [02:54<51:42, 14.10s/it]


Progress saved: 20/240 questions completed


Processing:  12%|█▎        | 30/240 [04:16<27:13,  7.78s/it]


Progress saved: 30/240 questions completed


Processing:  17%|█▋        | 40/240 [06:54<44:48, 13.44s/it]  


Progress saved: 40/240 questions completed


Processing:  21%|██        | 50/240 [08:47<34:05, 10.77s/it]


Progress saved: 50/240 questions completed


Processing:  25%|██▌       | 60/240 [10:24<27:01,  9.01s/it]


Progress saved: 60/240 questions completed


Processing:  27%|██▋       | 65/240 [11:50<42:57, 14.73s/it]  

Prompt too large, retrying with 3 passage(s)...
Prompt too large, retrying with 1 passage(s)...


Processing:  28%|██▊       | 66/240 [12:02<40:35, 14.00s/it]


Error processing question a3c2d8b5-fc5e-4eab-b15d-cf726e239a88: Cannot reduce passages further. Prompt still too large with 1 passage(s).


Processing:  29%|██▉       | 70/240 [12:51<45:08, 15.93s/it]


Progress saved: 70/240 questions completed


Processing:  33%|███▎      | 80/240 [16:04<39:25, 14.79s/it]  


Progress saved: 80/240 questions completed


Processing:  38%|███▊      | 90/240 [17:30<22:38,  9.06s/it]


Progress saved: 90/240 questions completed


Processing:  42%|████▏     | 100/240 [20:01<23:36, 10.12s/it]


Progress saved: 100/240 questions completed


Processing:  44%|████▍     | 106/240 [20:57<20:46,  9.30s/it]

Prompt too large, retrying with 3 passage(s)...
Prompt too large, retrying with 1 passage(s)...


Processing:  46%|████▌     | 110/240 [21:41<24:47, 11.44s/it]


Progress saved: 110/240 questions completed


Processing:  50%|█████     | 120/240 [23:33<22:47, 11.40s/it]


Progress saved: 120/240 questions completed


Processing:  54%|█████▍    | 130/240 [25:34<23:15, 12.68s/it]


Progress saved: 130/240 questions completed


Processing:  58%|█████▊    | 140/240 [27:06<14:22,  8.63s/it]


Progress saved: 140/240 questions completed


Processing:  62%|██████▎   | 150/240 [30:13<20:34, 13.72s/it]


Progress saved: 150/240 questions completed


Processing:  67%|██████▋   | 160/240 [32:09<23:51, 17.90s/it]


Progress saved: 160/240 questions completed


Processing:  71%|███████   | 170/240 [33:44<09:53,  8.47s/it]


Progress saved: 170/240 questions completed


Processing:  75%|███████▌  | 180/240 [35:29<10:16, 10.27s/it]


Progress saved: 180/240 questions completed


Processing:  79%|███████▉  | 190/240 [37:19<07:48,  9.36s/it]


Progress saved: 190/240 questions completed


Processing:  83%|████████▎ | 200/240 [38:51<05:43,  8.59s/it]


Progress saved: 200/240 questions completed


Processing:  88%|████████▊ | 210/240 [40:32<04:46,  9.54s/it]


Progress saved: 210/240 questions completed


Processing:  92%|█████████▏| 220/240 [42:15<03:46, 11.31s/it]


Progress saved: 220/240 questions completed


Processing:  92%|█████████▏| 221/240 [42:57<06:26, 20.36s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:36:45.771009
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:36:54.917246
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 20:37:04.041395
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 20:37:13.177859
All keys in cooldown. Waiting 92.6s for key 1 to be available...
Rotated to API key 1/4


Processing:  92%|█████████▎| 222/240 [45:10<16:16, 54.27s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:38:59.180084
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:39:08.324846
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 20:39:17.485173
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 20:39:26.641137
All keys in cooldown. Waiting 92.5s for key 1 to be available...
Rotated to API key 1/4


Processing:  93%|█████████▎| 223/240 [48:04<25:32, 90.14s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:41:53.010107
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:42:02.175085
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 20:42:11.317921
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 20:42:20.482179
All keys in cooldown. Waiting 92.5s for key 1 to be available...
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:44:02.173857
All keys in cooldown. Waiting 1.0s for key 2 to be available...
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:44:12.344882
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marke

Processing:  93%|█████████▎| 224/240 [59:50<1:13:18, 274.88s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:53:38.935710
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:53:48.076261
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 20:53:57.223012
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 20:54:06.360704
All keys in cooldown. Waiting 92.6s for key 1 to be available...
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 20:55:48.092995
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 20:55:57.230529
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 20:56:06.356330
All keys in c

Processing:  94%|█████████▍| 225/240 [1:07:00<1:20:23, 321.55s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:00:49.398639
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:00:58.539085
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:01:07.664264
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:01:16.806022
All keys in cooldown. Waiting 92.6s for key 1 to be available...
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:02:58.539365
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:03:07.689568
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:03:16.813979
Rotated to AP

Processing:  94%|█████████▍| 226/240 [1:11:28<1:11:15, 305.40s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:05:17.099620
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:05:26.233963
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:05:35.357909
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:05:44.483268
All keys in cooldown. Waiting 92.6s for key 1 to be available...
Rotated to API key 1/4


Processing:  95%|█████████▍| 227/240 [1:13:52<55:39, 256.89s/it]  

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:07:40.814065
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:07:49.922742
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:07:59.050524
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:08:08.177493
All keys in cooldown. Waiting 92.6s for key 1 to be available...
Rotated to API key 1/4


Processing:  95%|█████████▌| 228/240 [1:16:48<46:34, 232.83s/it]

Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:10:37.518311
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:10:46.711697
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:10:55.856711
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:11:05.014925
All keys in cooldown. Waiting 92.5s for key 1 to be available...
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:12:46.699847
All keys in cooldown. Waiting 1.0s for key 2 to be available...
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:12:56.854576
Rotated to API key 3/4


Processing:  95%|█████████▌| 229/240 [1:20:21<41:35, 226.87s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:14:10.481844
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:14:19.605019
All keys in cooldown. Waiting 27.1s for key 1 to be available...
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:14:55.832027
All keys in cooldown. Waiting 1.0s for key 2 to be available...
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:15:05.987683
All keys in cooldown. Waiting 64.5s for key 3 to be available...
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:16:19.615390
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:16:28.739057
All keys in cooldow

Processing:  96%|█████████▌| 230/240 [1:25:16<41:13, 247.35s/it]


Progress saved: 230/240 questions completed
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:19:05.608775
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:19:14.730968
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:19:23.898807
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:19:33.038778
All keys in cooldown. Waiting 92.6s for key 3 to be available...
Rotated to API key 3/4


Processing:  96%|█████████▋| 231/240 [1:27:54<33:03, 220.44s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:21:43.357605
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:21:52.519736
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:22:01.653322
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:22:10.800137
All keys in cooldown. Waiting 92.6s for key 3 to be available...
Rotated to API key 3/4


Processing:  97%|█████████▋| 232/240 [1:30:08<25:54, 194.37s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:23:56.800860
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:24:05.931488
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:24:15.044354
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:24:24.180720
All keys in cooldown. Waiting 92.6s for key 3 to be available...
Rotated to API key 3/4


Processing:  97%|█████████▋| 233/240 [1:32:21<20:32, 176.11s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:26:10.303438
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:26:19.458000
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:26:28.588387
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:26:37.737609
All keys in cooldown. Waiting 92.6s for key 3 to be available...
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:28:19.465270
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:28:28.588936
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:28:37.721453
All keys in c

Processing:  98%|█████████▊| 234/240 [1:38:53<24:05, 240.84s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:32:42.198541
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:32:51.314845
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:33:00.432466
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:33:09.544290
All keys in cooldown. Waiting 92.7s for key 3 to be available...
Rotated to API key 3/4


Processing:  98%|█████████▊| 235/240 [1:41:07<17:23, 208.64s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:34:55.692436
Rotated to API key 4/4
Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:35:04.817502
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:35:13.930303
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:35:23.051591
All keys in cooldown. Waiting 92.6s for key 3 to be available...
Rotated to API key 3/4


Processing:  98%|█████████▊| 236/240 [1:43:20<12:24, 186.06s/it]

Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:37:09.076789
Rotated to API key 4/4


Processing:  99%|█████████▉| 237/240 [1:44:28<07:32, 150.79s/it]

Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:38:17.572035
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:38:26.702878
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:38:35.860697
All keys in cooldown. Waiting 33.2s for key 3 to be available...
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:39:18.196759
All keys in cooldown. Waiting 59.4s for key 4 to be available...
Rotated to API key 4/4


Processing:  99%|█████████▉| 238/240 [1:46:55<04:59, 149.56s/it]

Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:40:44.277720
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:40:53.401474
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:41:02.516046
All keys in cooldown. Waiting 15.7s for key 3 to be available...
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:41:27.327824
All keys in cooldown. Waiting 76.9s for key 4 to be available...
Rotated to API key 4/4


Processing: 100%|█████████▉| 239/240 [1:49:09<02:24, 144.75s/it]

Rate limit hit, rotating to next available key...
Key 4 marked as rate-limited until 2025-11-19 21:42:57.795088
Rotated to API key 1/4
Rate limit hit, rotating to next available key...
Key 1 marked as rate-limited until 2025-11-19 21:43:06.938588
Rotated to API key 2/4
Rate limit hit, rotating to next available key...
Key 2 marked as rate-limited until 2025-11-19 21:43:16.125288
All keys in cooldown. Waiting 11.2s for key 3 to be available...
Rotated to API key 3/4
Rate limit hit, rotating to next available key...
Key 3 marked as rate-limited until 2025-11-19 21:43:36.502119
All keys in cooldown. Waiting 81.3s for key 4 to be available...
Rotated to API key 4/4


Processing: 100%|██████████| 240/240 [1:52:05<00:00, 28.02s/it] 


Progress saved: 240/240 questions completed

Saving final results...

PROCESSING COMPLETE!
Total questions processed: 240/240
Successfully processed: 239
Questions with errors: 1

Results saved to: data/answers-llama3.1.json
Statistics saved to: data/processing_statistics.json





## Results Evaluation

To evaluate and compare the results obtained from 
1. Groq deployment with `Llama-3.1`

Run the following scripts using the RePASs virtual environment to evaluate each model's performance. Make sure you have activated the correct environment before running these commands.

In [None]:
# Copy RePASs repo to validate results - ONLY RUN ONCE
# git clone https://github.com/RegNLP/RePASs.git && cd RePASs

In [None]:
## Script to evaluate the results. Results are placed in /RePASs/results/hybrid or /RePASs/results/hybrid-4o
## These scripts must be run using the virtual env in RePASs

#python scripts/evaluate_model.py --input_file ./../results/answers-GPT35Turbo.json --group_method_name hybrid-GPT35Turbo
#python scripts/evaluate_model.py --input_file ./../results/answers-GPT4o.json --group_method_name hybrid-GPT4o
#python scripts/evaluate_model.py --input_file ./../results/answers-llama3.1.json --group_method_name hybrid-llama