In [15]:
!pip install requests pandas datasets



In [16]:
import json
import requests
import random
import time
import os
from datasets import load_dataset
from google.colab import files
from google.colab import userdata

In [25]:
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"

# Output
JSON_FILE_PATH = "driver_llm_responses.json"

if os.path.exists(JSON_FILE_PATH) and os.path.getsize(JSON_FILE_PATH) > 0:
    with open(JSON_FILE_PATH, "r", encoding="utf-8") as file:
        stored_responses = json.load(file)
else:
    stored_responses = []

In [26]:
# Datasets and Fields
HF_DATASETS = {
    "shreyanmitra/OpenEndedLLMPrompts": "question",
    "sewon/ambig_qa": "question",
    "aporia-ai/rag_hallucinations": "question",
    "lasha-nlp/HALoGEN-prompts": "prompt",
    "Cleanlab/FinQA-hallucination-detection": "query",
    "tourist800/LLM-Hallucination-Detection-complex-mathematics": "Problem Statement"
}
JSON_DATASETS = {
    "HaluEval": ("https://github.com/RUCAIBox/HaluEval/raw/main/data/general_data.json", "user_query")
}

# Rate Limit Handling
MAX_REQUESTS_PER_MINUTE = 30
PAUSE_DURATION = 60 / MAX_REQUESTS_PER_MINUTE
MAX_TOKENS_PER_MINUTE = 6000

PENDING_QUERIES = []
token_usage = 0
start_time = time.time()

In [27]:
# GROQ API
def query_groq(prompt):
    """Sends a request to the Groq API and handles rate limit errors."""
    global token_usage

    payload = {
        "model": "llama-3.1-8b-instant",
        "messages": [{"role": "user", "content": prompt}]
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }

    response = requests.post(GROQ_URL, headers=headers, json=payload)

    if response.status_code == 200:
        response_data = response.json()
        llm_response = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
        token_usage += response_data.get("usage", {}).get("total_tokens", 0)
        return llm_response

    elif response.status_code == 429:
        error_message = response.json().get("error", {}).get("message", "")
        print(f"Rate limit hit: {error_message}")

        wait_time = extract_wait_time(error_message)
        PENDING_QUERIES.append(prompt)

        print(f"Storing query. Will retry in {wait_time} seconds...")
        time.sleep(wait_time)
        return None

    else:
        return f"Error: {response.status_code}, {response.text}"


In [28]:
def extract_wait_time(error_message):
    """Extracts wait time from the error message (if rate limited)."""
    try:
        parts = error_message.split("Please try again in ")
        wait_time = float(parts[1].split("ms")[0].strip()) / 1000
        return max(wait_time, 1)
    except:
        return 60

In [29]:
def process_pending_queries():
    """Retries any stored queries after all normal queries are processed."""
    global PENDING_QUERIES
    if PENDING_QUERIES:
        print(f"\nRetrying {len(PENDING_QUERIES)} stored queries...")
        for prompt in PENDING_QUERIES:
            response = query_groq(prompt)
            if response:
                stored_responses.append({"query": prompt, "response": response})
        PENDING_QUERIES = []

In [30]:
def fetch_json_queries(url, field):
    try:
        response = requests.get(url)
        response.raise_for_status()

        queries = []
        for line in response.text.strip().split("\n"):
            try:
                entry = json.loads(line)
                if field in entry:
                    queries.append(entry[field])
            except json.JSONDecodeError as e:
                print(f"Skipping malformed line: {e}")

        return random.sample(queries, min(200, len(queries)))

    except Exception as e:
        print(f"Failed to fetch {url}: {e}")
        return []

In [31]:
def fetch_hf_queries(dataset_name, field):
    try:
        dataset = load_dataset(dataset_name, split="train")
        queries = [entry[field] for entry in dataset if field in entry]
        return random.sample(queries, min(200, len(queries)))
    except Exception as e:
        print(f"Failed to fetch {dataset_name}: {e}")
        return []

In [32]:
# Collect Queries
dataset_queries = []
print("Fetching queries from datasets...")

for dataset, field in HF_DATASETS.items():
    dataset_queries.extend(fetch_hf_queries(dataset, field))

for dataset, (url, field) in JSON_DATASETS.items():
    dataset_queries.extend(fetch_json_queries(url, field))

print(f"Total queries collected: {len(dataset_queries)}")

Fetching queries from datasets...
Total queries collected: 1400


In [33]:
# Query processing
print("\nProcessing queries with Groq API...")
processed_queries = {entry["query"] for entry in stored_responses}

for i, query in enumerate(dataset_queries):
    if query in processed_queries:
        continue

    response = query_groq(query)

    if response and not response.startswith("Error: 429"):
        stored_responses.append({"query": query, "response": response})

    # Save after every 30 queries
    if (i + 1) % 30 == 0:
        print(f"Processed {i + 1}/{len(dataset_queries)} queries...")
        with open(JSON_FILE_PATH, "w", encoding="utf-8") as file:
            json.dump(stored_responses, file, indent=4)

    # Handle rate limit
    elapsed_time = time.time() - start_time
    if token_usage >= MAX_TOKENS_PER_MINUTE:
        print(f"Token limit reached ({token_usage}/6000). Waiting 60 seconds...")
        time.sleep(60)
        start_time = time.time()
        token_usage = 0
    else:
        time.sleep(PAUSE_DURATION)



Processing queries with Groq API...
Token limit reached (6641/6000). Waiting 60 seconds...
Token limit reached (6321/6000). Waiting 60 seconds...
Processed 30/1400 queries...
Token limit reached (6244/6000). Waiting 60 seconds...
Token limit reached (6106/6000). Waiting 60 seconds...
Token limit reached (6054/6000). Waiting 60 seconds...
Processed 60/1400 queries...
Token limit reached (6574/6000). Waiting 60 seconds...
Token limit reached (6935/6000). Waiting 60 seconds...
Token limit reached (6658/6000). Waiting 60 seconds...
Processed 90/1400 queries...
Token limit reached (6687/6000). Waiting 60 seconds...
Token limit reached (6243/6000). Waiting 60 seconds...
Token limit reached (7554/6000). Waiting 60 seconds...
Processed 120/1400 queries...
Token limit reached (6287/6000). Waiting 60 seconds...
Token limit reached (6521/6000). Waiting 60 seconds...
Token limit reached (6104/6000). Waiting 60 seconds...
Processed 150/1400 queries...
Token limit reached (6416/6000). Waiting 60 se

KeyboardInterrupt: 

In [None]:
# retry any stored queries
process_pending_queries()

In [34]:
# Filter out any remaining 429 errors before saving
stored_responses = [entry for entry in stored_responses if not entry["response"].startswith("Error: 429")]

# Save cleaned responses
with open(JSON_FILE_PATH, "w", encoding="utf-8") as file:
    json.dump(stored_responses, file, indent=4)

print(f"All {len(stored_responses)} valid queries processed and saved!")

All 1300 valid queries processed and saved!


In [36]:
files.download(JSON_FILE_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>