In [6]:
import pandas as pd
from dotenv import load_dotenv
import os
from groq import Groq
import time
from tqdm import tqdm
import json

In [7]:
# Read the FreshQA question/needle workbook into a DataFrame and preview
# its first rows (head() shows five by default).
df = pd.read_excel(io="Fresh_QA_without_misleading_statements.xlsx")
df.head()

Unnamed: 0,id,question,needle,real_needle,context_relevant,context_irrelevant,statements_misleading
0,43,How long has Elon Musk been X Corp.'s CEO?,Elon Musk is no longer X Corp.'s CEO.,Elon Musk is no longer X Corp.'s CEO.,43.txt,43.txt,"['test1', 'test2', 'test3']"
1,44,Where will the FIFA World Cup be hosted this y...,There won't be a FIFA World Cup this year.,There won't be a FIFA World Cup this year.,44.txt,44.txt,"['test1', 'test2', 'test3']"
2,92,Alphabet's market capitalization reached its h...,The all-time highest value of Alphabet was in ...,The all-time highest value of Alphabet was in ...,92.txt,92.txt,"['test1', 'test2', 'test3']"
3,95,Which Republican was elected Speaker of the Ho...,No one received a majority of the votes on the...,No one received a majority of the votes on the...,95.txt,95.txt,"['test1', 'test2', 'test3']"
4,96,"In January 2023, the NHC revised the fatality ...","The reported death toll decreased to 1,392","The reported death toll decreased to 1,392",96.txt,96.txt,"['test1', 'test2', 'test3']"


In [8]:
# Remove the placeholder "statements_misleading" column; it is regenerated
# further down in this notebook.
# Rebinding `df` (instead of inplace=True) together with errors="ignore" keeps
# this cell re-runnable: a second execution no longer raises KeyError when the
# column has already been removed.
df = df.drop(columns=["statements_misleading"], errors="ignore")
df

Unnamed: 0,id,question,needle,real_needle,context_relevant,context_irrelevant
0,43,How long has Elon Musk been X Corp.'s CEO?,Elon Musk is no longer X Corp.'s CEO.,Elon Musk is no longer X Corp.'s CEO.,43.txt,43.txt
1,44,Where will the FIFA World Cup be hosted this y...,There won't be a FIFA World Cup this year.,There won't be a FIFA World Cup this year.,44.txt,44.txt
2,92,Alphabet's market capitalization reached its h...,The all-time highest value of Alphabet was in ...,The all-time highest value of Alphabet was in ...,92.txt,92.txt
3,95,Which Republican was elected Speaker of the Ho...,No one received a majority of the votes on the...,No one received a majority of the votes on the...,95.txt,95.txt
4,96,"In January 2023, the NHC revised the fatality ...","The reported death toll decreased to 1,392","The reported death toll decreased to 1,392",96.txt,96.txt
...,...,...,...,...,...,...
74,586,What is the name of the most recent hurricane ...,Milton,Milton,586.txt,586.txt
75,587,What is King Gizzard’s most recent studio album?,Flight b741,Flight b741,587.txt,587.txt
76,588,Which comedy series won the most recent Primet...,Hacks (Season 3),Hacks (Season 3),588.txt,588.txt
77,589,What institution won the most recent ACM-ICPC ...,Peking University,Peking University,589.txt,589.txt


In [9]:
# Pull variables from a local .env file into the process environment,
# then look up the API key stored under "snlp_api_key" (None if unset).
load_dotenv()
groq_api_key = os.environ.get("snlp_api_key")

# Build the Groq API client with that key
client = Groq(api_key=groq_api_key)

# Prompt builder using your custom format
def build_prompt(question, answer):
    """Build the user prompt asking for five sentences that slightly contradict the needle.

    The prompt names the JSON key ("sentences") that the response parser reads.
    Without an explicit schema the model is free to pick any key, which shows
    up downstream as ``KeyError: 'sentences'`` and a wasted retry.

    Args:
        question: The question text, inserted after ``Q:``.
        answer: The needle sentence, inserted after ``Needle:``.

    Returns:
        The complete prompt as a single string.
    """
    return (
        # Plain literals where nothing is interpolated; f-strings only where needed.
        "Give me 5 sentences that slightly contradict this sentence (i.e. add semantic noise to the needle for the needle in haystack test):\n"
        f"Q: {question}\n"
        f"Needle: {answer}\n\n"
        'Give it to me in JSON format, as an object with a single key "sentences" '
        "whose value is a list of the 5 sentences as strings."
    )

# Get model response from Groq API
def get_contradictions(question, answer, max_retries=5, retry_delay=2.0):
    """Ask the chat model for sentences contradicting ``answer`` and return them.

    The request is sent in JSON mode and the value stored under the
    ``"sentences"`` key of the returned JSON object is handed back to the caller.

    Failed attempts (API errors, invalid JSON, missing ``"sentences"`` key) are
    retried, but only up to ``max_retries`` times and with a growing pause
    between attempts. An unbounded, immediate retry loop never terminates on
    permanent failures and keeps sending requests without any delay.

    Args:
        question: Question text forwarded to ``build_prompt``.
        answer: Needle sentence forwarded to ``build_prompt``.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Base pause in seconds; attempt ``k`` waits ``retry_delay * k``.

    Returns:
        Whatever the model placed under the ``"sentences"`` key.

    Raises:
        RuntimeError: If every attempt failed; the last underlying error is
            chained as ``__cause__``.
    """
    # The prompt does not change between attempts, so build it once.
    prompt = build_prompt(question, answer)
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            completion = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                temperature=1.0,
                max_tokens=1024,
                top_p=1.0,
                stream=False,
                response_format={"type": "json_object"},
                stop=None,
            )
            content = completion.choices[0].message.content
            return json.loads(content)["sentences"]
        except Exception as e:
            last_error = e
            print(f"Error: {e}")
            if attempt < max_retries:
                print("Retrying now...")
                # Linear backoff so repeated failures do not flood the API.
                time.sleep(retry_delay * attempt)

    raise RuntimeError(
        f"No usable response after {max_retries} attempts for question: {question!r}"
    ) from last_error

# Rate-limit settings: pause PAUSE_SECONDS after every REQUESTS_PER_BATCH rows.
REQUESTS_PER_BATCH = 5
PAUSE_SECONDS = 15

# Store misleading sentences (one entry per DataFrame row, in row order)
misleading_statements = []

# Generate misleading sentences per row
row_iter = tqdm(df.iterrows(), total=len(df), desc="Generating misleading sentences")
for position, (_, row) in enumerate(row_iter):
    question = row.get("question", "")
    needle = row.get("needle", "")
    # Count rows by position rather than by index label, so the pause still
    # fires every REQUESTS_PER_BATCH requests when the index is not 0..n-1.
    if position > 0 and position % REQUESTS_PER_BATCH == 0:
        print(f"⏳ Rate limit pause: sleeping for {PAUSE_SECONDS} seconds...")
        time.sleep(PAUSE_SECONDS)
    misleading = get_contradictions(question, needle)
    misleading_statements.append(misleading)

# Add to DataFrame
df['statements_misleading'] = misleading_statements

# Save to Excel
df.to_excel("FreshQADataset_with_misleading.xlsx", index=False)
print("✅ Data saved to FreshQADataset_with_misleading.xlsx")

Generating misleading sentences:   6%|▋         | 5/79 [00:07<01:46,  1.44s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  13%|█▎        | 10/79 [00:25<02:07,  1.84s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  19%|█▉        | 15/79 [00:43<02:01,  1.90s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  22%|██▏       | 17/79 [01:00<04:32,  4.40s/it]

Error: 'sentences'
Retrying now...


Generating misleading sentences:  25%|██▌       | 20/79 [01:02<02:03,  2.09s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  32%|███▏      | 25/79 [01:21<01:50,  2.05s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  38%|███▊      | 30/79 [01:39<01:35,  1.96s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  43%|████▎     | 34/79 [01:57<01:51,  2.49s/it]

Error: 'sentences'
Retrying now...


Generating misleading sentences:  44%|████▍     | 35/79 [01:59<01:35,  2.17s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  51%|█████     | 40/79 [02:17<01:18,  2.02s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  57%|█████▋    | 45/79 [02:35<01:05,  1.94s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  63%|██████▎   | 50/79 [02:53<00:55,  1.93s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  70%|██████▉   | 55/79 [03:12<00:48,  2.01s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  76%|███████▌  | 60/79 [03:31<00:37,  1.95s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  81%|████████  | 64/79 [03:48<00:37,  2.52s/it]

Error: 'sentences'
Retrying now...


Generating misleading sentences:  82%|████████▏ | 65/79 [03:49<00:29,  2.11s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  89%|████████▊ | 70/79 [04:08<00:17,  2.00s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences:  95%|█████████▍| 75/79 [04:26<00:07,  1.95s/it]

⏳ Rate limit pause: sleeping for 15 seconds...


Generating misleading sentences: 100%|██████████| 79/79 [04:44<00:00,  3.60s/it]

✅ Data saved to FreshQADataset_with_misleading.xlsx





In [10]:
# Serialise the DataFrame as a list of row records (pretty-printed, non-ASCII
# characters kept as-is) and write the text to context.json as UTF-8.
json_output = df.to_json(
    orient="records",
    indent=4,
    force_ascii=False,
)
with open("context.json", mode="w", encoding="utf-8") as json_file:
    json_file.write(json_output)
