<a href="https://colab.research.google.com/github/Rakib911Hossan/Al_Project/blob/main/qwen_qwen3_14b_free.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ========================================
# STEP 1: Install Dependencies
# ========================================
!pip install -q openai

# ========================================
# STEP 2: Upload Files
# ========================================
from google.colab import files
import os
import shutil

print("📤 Please upload your files:")
print("1. questions_top_five.jsonl")
print("2. docs_distractor_top_five.json")
print("\nClick 'Choose Files' below:")

uploaded = files.upload()

# Create directories
os.makedirs("data", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Move uploaded files
for filename in uploaded.keys():
    destination_path = os.path.join("data", filename)
    try:
        shutil.move(filename, destination_path)
        print(f"✅ Moved {filename} to {destination_path}")
    except Exception as e:
        print(f"⚠️  Error moving {filename}: {e}")

print("✅ Files uploaded successfully!\n")


import json
import re
import time
from tqdm import tqdm
from openai import OpenAI


# ========================================
# Setup OpenRouter with Gemini 2.0 Flash
# ========================================
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key="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"
)

# ========================================
# Helper Functions
# ========================================
def load_top_5_questions(jsonl_path):
    questions = []
    with open(jsonl_path, "r") as f:
        for i, line in enumerate(f):
            if i >= 5:
                break
            item = json.loads(line.strip())
            # Keep golden_answer for comparison
            item.pop("answer", None)  # Remove only 'answer' if it exists
            questions.append(item)
    return questions

def load_matching_docs(docs_path, topic_ids):
    with open(docs_path, "r") as f:
        all_docs = json.load(f)

    matching_docs = {}
    for doc in all_docs:
        if doc["topic_id"] in topic_ids:
            cleaned_docs = []
            for d in doc["docs"]:
                cleaned_docs.append({
                    "title": d.get("title", ""),
                    "content": d.get("content", ""),
                    "uuid": d.get("uuid", "")
                })
            matching_docs[doc["topic_id"]] = {"docs": cleaned_docs}
    return matching_docs

def build_prompt(target_event, options, docs):
    """Build an improved prompt with clearer instructions"""
    doc_text = "\n\n".join([f"Document: {d.get('content', '')[:500]}" for d in docs[:3]])  # Limit to first 3 docs
    opt_text = "\n".join([f"{k}: {v}" for k, v in options.items()])

    prompt = f"""Analyze the following causal relationship and select the BEST answer(s).

TARGET EVENT:
{target_event}

CANDIDATE CAUSES:
{opt_text}

CONTEXT (use this to determine causality):
{doc_text}

INSTRUCTIONS:
- Select the option(s) (A, B, C, or D) that DIRECTLY CAUSED the target event
- You can select multiple options if needed (e.g., "A,B" or "B,D")
- Base your answer on the provided context
- If none apply, select "B" (as "None of the others are correct causes")

RESPOND IN THIS EXACT FORMAT:
Answer: <letter(s)>
Reason: <brief explanation>

Example responses:
Answer: C
Reason: The document shows that event C directly led to the target event.

OR

Answer: B,D
Reason: Both events B and D contributed to causing the target event.
"""
    return prompt.strip()

def extract_answer(text):
    """
    Improved answer extraction with multiple fallback methods
    """
    # Try to extract from "Answer: X" format (handles A, B, C, D, or combinations like B,D)
    match = re.search(r"Answer:\s*([A-D](?:\s*,\s*[A-D])*)", text, re.IGNORECASE)
    if match:
        answer = match.group(1).upper().replace(" ", "")  # Remove spaces
        # Sort letters for consistency (e.g., "D,B" becomes "B,D")
        if "," in answer:
            letters = sorted(answer.split(","))
            return ",".join(letters)
        return answer

    # Fallback: Look for standalone letters near the beginning
    first_line = text.split('\n')[0] if '\n' in text else text
    match = re.search(r'\b([A-D](?:\s*,\s*[A-D])*)\b', first_line)
    if match:
        answer = match.group(1).upper().replace(" ", "")
        if "," in answer:
            letters = sorted(answer.split(","))
            return ",".join(letters)
        return answer

    return "Unknown"

def call_gemini_flash(prompt):
    """Call qwen/qwen3-14b:free via OpenRouter with retry logic"""
    max_retries = 3
    retry_delay = 2

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="qwen/qwen3-14b:free",
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,  # Lower temperature for more consistent answers
                max_tokens=512
            )
            return response.choices[0].message.content
        except Exception as e:
            if attempt < max_retries - 1:
                print(f"⚠️  Attempt {attempt + 1} failed, retrying in {retry_delay}s...")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
            else:
                print(f"❌ All retry attempts failed: {e}")
                return "Error: Unable to get response"

# ========================================
# Main Inference Function
# ========================================
def infer_causes(questions_path, docs_path, output_path):
    dataset = load_top_5_questions(questions_path)
    topic_ids = {item["topic_id"] for item in dataset}
    print(f"📋 Loaded {len(dataset)} questions")

    docs_dict = load_matching_docs(docs_path, topic_ids)
    print(f"📚 Loaded {len(docs_dict)} document sets\n")

    results = []
    for i, item in enumerate(tqdm(dataset, desc="Processing"), 1):
        topic_id = item["topic_id"]
        target_event = item["target_event"]
        options = {k: item[k] for k in ["option_A", "option_B", "option_C", "option_D"]}

        # Use all documents
        docs = docs_dict.get(topic_id, {}).get("docs", [])

        if not docs:
            print(f"⚠️  No docs for topic {topic_id}")
            continue

        prompt = build_prompt(target_event, options, docs)
        answer_text = call_gemini_flash(prompt)

        # Use improved extraction
        predicted_answer = extract_answer(answer_text)

        # Normalize golden answer for comparison
        golden_answer = item.get("golden_answer", "").upper().replace(" ", "")
        if "," in golden_answer:
            golden_letters = sorted(golden_answer.split(","))
            golden_answer = ",".join(golden_letters)

        # Check if correct
        is_correct = predicted_answer == golden_answer

        results.append({
            "topic_id": topic_id,
            "uuid": item["uuid"],
            "target_event": target_event,
            "option_A": options["option_A"],
            "option_B": options["option_B"],
            "option_C": options["option_C"],
            "option_D": options["option_D"],
            "golden_answer": golden_answer,
            "predicted_answer": predicted_answer,
            "is_correct": is_correct,
            "model_full_output": answer_text
        })

        print(f"✅ Question {i}/5 - {'CORRECT ✓' if is_correct else 'INCORRECT ✗'} (Predicted: {predicted_answer}, Golden: {golden_answer})")

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)

    print(f"\n✅ Results saved to {output_path}!")
    return results

# ========================================
# Run Inference
# ========================================
results = infer_causes(
    questions_path="data/questions_top_five.jsonl",
    docs_path="data/docs_distractor_top_five.json",
    output_path="results/causal_inference.json"
)

files.download("results/causal_inference.json")

# ========================================
# Preview Results
# ========================================
print("\n" + "="*50)
print("FINAL RESULTS SUMMARY:")
print("="*50)

correct_count = 0
total_count = len(results)

for i, result in enumerate(results, 1):
    is_correct = result['is_correct']
    if is_correct:
        correct_count += 1

    status_emoji = "✅" if is_correct else "❌"

    print(f"\n[Question {i}] {status_emoji}")
    print(f"Topic ID: {result['topic_id']}")
    print(f"Target: {result['target_event'][:80]}...")
    print(f"Golden Answer: {result['golden_answer']}")
    print(f"Predicted Answer: {result['predicted_answer']}")
    print(f"Match: {'CORRECT ✓' if is_correct else 'INCORRECT ✗'}")
    print(f"Reasoning: {result['model_full_output'][:300]}...")
    print("-"*50)

# Summary
print("\n" + "="*50)
print("FINAL SUMMARY:")
print("="*50)
print(f"Total Questions: {total_count}")
print(f"Correct Answers: {correct_count}")
print(f"Incorrect Answers: {total_count - correct_count}")
print(f"Accuracy: {(correct_count/total_count)*100:.2f}%")
print("="*50)

📤 Please upload your files:
1. questions_top_five.jsonl
2. docs_distractor_top_five.json

Click 'Choose Files' below:


TypeError: 'NoneType' object is not subscriptable

In [4]:
# ========================================
# STEP 1: Install Dependencies
# ========================================
!pip install -q openai

# ========================================
# STEP 2: Upload Files
# ========================================
from google.colab import files
import os
import shutil

print("📤 Please upload your files:")
print("1. questions_second_five.jsonl")
print("2. docs_distractor_second_five.json")
print("\nClick 'Choose Files' below:")

uploaded = files.upload()

# Create directories
os.makedirs("data", exist_ok=True)
os.makedirs("results", exist_ok=True)

# Move uploaded files
for filename in uploaded.keys():
    destination_path = os.path.join("data", filename)
    try:
        shutil.move(filename, destination_path)
        print(f"✅ Moved {filename} to {destination_path}")
    except Exception as e:
        print(f"⚠️  Error moving {filename}: {e}")

print("✅ Files uploaded successfully!\n")

import json
import re
import time
from tqdm import tqdm
from openai import OpenAI

# ========================================
# Setup OpenRouter with Qwen3-14B
# ========================================
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key="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"
)

# ========================================
# Helper Functions
# ========================================
def load_top_5_questions(jsonl_path):
    """Load first 5 questions from JSONL file"""
    questions = []
    with open(jsonl_path, "r") as f:
        for i, line in enumerate(f):
            if i >= 5:
                break
            item = json.loads(line.strip())
            item.pop("answer", None)
            questions.append(item)
    return questions

def load_matching_docs(docs_path, topic_ids):
    """Load all documents matching the topic IDs"""
    with open(docs_path, "r") as f:
        all_docs = json.load(f)

    matching_docs = {}
    for doc in all_docs:
        if doc["topic_id"] in topic_ids:
            cleaned_docs = []
            for d in doc["docs"]:
                cleaned_docs.append({
                    "title": d.get("title", ""),
                    "content": d.get("content", ""),
                    "uuid": d.get("uuid", "")
                })
            matching_docs[doc["topic_id"]] = {"docs": cleaned_docs}
    return matching_docs

def build_prompt(target_event, options, docs):
    """Build prompt with FULL document content for proper reasoning"""
    # Use ALL docs with FULL content - no truncation
    doc_text = "\n\n".join([f"Doc {i+1}: {d.get('content', '')}" for i, d in enumerate(docs)])
    opt_text = "\n".join([f"{k}: {v}" for k, v in options.items()])

    prompt = f"""Analyze which options DIRECTLY caused the target event.

TARGET EVENT: {target_event}

OPTIONS:
{opt_text}

CONTEXT:
{doc_text}

RULES:
- Select ALL options that caused the event
- Format: Answer: A or Answer: B,D or Answer: A,C,D
- Multiple answers are allowed

YOUR ANSWER (must start with "Answer:"):"""
    return prompt.strip()

def extract_answer(text):
    """
    ROBUST extraction: handles A, B,C, A,B,C,D, and edge cases
    """
    if not text or text.startswith("Error"):
        return "Unknown"

    # Method 1: Look for "Answer: X,Y,Z" format
    match = re.search(r"Answer\s*:\s*([A-D](?:\s*,\s*[A-D])*)", text, re.IGNORECASE)
    if match:
        answer = match.group(1).upper().replace(" ", "")
        if "," in answer:
            letters = sorted(set(answer.split(",")))
            return ",".join(letters)
        return answer

    # Method 2: Look for pattern "A, B, D" or "A,B,D" anywhere
    match = re.search(r'\b([A-D])(?:\s*,\s*([A-D]))+', text)
    if match:
        letters = re.findall(r'\b([A-D])\b', match.group(0))
        return ",".join(sorted(set(letters)))

    # Method 3: Look for single letter at start
    match = re.search(r'^\s*([A-D])\b', text, re.MULTILINE)
    if match:
        return match.group(1)

    # Method 4: Last resort - find ANY A-D letter
    match = re.search(r'\b([A-D])\b', text[:300])
    if match:
        return match.group(1)

    print(f"⚠️  Could not extract from: {text[:150]}...")
    return "Unknown"

def call_qwen3_14B(prompt):
    """Call qwen/qwen3-14b:free via OpenRouter with error handling"""
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="qwen/qwen3-14b:free",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=0.1,
                max_tokens=500,
                top_p=0.9
            )

            content = response.choices[0].message.content.strip()

            # Debug: Print first 100 chars
            print(f"🔍 Response preview: {content[:100]}...")

            if not content or len(content) < 10:
                print(f"⚠️  Empty response, retrying...")
                time.sleep(2)
                continue

            return content

        except Exception as e:
            print(f"⚠️  Error on attempt {attempt+1}: {str(e)[:100]}")
            if attempt < max_retries - 1:
                time.sleep(3)
            else:
                return f"Error: Failed after {max_retries} attempts"

    return "Error: No valid response"

# ========================================
# Main Inference Function
# ========================================
def infer_causes(questions_path, docs_path, output_path):
    """Run inference on all questions with full document context"""
    dataset = load_top_5_questions(questions_path)
    topic_ids = {item["topic_id"] for item in dataset}
    print(f"📋 Loaded {len(dataset)} questions")

    docs_dict = load_matching_docs(docs_path, topic_ids)
    print(f"📚 Loaded {len(docs_dict)} document sets\n")

    results = []
    for i, item in enumerate(tqdm(dataset, desc="Processing"), 1):
        topic_id = item["topic_id"]
        target_event = item["target_event"]
        options = {k: item[k] for k in ["option_A", "option_B", "option_C", "option_D"]}

        docs = docs_dict.get(topic_id, {}).get("docs", [])
        if not docs:
            print(f"⚠️  No docs for topic {topic_id}")
            continue

        print(f"\n📄 Topic {topic_id}: Using {len(docs)} documents with full content")

        prompt = build_prompt(target_event, options, docs)
        answer_text = call_qwen3_14B(prompt)

        # Extract answer
        predicted_answer = extract_answer(answer_text)

        # Normalize golden answer
        golden_answer = item.get("golden_answer", "").upper().replace(" ", "")
        if "," in golden_answer:
            golden_letters = sorted(set(golden_answer.split(",")))
            golden_answer = ",".join(golden_letters)

        # Check correctness
        is_correct = predicted_answer == golden_answer

        results.append({
            "topic_id": topic_id,
            "uuid": item["uuid"],
            "target_event": target_event,
            "option_A": options["option_A"],
            "option_B": options["option_B"],
            "option_C": options["option_C"],
            "option_D": options["option_D"],
            "golden_answer": golden_answer,
            "predicted_answer": predicted_answer,
            "is_correct": is_correct,
            "model_full_output": answer_text,
            "num_docs_used": len(docs)
        })

        status = "✅" if is_correct else "❌"
        print(f"{status} Q{i}: Pred={predicted_answer}, Gold={golden_answer}")

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)

    print(f"\n✅ Results saved to {output_path}!")
    return results

# ========================================
# Run Inference
# ========================================
print("="*60)
print("RUNNING CAUSAL INFERENCE WITH FULL DOCUMENT CONTEXT")
print("="*60 + "\n")

results = infer_causes(
    questions_path="data/questions_second_five.jsonl",
    docs_path="data/docs_distractor_second_five.json",
    output_path="results/causal_inference_full_docs.json"
)

files.download("results/causal_inference_full_docs.json")

# ========================================
# Preview Results
# ========================================
print("\n" + "="*60)
print("DETAILED RESULTS")
print("="*60)

correct_count = 0
total_count = len(results)

for i, result in enumerate(results, 1):
    is_correct = result['is_correct']
    if is_correct:
        correct_count += 1

    status_emoji = "✅" if is_correct else "❌"

    print(f"\n[Question {i}] {status_emoji}")
    print(f"Topic: {result['topic_id']}")
    print(f"Documents Used: {result['num_docs_used']}")
    print(f"Target: {result['target_event'][:70]}...")
    print(f"Golden: {result['golden_answer']}")
    print(f"Predicted: {result['predicted_answer']}")
    print(f"Status: {'CORRECT ✓' if is_correct else 'INCORRECT ✗'}")
    print(f"Reasoning: {result['model_full_output'][:200]}...")
    print("-"*60)

# Summary
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)
print(f"Total Questions: {total_count}")
print(f"✅ Correct: {correct_count}")
print(f"❌ Incorrect: {total_count - correct_count}")
print(f"📊 Accuracy: {(correct_count/total_count)*100:.2f}%")
print("="*60)

print("\n💡 KEY FEATURES:")
print("✅ Uses ALL documents for each topic (no truncation)")
print("✅ Full document content for proper reasoning")
print("✅ Supports multiple answer format (A, B,D, A,B,C,D)")
print("✅ Detailed error handling and debug output")
print("✅ Shows number of documents used per question")

📤 Please upload your files:
1. questions_second_five.jsonl
2. docs_distractor_second_five.json

Click 'Choose Files' below:


Saving docs_distractor_second_five.json to docs_distractor_second_five.json
Saving questions_second_five.jsonl to questions_second_five.jsonl
✅ Moved docs_distractor_second_five.json to data/docs_distractor_second_five.json
✅ Moved questions_second_five.jsonl to data/questions_second_five.jsonl
✅ Files uploaded successfully!

RUNNING CAUSAL INFERENCE WITH FULL DOCUMENT CONTEXT

📋 Loaded 5 questions
📚 Loaded 5 document sets



Processing:   0%|          | 0/5 [00:00<?, ?it/s]


📄 Topic 29: Using 23 documents with full content
⚠️  Error on attempt 1: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho
⚠️  Error on attempt 2: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho


Processing:  20%|██        | 1/5 [00:06<00:26,  6.56s/it]

⚠️  Error on attempt 3: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho
❌ Q1: Pred=Unknown, Gold=A

📄 Topic 23: Using 24 documents with full content
⚠️  Error on attempt 1: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho
⚠️  Error on attempt 2: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho


Processing:  40%|████      | 2/5 [00:13<00:19,  6.53s/it]

⚠️  Error on attempt 3: Error code: 400 - {'error': {'message': 'This endpoint\'s maximum context length is 40960 tokens. Ho
❌ Q2: Pred=Unknown, Gold=B,C,D

📄 Topic 33: Using 18 documents with full content
🔍 Response preview: Answer: D...
⚠️  Empty response, retrying...
🔍 Response preview: Answer: D...
⚠️  Empty response, retrying...
🔍 Response preview: Answer: D...
⚠️  Empty response, retrying...


Processing:  60%|██████    | 3/5 [00:46<00:37, 18.84s/it]

❌ Q3: Pred=Unknown, Gold=D

📄 Topic 3: Using 19 documents with full content
🔍 Response preview: ...
⚠️  Empty response, retrying...
🔍 Response preview: ...
⚠️  Empty response, retrying...
🔍 Response preview: ...
⚠️  Empty response, retrying...


Processing:  80%|████████  | 4/5 [01:25<00:26, 26.89s/it]

❌ Q4: Pred=Unknown, Gold=B,D

📄 Topic 5: Using 16 documents with full content
🔍 Response preview: ...
⚠️  Empty response, retrying...
🔍 Response preview: ...
⚠️  Empty response, retrying...
🔍 Response preview: ...
⚠️  Empty response, retrying...


Processing: 100%|██████████| 5/5 [02:10<00:00, 26.00s/it]

❌ Q5: Pred=Unknown, Gold=B

✅ Results saved to results/causal_inference_full_docs.json!





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


DETAILED RESULTS

[Question 1] ❌
Topic: 29
Documents Used: 23
Target: Walmart and Amazon are investigating issuing their own stablecoins in ...
Golden: A
Predicted: Unknown
Status: INCORRECT ✗
Reasoning: Error: Failed after 3 attempts...
------------------------------------------------------------

[Question 2] ❌
Topic: 23
Documents Used: 24
Target: China banned food imports from 10 Japanese prefectures and imposed rad...
Golden: B,C,D
Predicted: Unknown
Status: INCORRECT ✗
Reasoning: Error: Failed after 3 attempts...
------------------------------------------------------------

[Question 3] ❌
Topic: 33
Documents Used: 18
Target: President Bolsonaro announced a 60-day ban on setting fires for land c...
Golden: D
Predicted: Unknown
Status: INCORRECT ✗
Reasoning: Error: No valid response...
------------------------------------------------------------

[Question 4] ❌
Topic: 3
Documents Used: 19
Target: The congressional certification of Joe Biden’s victory was halted....
Golden: B,D
Pred