## Setup: Install Dependencies

In [None]:
!pip install -q python-dotenv pymupdf sentence-transformers faiss-cpu huggingface-hub tqdm numpy

## Configure HuggingFace Token

Get your free token from: https://huggingface.co/settings/tokens

In [None]:
import os
from getpass import getpass

# Resolve the HuggingFace token without ever hard-coding it in the notebook:
#   1. reuse HF_TOKEN if it is already exported, so "Restart & Run All" does not
#      stop at an interactive prompt;
#   2. otherwise ask for it with getpass, which does not echo the input.
# Surrounding whitespace (a common copy/paste artefact) is stripped in both cases.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
if not HF_TOKEN:
    HF_TOKEN = getpass("Enter your HuggingFace token: ").strip()

if not HF_TOKEN:
    # Keep going (same as before), but make the empty token visible up front instead
    # of letting it surface later as an unrelated-looking failure.
    print("WARNING: no HuggingFace token provided; HF_TOKEN is empty.")

# Export the token through the process environment for later cells.
os.environ["HF_TOKEN"] = HF_TOKEN

## Load RAG Pipeline

In [None]:
import logging

# Show INFO-level messages as "LEVEL: message".
# basicConfig() is a no-op when the root logger already has handlers (e.g. the
# runtime pre-configured logging, or this cell is being re-run); force=True
# replaces those handlers so the configuration below always takes effect.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s: %(message)s",
    force=True,
)

# Imported after logging is configured, matching the original statement order.
from src.rag_pipeline import create_pipeline

# Pipeline object reused by every cell below.
pipeline = create_pipeline()

## Index Documents

This step:
1. Parses both PDFs
2. Chunks text into 800-char segments with overlap
3. Generates embeddings using sentence-transformers
4. Builds FAISS vector index

In [None]:
# Inputs for the indexing step, named once so they are easy to spot and change.
APPLE_PDF_PATH = "data/10-Q4-2024-As-Filed.pdf"
TESLA_PDF_PATH = "data/tsla-20231231-gen.pdf"
VECTOR_STORE_PATH = "vector_store"  # passed as save_path; presumably where the index is persisted

pipeline.index_documents(
    apple_pdf=APPLE_PDF_PATH,
    tesla_pdf=TESLA_PDF_PATH,
    save_path=VECTOR_STORE_PATH,
)

# Report how many chunks the vector store holds after indexing.
indexed_chunk_count = pipeline.vector_store.size
print(f"\nIndexed {indexed_chunk_count} chunks")

## Test the Required Interface

The assignment requires an `answer_question(query)` function that returns:
```python
{
    "answer": "...",
    "sources": ["Apple 10-K", "Item 8", "p. 28"]
}
```

In [None]:
# Smoke test: ask a single question and show both fields of the returned dict.
sample_query = "What was Apple's total revenue for the fiscal year ended September 28, 2024?"
result = pipeline.answer_question(sample_query)

for label, field in (("Answer:", "answer"), ("Sources:", "sources")):
    print(label, result[field])

## Run Full Evaluation (13 Questions)

This cell runs all assignment questions and saves results to `outputs/answers.json`.

In [None]:
import json

# Assignment questions in their official order; question_id is the 1-based position.
_QUESTION_TEXTS = (
    "What was Apple's total revenue for the fiscal year ended September 28, 2024?",
    "How many shares of common stock were issued and outstanding as of October 18, 2024?",
    "What is the total amount of term debt (current + non-current) reported by Apple as of September 28, 2024?",
    "On what date was Apple's 10-K report for 2024 signed and filed with the SEC?",
    "Does Apple have any unresolved staff comments from the SEC as of this filing? How do you know?",
    "What was Tesla's total revenue for the year ended December 31, 2023?",
    "What percentage of Tesla's total revenue in 2023 came from Automotive Sales (excluding Leasing)?",
    "What is the primary reason Tesla states for being highly dependent on Elon Musk?",
    "What types of vehicles does Tesla currently produce and deliver?",
    "What is the purpose of Tesla's 'lease pass-through fund arrangements'?",
    "What is Tesla's stock price forecast for 2025?",
    "Who is the CFO of Apple as of 2025?",
    "What color is Tesla's headquarters painted?",
)

# Same structure as before: a list of {"question_id": int, "question": str} dicts.
QUESTIONS = [
    {"question_id": number, "question": text}
    for number, text in enumerate(_QUESTION_TEXTS, start=1)
]

import os  # imported here too so this cell does not rely on an earlier cell's import

# Collected results: exactly one {"question_id", "answer", "sources"} dict per question.
# Re-initialised on every run so re-executing the cell never accumulates duplicates.
answers = []
total_questions = len(QUESTIONS)  # derived from the list instead of a hard-coded 13

for i, q in enumerate(QUESTIONS, 1):
    print(f"\n[{i}/{total_questions}] Q{q['question_id']}: {q['question'][:70]}...")

    try:
        result = pipeline.answer_question(q["question"])
        entry = {
            "question_id": q["question_id"],
            "answer": result["answer"],
            "sources": result["sources"]
        }
        # Built inside the try on purpose: if the answer cannot be sliced, the
        # question is recorded as an error below rather than as a success.
        outcome = f"   Answer: {result['answer'][:100]}..."
    except Exception as e:
        # Best effort: one failing question must not abort the whole evaluation.
        entry = {
            "question_id": q["question_id"],
            "answer": f"Error: {str(e)}",
            "sources": []
        }
        outcome = f"   ERROR: {e}"

    # Append after the try/except so each question yields exactly one entry.
    # (Previously the success entry was appended inside the try, so a failure in
    # the preview print added a second, error entry for the same question.)
    answers.append(entry)
    print(outcome)

# Save results
os.makedirs("outputs", exist_ok=True)
with open("outputs/answers.json", "w", encoding="utf-8") as f:
    json.dump(answers, f, indent=2)

print("Evaluation complete. Results saved to outputs/answers.json")

## View Results

In [None]:
# One summary line per question: "✓" when sources were returned, "✗" when the
# sources list is empty, followed by a single-line preview of the answer.
for entry in answers:
    marker = "✗" if not entry["sources"] else "✓"
    first_chars = entry["answer"][:70]
    one_line_preview = " ".join(first_chars.split("\n"))  # newlines -> spaces
    print(f"{marker} Q{entry['question_id']:2d}: {one_line_preview}...")

## Download Results

Download `outputs/answers.json` using the file browser on the left. The cell below also prints the full JSON so you can verify its contents.

In [None]:
# Render the collected answers as pretty-printed JSON (2-space indent) so the
# saved results can be checked by eye.
answers_as_json = json.dumps(answers, indent=2)
print(answers_as_json)