# Test Assessment Generation

This notebook runs the BridgeAI assessment generator so you can try different job descriptions and see the generated title, description, and time limit.

**Setup:**
1. Install Jupyter: `pip install jupyter` (if needed).
2. From the **repo root** (`bridge-assessements/`): run `jupyter notebook` or `jupyter lab`, then open `notebooks/test-assessment-generation.ipynb`.
3. Ensure `server/config.env` exists and contains an API key for your LLM provider (`OPENAI_API_KEY`, or the equivalent Anthropic or Gemini key).
4. Edit the job description in the next cell and run the cells (Run All, or run cell by cell).

**Note:** The notebook runs the same TypeScript script the app uses (`server/src/scripts/test-assessment-generation.ts`) through `npx tsx`, so Node.js must be installed. No server or database required.

**To test:** Run the cells in order (Run All, or one by one). `ENABLE_REVIEW` in the run cell controls the LLM review step: leave it set to `True` to see "Quality check" feedback in the preview, or set it to `False` to skip the review.

In [1]:
# Job description used as input for the assessment generator. The generation cell
# below writes this text to a temporary file and passes that file's path to the
# TypeScript script.
# Edit this job description and re-run the cell (and the one below) to see new output.
JOB_DESCRIPTION = """
Backend Engineer – Node.js

We're looking for a backend engineer to build and maintain APIs and services.

Requirements:
- 2+ years experience with Node.js and TypeScript
- Experience with REST APIs and relational databases (PostgreSQL)
- Familiarity with authentication (JWT, OAuth) and rate limiting
- Comfort with testing (unit and integration)
- Good communication and collaboration skills

Nice to have: Redis, message queues, Docker/Kubernetes.
"""

In [4]:
import json
import os
import shutil
import subprocess
import tempfile

# Set to True to run the LLM review step and get reviewFeedback (same as ENABLE_ASSESSMENT_REVIEW in config.env)
ENABLE_REVIEW = True
env = {**os.environ, "ENABLE_ASSESSMENT_REVIEW": "1" if ENABLE_REVIEW else "0"}

# Seconds to wait for the generator script before giving up.
GENERATION_TIMEOUT_SECONDS = 120

# Repo root: if cwd is notebooks/, go up one level; otherwise use cwd (e.g. when run from repo root)
REPO_ROOT = os.getcwd()
if os.path.basename(REPO_ROOT) == "notebooks":
    REPO_ROOT = os.path.dirname(REPO_ROOT)
SERVER_DIR = os.path.join(REPO_ROOT, "server")
SCRIPT_PATH = os.path.join(SERVER_DIR, "src", "scripts", "test-assessment-generation.ts")


def _parse_assessment(stdout):
    """Find the generated assessment in the script's stdout.

    Scans the non-empty lines from last to first and returns the first one that
    decodes to a JSON object containing "title", "description" and "timeLimit".
    Lines that are not JSON, or that decode to something other than an object
    (e.g. a bare number printed by a logger), are skipped.

    Returns the decoded dict, or None when no line qualifies.
    """
    required_keys = ("title", "description", "timeLimit")
    for raw_line in reversed(stdout.strip().split("\n")):
        line = raw_line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            continue
        # json.loads happily returns ints/lists/strings; only a dict can hold the fields.
        if isinstance(data, dict) and all(key in data for key in required_keys):
            return data
    return None


# Reset up front so the preview / quality cells see "no assessment" instead of a
# stale value (or a NameError on a fresh kernel) if anything below fails.
assessment = None

if not os.path.exists(SCRIPT_PATH):
    raise FileNotFoundError(f"Script not found: {SCRIPT_PATH}. Repo root used: {REPO_ROOT}")

# Resolve npx explicitly: this yields a clear error when Node.js is missing and
# also finds npx.cmd on Windows, where a bare "npx" is not resolved without a shell.
NPX_PATH = shutil.which("npx")
if NPX_PATH is None:
    raise FileNotFoundError("npx not found on PATH. Install Node.js (which provides npx) and re-run this cell.")

# Write UTF-8 explicitly: the job description may contain non-ASCII characters
# (e.g. an en dash) and the platform default encoding is not UTF-8 everywhere.
# NOTE(review): assumes the TypeScript script reads the file as UTF-8 — confirm.
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
    f.write(JOB_DESCRIPTION)
    temp_path = f.name

try:
    print("Running assessment generator..." + (" (with review step)" if ENABLE_REVIEW else ""))
    try:
        result = subprocess.run(
            [NPX_PATH, "tsx", "src/scripts/test-assessment-generation.ts", temp_path, "--json"],
            cwd=SERVER_DIR,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",  # never fail the cell on an undecodable byte in log output
            timeout=GENERATION_TIMEOUT_SECONDS,
            env=env,
        )
    except subprocess.TimeoutExpired:
        result = None
        print(f"Assessment generator timed out after {GENERATION_TIMEOUT_SECONDS} seconds.")

    if result is None:
        pass  # timeout already reported; assessment stays None
    elif result.returncode != 0:
        print(result.stdout)
        if result.stderr:
            print("STDERR:", result.stderr)
        print("Exit code:", result.returncode)
    else:
        # The JSON payload (title, timeLimit, description) is expected near the end of stdout.
        assessment = _parse_assessment(result.stdout)
        if assessment is None:
            print("Could not parse script output as JSON. Raw stdout (last 500 chars):")
            print(result.stdout[-500:])
        else:
            print("OK — title, description, timeLimit" + (", reviewFeedback" if assessment.get("reviewFeedback") else "") + " parsed.")
finally:
    # The temp file was created with delete=False, so remove it ourselves.
    os.unlink(temp_path)

Running assessment generator... (with review step)
OK — title, description, timeLimit, reviewFeedback parsed.


In [5]:
# Preview: how this assessment will look to candidates (matches CandidateAssessment.jsx)
import html

from IPython.display import display, Markdown, HTML


def _format_time_limit(mins):
    """Render a minute count for the header.

    Below 60 the value is shown as "<n> minutes"; from 60 upwards it is shown as
    whole hours plus any leftover minutes, e.g. 60 -> "1 hour",
    90 -> "1 hour 30 min", 240 -> "4 hours".
    """
    if mins < 60:
        return f"{mins} minutes"
    hours, leftover = divmod(mins, 60)
    # Pluralise on the hour count itself, so 61-119 minutes reads "1 hour ...".
    label = f"{hours} hour{'s' if hours != 1 else ''}"
    return f"{label} {leftover} min" if leftover else label


if assessment is None:
    print("No assessment to preview. Run the cell above first.")
else:
    # Show review feedback if present (from ENABLE_ASSESSMENT_REVIEW step)
    if assessment.get("reviewFeedback"):
        # Escape first (the text is LLM output), then turn line breaks into <br>.
        # Both real newlines and literal backslash-n sequences are treated as breaks.
        fb = (
            html.escape(assessment["reviewFeedback"])
            .replace("\\n", "\n")
            .replace("\n", "<br>")
        )
        display(HTML(f"""
        <div style="background:#EFF6FF; border:1px solid #93C5FD; padding:16px 20px; border-radius:12px; margin-bottom:16px;">
            <h3 style="margin:0 0 8px 0; font-size:1rem; color:#1E3A8A;">Quality check (review feedback)</h3>
            <p style="margin:0; color:#1E40AF; line-height:1.5;">{fb}</p>
        </div>
        """))

    time_str = _format_time_limit(assessment["timeLimit"])
    # The title is generated text too; escape it before embedding it in HTML.
    safe_title = html.escape(str(assessment["title"]))

    # Candidate-style header (blue) + card (white) with "Project Instructions"
    html_header = f"""
    <div style="background:#1E3A8A; color:white; padding:24px 32px; border-radius:12px 12px 0 0; text-align:center;">
        <h1 style="margin:0 0 4px 0; font-size:1.5rem;">{safe_title}</h1>
        <p style="margin:0; color:#93C5FD;">Technical Assessment</p>
        <p style="margin:8px 0 0 0; font-size:0.875rem; color:#BFDBFE;">{time_str} to complete</p>
    </div>
    <div style="border:1px solid #e5e7eb; border-top:none; padding:24px; border-radius:0 0 12px 12px; background:#fff; margin-bottom:16px;">
        <h2 style="font-size:1.125rem; margin:0 0 16px 0; color:#111;">Project Instructions</h2>
    </div>
    """
    display(HTML(html_header))
    # Description is Markdown (candidates see it rendered the same way via ReactMarkdown)
    display(Markdown(assessment["description"]))

## Scenario
You are tasked with building a REST API for a personal finance tracking tool called **BudgetBuddy**. This tool allows users to manage their expenses and income, categorize transactions, and generate reports. Users will be able to create accounts, log in, and manage their financial data securely.

## What you will build
You will create a REST API that supports user authentication, transaction management, and basic reporting functionalities. The API will allow users to register, log in, add transactions, and retrieve their transaction history.

## Requirements (must-have)
- Implement user registration and login endpoints that use JWT for authentication.
- Create an endpoint to add a transaction, which includes fields for `amount`, `category`, `description`, and `date`.
- Implement an endpoint to retrieve a user's transaction history, filtering by date range.
- Ensure that the API can handle rate limiting to prevent abuse (e.g., limit requests to 100 per hour per user).
- Use PostgreSQL as the database to store user and transaction data.
- Write unit tests for all endpoints and integration tests for user authentication and transaction management.

## Acceptance Criteria (definition of done)
- [ ] User can register with a unique email and password, receiving a JWT upon successful registration.
- [ ] User can log in with valid credentials and receive a JWT.
- [ ] User can add a transaction with valid data, and it is stored in the PostgreSQL database.
- [ ] User can retrieve their transaction history, filtered by a specified date range.
- [ ] API returns a 401 status code for unauthorized access to protected routes.
- [ ] API returns a 400 status code for invalid transaction data (e.g., missing fields).
- [ ] Rate limiting is implemented, returning a 429 status code when the limit is exceeded.
- [ ] All endpoints have appropriate HTTP status codes for success and error responses.
- [ ] Unit tests cover at least 80% of the codebase.
- [ ] Integration tests verify the end-to-end functionality of user authentication and transaction management.

## Constraints
- Do not implement a frontend for this assessment; focus solely on the API.
- Do not include advanced features like user roles or permissions; keep it simple.
- Avoid using third-party libraries for rate limiting; implement a basic in-memory solution.

## Provided / Assumptions
- You can assume that the PostgreSQL database is set up and accessible.
- Seed data for testing can include a few sample users and transactions.
- You may use any libraries or frameworks you prefer for building the API, as long as they are compatible with Node.js and TypeScript.

## Deliverables
1. Source code for the API, including all necessary files and configurations.
2. A README file that includes setup instructions, API endpoint documentation, and testing instructions.
3. Database schema or migration files for PostgreSQL.

## Nice-to-haves (optional)
- Implement caching for transaction retrieval using Redis.
- Add pagination to the transaction history endpoint.
- Use Docker to containerize the application for easier deployment.

---
## How to judge quality

Use the **checklist** below (subjective) and the **Quality snapshot** in the next cell (objective checks from the prompt rules).

**Checklist (subjective)**  
- **Fit to role:** Does the scenario and tech stack match the job description?  
- **Specific, not generic:** Is it a concrete task (e.g. “API for article CRUD”) rather than “build a full‑stack app”?  
- **Scopable in time:** Could a strong candidate finish in the given time limit?  
- **Clear requirements:** Are must-haves unambiguous? Is “definition of done” observable (not vague)?  
- **Fair:** Are constraints and “provided/assumptions” clear so candidates aren’t penalized for guessing?

In [6]:
# Quality snapshot (objective checks from prompt rules in server/src/prompts/index.ts)
import re

if assessment is None:
    print("No assessment. Run the generation cell first.")
else:
    # Generated fields, with empty fallbacks when a key is absent.
    desc = assessment.get("description", "")
    title = assessment.get("title", "")
    time_limit = assessment.get("timeLimit", 0)

    # Raw measurements taken from the Markdown description.
    word_count = len(desc.split())
    sections = re.findall(r"^##\s+(.+)$", desc, re.MULTILINE)          # level-2 headings
    checklist_items = re.findall(r"^\s*-\s*\[\s*\]", desc, re.MULTILINE)  # unchecked "- [ ]" items

    # Pass/fail checks against the fixed thresholds.
    time_ok = 30 <= time_limit <= 480
    words_ok = 300 <= word_count <= 650
    criteria_ok = len(checklist_items) >= 10

    # Key phrases from prompt (flexible match): each must occur somewhere in the
    # lower-cased, space-joined heading text.
    required_phrases = [
        "scenario",
        "what you will build",
        "requirements",
        "acceptance criteria",
        "constraints",
        "provided",
        "assumptions",
        "deliverables",
        "nice-to-have",
    ]
    section_text = " ".join(heading.strip().lower() for heading in sections)
    missing = [phrase for phrase in required_phrases if phrase not in section_text]

    # Verdict labels, worked out ahead of the report lines.
    if time_ok:
        time_verdict = "✓ in range [30–480]"
    else:
        time_verdict = "✗ outside 30–480"
    if words_ok:
        words_verdict = "✓ in range [300–650]"
    else:
        words_verdict = "✗ outside 300–650"
    if criteria_ok:
        criteria_verdict = "✓ ≥ 10"
    else:
        criteria_verdict = "✗ need ≥ 10"
    sections_listing = ", ".join(sections) or "(none)"

    print("━━━ Quality snapshot ━━━\n")
    print(f"  Title length:     {len(title)} chars")
    print(f"  Time limit:       {time_limit} min  {time_verdict}")
    print(f"  Description:      {word_count} words  {words_verdict}")
    print(f"  Acceptance items: {len(checklist_items)}  {criteria_verdict}")
    print(f"  Sections found:  {sections_listing}")
    if not missing:
        print("  Sections:         ✓ all expected section topics present")
    else:
        print(f"  Sections:         ✗ missing: {', '.join(missing)}")
    print()

━━━ Quality snapshot ━━━

  Title length:     53 chars
  Time limit:       240 min  ✓ in range [30–480]
  Description:      504 words  ✓ in range [300–650]
  Acceptance items: 10  ✓ ≥ 10
  Sections found:  Scenario, What you will build, Requirements (must-have), Acceptance Criteria (definition of done), Constraints, Provided / Assumptions, Deliverables, Nice-to-haves (optional)
  Sections:         ✓ all expected section topics present

