# Task 02 — Fixtures & Parametrize

Practice writing fixtures, sharing them through `conftest.py`, and applying advanced `parametrize` patterns.

## Setup

In [None]:
import subprocess, sys, os, textwrap, tempfile, pathlib

# Resolve the directory holding the sample modules. The parent-relative
# location is preferred; the current-directory location is the fallback and
# is used even if it does not exist either.
_parent_fixtures, _local_fixtures = (
    os.path.abspath(os.path.join(*parts))
    for parts in (("..", "fixtures", "input"), ("fixtures", "input"))
)
FIXTURES = _parent_fixtures if os.path.exists(_parent_fixtures) else _local_fixtures
# Put the fixture modules first on the import path for this notebook session.
sys.path.insert(0, FIXTURES)

def run_pytest(test_code: str, extra_files: dict[str, str] | None = None, extra_args: list[str] | None = None):
    with tempfile.TemporaryDirectory() as td:
        td_path = pathlib.Path(td)
        p = td_path / "test_tmp.py"
        p.write_text(textwrap.dedent(test_code))
        if extra_files:
            for name, content in extra_files.items():
                (td_path / name).write_text(textwrap.dedent(content))
        cmd = [sys.executable, "-m", "pytest", str(td), "-v", "--tb=short", "--no-header"]
        if extra_args:
            cmd.extend(extra_args)
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        print(result.stdout + result.stderr)
        return result.returncode

# Signal that the setup cell ran to the end (FIXTURES and run_pytest are defined).
print("Setup complete.")

## Task 2.1: Create Fixtures for Text Processing

Write a `conftest.py` with these fixtures:
- `sample_texts` — returns a list of 5 diverse text strings
- `empty_text` — returns ""
- `text_with_emails` — returns a string containing 2+ email addresses

Then write tests that use them.

In [None]:
# YOUR CODE HERE
# Fill in the commented-out fixtures and tests below. `conftest_code` is
# written next to the generated test module as conftest.py by run_pytest.
# FIXTURES is embedded with !r so the generated source holds a valid string
# literal even when the path contains backslashes or quotes (e.g. on Windows).
conftest_code = f'''
import sys, pytest
sys.path.insert(0, {FIXTURES!r})

# @pytest.fixture
# def sample_texts():
#     ...
'''

test_code = f'''
import sys
sys.path.insert(0, {FIXTURES!r})
from sample_module import tokenize, extract_emails

# def test_tokenize_samples(sample_texts):
#     for text in sample_texts:
#         tokens = tokenize(text)
#         assert isinstance(tokens, list)
#
# def test_extract_from_email_text(text_with_emails):
#     emails = extract_emails(text_with_emails)
#     assert len(emails) >= 2
'''

# TEST — Do not modify
rc = run_pytest(test_code, extra_files={"conftest.py": conftest_code})
assert rc == 0, "Tests should pass"
print("Task 2.1 passed!")

## Task 2.2: Parametrize ML Preprocessing

Use `@pytest.mark.parametrize` to test `preprocess_texts` from `sample_ml_pipeline.py` with different inputs.

Test cases should include:
- Normal text → lowercased and stripped
- Text with URLs → URLs removed
- Text with extra whitespace → normalized

In [None]:
# YOUR CODE HERE
# Fill in the parametrized test below; each tuple is one (texts, expected) case.
# FIXTURES is embedded with !r so the generated source holds a valid string
# literal even when the path contains backslashes or quotes (e.g. on Windows).
test_preprocess = f'''
import sys, pytest
sys.path.insert(0, {FIXTURES!r})
from sample_ml_pipeline import preprocess_texts

# @pytest.mark.parametrize("texts, expected", [
#     (["Hello World"], ["hello world"]),
#     (["Check https://example.com out"], ["check out"]),
#     ...
# ])
# def test_preprocess(texts, expected):
#     assert preprocess_texts(texts) == expected
'''

# TEST — Do not modify
rc = run_pytest(test_preprocess)
assert rc == 0, "Tests should pass"
print("Task 2.2 passed!")

## Task 2.3: Use `tmp_path` for File-Based Tests

Write a test that:
1. Creates a CSV file in `tmp_path` with text data
2. Reads it back with pandas
3. Applies `clean_text` to each row
4. Asserts the results are correct

In [None]:
# YOUR CODE HERE
# Fill in the tmp_path-based test below. The template uses chr(10) for
# newlines so no backslash escapes appear inside this f-string.
# FIXTURES is embedded with !r so the generated source holds a valid string
# literal even when the path contains backslashes or quotes (e.g. on Windows).
test_file = f'''
import sys
sys.path.insert(0, {FIXTURES!r})
import pandas as pd
from sample_module import clean_text

# def test_csv_processing(tmp_path):
#     # 1. Create CSV
#     csv_path = tmp_path / "data.csv"
#     csv_path.write_text("text" + chr(10) + "  hello   world  " + chr(10) + "  foo   bar  " + chr(10))
#     # 2. Read with pandas
#     df = pd.read_csv(csv_path)
#     # 3. Apply clean_text
#     df["cleaned"] = df["text"].apply(clean_text)
#     # 4. Assert
#     assert df["cleaned"].tolist() == ["hello world", "foo bar"]
'''

# TEST — Do not modify
rc = run_pytest(test_file)
assert rc == 0, "Tests should pass"
print("Task 2.3 passed!")

## Task 2.4: Fixture with Yield (Setup + Teardown)

Write a fixture that:
1. Creates a trained ML pipeline (setup)
2. Yields it
3. Appends "pipeline cleaned up" to a module-level list after the test finishes (teardown)

Then write a test using this fixture.

In [None]:
# YOUR CODE HERE
# Fill in the yield fixture and tests below. Code after `yield` is the
# teardown and runs once the test using the fixture has finished.
# NOTE: test_cleanup_happened only passes if it runs after test_predict;
# pytest's default order is definition order, so keep it last in the module.
# FIXTURES is embedded with !r so the generated source holds a valid string
# literal even when the path contains backslashes or quotes (e.g. on Windows).
test_yield = f'''
import sys, pytest, numpy as np
sys.path.insert(0, {FIXTURES!r})
from sample_ml_pipeline import create_pipeline, train_pipeline, predict, SAMPLE_TEXTS, SAMPLE_LABELS

cleanup_log = []

# @pytest.fixture
# def trained_pipe():
#     np.random.seed(42)
#     pipe = create_pipeline(max_features=100)
#     train_pipeline(pipe, SAMPLE_TEXTS, SAMPLE_LABELS)
#     yield pipe
#     cleanup_log.append("pipeline cleaned up")

# def test_predict(trained_pipe):
#     preds = predict(trained_pipe, ["great product"])
#     assert len(preds) == 1

# def test_cleanup_happened():
#     assert "pipeline cleaned up" in cleanup_log
'''

# TEST — Do not modify
rc = run_pytest(test_yield)
assert rc == 0, "Tests should pass"
print("Task 2.4 passed!")