## Task 1: Create the Package Skeleton

Create the following structure:
```
src/
└── csv_profiler/
    ├── __init__.py
    ├── io.py
    ├── profiling.py
    ├── render.py
    └── cli.py
```

**Note:** This is a file system task. Create these files in your project.


In [None]:
# Solution
import csv
from pathlib import Path


def read_csv_rows(path: Path) -> list[dict[str, str]]:
    """Read a CSV file and return its data rows as dictionaries.

    Args:
        path: Location of the CSV file. The first line is used as the header.

    Returns:
        One dict per data row, mapping each header name to the raw cell text.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file contains no data rows.
    """
    if not path.exists():
        raise FileNotFoundError(f"CSV not found: {path}")

    # "utf-8-sig" drops a leading byte-order mark (common in spreadsheet
    # exports) so the first header is not read as "\ufeffname"; files without
    # a BOM decode exactly as with plain UTF-8.
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires, so line breaks inside quoted fields survive.
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f))

    if not rows:
        raise ValueError("CSV has no data rows")
    return rows


## Task 2: Move CSV Reading into `io.py`

**Task:** Create `read_csv_rows` function in `src/csv_profiler/io.py`

**Requirements:**
- Function signature: `read_csv_rows(path: Path) -> list[dict[str, str]]`
- Use `csv.DictReader`
- Raise `FileNotFoundError` if file doesn't exist
- Raise `ValueError` if CSV has no rows
- Return a list of row dictionaries


In [None]:
# Write this in src/csv_profiler/io.py
### CODE START HERE ###
import csv
from pathlib import Path


def read_csv_rows(path: Path) -> list[dict[str, str]]:
    """Read a CSV file and return a list of row dictionaries.

    Expected contract (from the task requirements):

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the CSV has no rows.
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Test read_csv_rows
# Note: This test assumes you have a test CSV file
# Create a test file first:
from pathlib import Path
import csv

# Create a test CSV
test_csv = Path("test_data.csv")
test_csv.write_text("name,age\nSara,23\nAli,30", encoding="utf-8")

# Import and test
try:
    from csv_profiler.io import read_csv_rows
    rows = read_csv_rows(test_csv)
    print(f"✅ Successfully read {len(rows)} rows")
    print(f"First row: {rows[0]}")

    # Test error handling
    try:
        read_csv_rows(Path("nonexistent.csv"))
        print("❌ Should have raised FileNotFoundError")
    except FileNotFoundError:
        print("✅ FileNotFoundError raised correctly")
except ImportError as e:
    print(f"⚠️  Could not import: {e}")
    print("Make sure you're running from the project root with PYTHONPATH=src")
finally:
    # Cleanup runs on every path (import failure, read failure, success) so
    # the temporary CSV never lingers in the working directory.
    test_csv.unlink(missing_ok=True)


In [None]:
# Solution
import csv
from pathlib import Path


def read_csv_rows(path: Path) -> list[dict[str, str]]:
    """Read a CSV file and return its data rows as dictionaries.

    Args:
        path: Location of the CSV file. The first line is used as the header.

    Returns:
        One dict per data row, mapping each header name to the raw cell text.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file contains no data rows.
    """
    if not path.exists():
        raise FileNotFoundError(f"CSV not found: {path}")

    # "utf-8-sig" drops a leading byte-order mark (common in spreadsheet
    # exports) so the first header is not read as "\ufeffname"; files without
    # a BOM decode exactly as with plain UTF-8.
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires, so line breaks inside quoted fields survive.
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f))

    if not rows:
        raise ValueError("CSV has no data rows")
    return rows


## Task 3: Move Profiling Logic into `profiling.py`

**Task:** Create helper functions and `profile_rows` in `src/csv_profiler/profiling.py`

**Requirements:**
- `is_missing(value: str | None) -> bool`
- `try_float(value: str) -> float | None`
- `infer_type(values: list[str]) -> str`
- `profile_rows(rows: list[dict[str, str]]) -> dict`

The report dict should have: `n_rows`, `n_cols`, `columns` (list of column profiles)


In [None]:
# Write this in src/csv_profiler/profiling.py
### CODE START HERE ###
def is_missing(value: str | None) -> bool:
    """Return True when a raw cell value should count as missing.

    The accompanying tests expect ``""`` and ``"NA"`` (case-insensitive) to be
    missing and ordinary text such as ``"valid"`` not to be.
    """
    # Your code here
    ...


def try_float(value: str) -> float | None:
    """Convert text to a float, or return None when it is not numeric.

    The accompanying tests expect ``"123"`` to give ``123.0`` and ``"abc"``
    to give ``None``.
    """
    # Your code here
    ...


def infer_type(values: list[str]) -> str:
    """Classify a column's values as ``"number"`` or ``"text"``.

    The accompanying tests expect ``"number"`` for all-numeric values and
    ``"text"`` for non-numeric values.
    """
    # Your code here
    ...


def profile_rows(rows: list[dict[str, str]]) -> dict:
    """Build the profiling report for a list of CSV row dictionaries.

    The report dict should have ``n_rows``, ``n_cols`` and ``columns`` (a
    list of per-column profiles).
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Test for: profiling functions
def test_exercise():
    """Run the profiling-function checks and report every failure at once.

    Each helper is imported and exercised in its own ``try`` block so that one
    missing or broken function does not hide problems in the others. All
    failure messages are collected, printed as a numbered list, and reported
    through a single ``AssertionError``.

    Raises:
        AssertionError: If at least one check failed.
    """
    errors = []

    # Test is_missing
    try:
        from csv_profiler.profiling import is_missing
        if not is_missing(""):
            errors.append("is_missing should return True for empty string")
        if not is_missing("NA"):
            errors.append("is_missing should return True for 'NA' (case-insensitive)")
        if is_missing("valid"):
            errors.append("is_missing should return False for valid text")
    except ImportError:
        errors.append("Could not import is_missing. Check your module path.")
    except Exception as e:
        errors.append(f"is_missing raised an error: {e}")

    # Test try_float
    try:
        from csv_profiler.profiling import try_float
        if try_float("123") != 123.0:
            errors.append("try_float should convert valid number")
        if try_float("abc") is not None:
            errors.append("try_float should return None for non-numeric")
    except ImportError:
        errors.append("Could not import try_float")
    except Exception as e:
        errors.append(f"try_float raised an error: {e}")

    # Test infer_type
    try:
        from csv_profiler.profiling import infer_type
        if infer_type(["1", "2", "3"]) != "number":
            errors.append("infer_type should return 'number' for numeric values")
        if infer_type(["a", "b", "c"]) != "text":
            errors.append("infer_type should return 'text' for non-numeric values")
    except ImportError:
        errors.append("Could not import infer_type")
    except Exception as e:
        errors.append(f"infer_type raised an error: {e}")

    # Test profile_rows (basic structure)
    try:
        from csv_profiler.profiling import profile_rows
        test_rows = [{"name": "Sara", "age": "23"}, {"name": "Ali", "age": "30"}]
        report = profile_rows(test_rows)
        if not isinstance(report, dict):
            # An unimplemented stub returns None; say so plainly instead of
            # surfacing a cryptic "argument of type 'NoneType'" TypeError.
            errors.append("profile_rows should return a dict")
        else:
            if "n_rows" not in report:
                errors.append("report should have 'n_rows' key")
            if "n_cols" not in report:
                errors.append("report should have 'n_cols' key")
            if "columns" not in report:
                errors.append("report should have 'columns' key")
            # Only compare the value when the key exists; otherwise a KeyError
            # would add a second, misleading "raised an error" entry.
            if "n_rows" in report and report["n_rows"] != 2:
                errors.append(f"Expected n_rows=2, got {report['n_rows']}")
    except ImportError:
        errors.append("Could not import profile_rows")
    except Exception as e:
        errors.append(f"profile_rows raised an error: {e}")

    if errors:
        print("❌ Some tests failed. Here's what went wrong:\n")
        for i, error in enumerate(errors, 1):
            print(f"{i}. {error}")
        raise AssertionError(f"{len(errors)} test(s) failed")
    else:
        print("✅ All tests passed! Great job!")

test_exercise()


In [None]:
# Solution
def is_missing(value: str | None) -> bool:
    if value is None:
        return True
    cleaned = value.strip().casefold()
    return cleaned in {"", "na", "n/a", "null", "none", "nan"}


def try_float(value: str) -> float | None:
    try:
        return float(value)
    except ValueError:
        return None


def infer_type(values: list[str]) -> str:
    """Classify a column as ``"number"`` or ``"text"``.

    Missing values are ignored. The column is ``"number"`` only when at least
    one value remains and every remaining value parses as a float; otherwise
    it is ``"text"``.
    """
    present = [item for item in values if not is_missing(item)]
    if present and all(try_float(item) is not None for item in present):
        return "number"
    return "text"


def profile_rows(rows: list[dict[str, str]]) -> dict:
    """Build a profiling report for a list of CSV row dictionaries.

    Args:
        rows: Row dictionaries mapping column name to raw cell text. Column
            names and their order are taken from the first row.

    Returns:
        A dict with ``n_rows``, ``n_cols`` and ``columns``. Each entry of
        ``columns`` holds ``name``, ``type``, ``missing``, ``missing_pct`` and
        ``unique``; columns inferred as ``"number"`` also get ``min``, ``max``
        and ``mean``. An empty ``rows`` list yields zero rows, zero columns
        and an empty ``columns`` list.
    """
    n_rows = len(rows)
    # Guard the empty case: rows[0] would raise IndexError on an empty list.
    columns = list(rows[0].keys()) if rows else []

    col_profiles = []
    for col in columns:
        values = [r.get(col, "") for r in rows]
        usable = [v for v in values if not is_missing(v)]
        missing = len(values) - len(usable)
        inferred = infer_type(values)
        profile = {
            "name": col,
            "type": inferred,
            "missing": missing,
            # n_rows is at least 1 here: columns is non-empty only when rows is.
            "missing_pct": 100.0 * missing / n_rows,
            "unique": len(set(usable)),
        }
        if inferred == "number":
            nums = [x for x in map(try_float, usable) if x is not None]
            if nums:
                profile.update({"min": min(nums), "max": max(nums), "mean": sum(nums) / len(nums)})
        col_profiles.append(profile)

    return {"n_rows": n_rows, "n_cols": len(columns), "columns": col_profiles}


## Task 4: Render Markdown in `render.py`

**Task:** Create `render_markdown` function in `src/csv_profiler/render.py`

**Requirements:**
- Function signature: `render_markdown(report: dict) -> str`
- Include: title, dataset summary, a table of columns
- Return a multi-line Markdown string


In [None]:
# Write this in src/csv_profiler/render.py
### CODE START HERE ###
from datetime import datetime


def render_markdown(report: dict) -> str:
    """Render the profiling report as a multi-line Markdown string.

    Per the task requirements the output should include a title, a dataset
    summary and a table of columns.
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Solution
from datetime import datetime


def _md_cell(value: object) -> str:
    """Make ``value`` safe for a single Markdown table cell."""
    # A raw "|" ends the cell early and a line break ends the table row, so
    # escape the former and flatten the latter to spaces.
    flattened = " ".join(str(value).splitlines())
    return flattened.replace("|", "\\|")


def render_markdown(report: dict) -> str:
    """Render a profiling report as a Markdown document.

    Args:
        report: Dict with ``n_rows``, ``n_cols`` and ``columns``; each column
            entry must provide ``name``, ``type``, ``missing``,
            ``missing_pct`` and ``unique``.

    Returns:
        A multi-line Markdown string with a title, generation timestamp,
        dataset summary, one table row per column, and a notes section.

    Raises:
        KeyError: If a required key is absent from ``report`` or a column.
    """
    lines: list[str] = []

    lines.append("# CSV Profiling Report\n")
    lines.append(f"Generated: {datetime.now().isoformat(timespec='seconds')}\n")

    lines.append("## Summary\n")
    lines.append(f"- Rows: **{report['n_rows']}**")
    lines.append(f"- Columns: **{report['n_cols']}**\n")

    lines.append("## Columns\n")
    lines.append("| name | type | missing | missing_pct | unique |")
    lines.append("|---|---:|---:|---:|---:|")
    # Column names come straight from the CSV header, so escape them before
    # placing them in the table.
    lines.extend(
        f"| {_md_cell(c['name'])} | {_md_cell(c['type'])} | {c['missing']} "
        f"| {c['missing_pct']:.1f}% | {c['unique']} |"
        for c in report["columns"]
    )

    lines.append("\n## Notes\n")
    lines.append("- Missing values are: `''`, `na`, `n/a`, `null`, `none`, `nan` (case-insensitive)")

    return "\n".join(lines)


## Task 5: Wire Everything in `cli.py`

**Task:** Implement the `profile` command in `src/csv_profiler/cli.py`

**Requirements:**
- Call `read_csv_rows()`, `profile_rows()`, `render_markdown()`
- Write outputs to `out_dir`: `<report_name>.json` and `<report_name>.md`
- Add timing information
- Handle errors gracefully


In [None]:
# Write this in src/csv_profiler/cli.py
### CODE START HERE ###
import json
import time
import typer
from pathlib import Path

from csv_profiler.io import read_csv_rows
from csv_profiler.profiling import profile_rows
from csv_profiler.render import render_markdown

app = typer.Typer()

@app.command(help="Profile a CSV file and write JSON + Markdown")
def profile(
    input_path: Path = typer.Argument(..., help="Input CSV file"),
    out_dir: Path = typer.Option(Path("outputs"), "--out-dir", help="Output folder"),
    report_name: str = typer.Option("report", "--report-name", help="Base name for outputs"),
    preview: bool = typer.Option(False, "--preview", help="Print a short summary"),
):
    # Expected steps (from the task requirements):
    #   1. Call read_csv_rows(), profile_rows() and render_markdown().
    #   2. Write <report_name>.json and <report_name>.md into out_dir.
    #   3. Add timing information.
    #   4. Handle errors gracefully.
    # Your implementation here
    ...

if __name__ == "__main__":
    app()
### CODE END HERE ###


In [None]:
# Solution
import json
import time
import typer
from pathlib import Path

from csv_profiler.io import read_csv_rows
from csv_profiler.profiling import profile_rows
from csv_profiler.render import render_markdown

app = typer.Typer()

@app.command(help="Profile a CSV file and write JSON + Markdown")
def profile(
    input_path: Path = typer.Argument(..., help="Input CSV file"),
    out_dir: Path = typer.Option(Path("outputs"), "--out-dir", help="Output folder"),
    report_name: str = typer.Option("report", "--report-name", help="Base name for outputs"),
    preview: bool = typer.Option(False, "--preview", help="Print a short summary"),
):
    # Reads the CSV, profiles it, and writes <report_name>.json and
    # <report_name>.md into out_dir (created if needed). The read + profile
    # time is stored in the report as "timing_ms". Any failure is reported on
    # stderr and the process exits with status 1.
    try:
        t0 = time.perf_counter_ns()
        rows = read_csv_rows(input_path)
        report = profile_rows(rows)
        t1 = time.perf_counter_ns()
        report["timing_ms"] = (t1 - t0) / 1_000_000  # nanoseconds -> milliseconds

        out_dir.mkdir(parents=True, exist_ok=True)

        json_path = out_dir / f"{report_name}.json"
        json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
        typer.secho(f"Wrote {json_path}", fg=typer.colors.GREEN)

        md_path = out_dir / f"{report_name}.md"
        md_path.write_text(render_markdown(report), encoding="utf-8")
        typer.secho(f"Wrote {md_path}", fg=typer.colors.GREEN)

        if preview:
            typer.echo(f"Rows: {report['n_rows']} | Cols: {report['n_cols']} | {report['timing_ms']:.2f}ms")

    except Exception as e:
        # Top-level CLI boundary: send the message to stderr so it does not
        # mix with normal output when stdout is piped or redirected, and keep
        # the original exception chained for debugging.
        typer.secho(f"Error: {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from e


## Recap

You now have:
- a real Python package layout
- a CLI that reads CSV and writes JSON + Markdown
- timing + better error handling

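**To run your CLI:**
```bash
PYTHONPATH=src uv run python -m csv_profiler.cli data/sample.csv --preview
```

**Note:** `cli.py` registers only one command, and Typer runs a lone command directly, so the command name `profile` is not typed on the command line (it would be parsed as the input path). If you later add a second command, invoke this one as `... -m csv_profiler.cli profile data/sample.csv --preview`. Make sure `cli.py` ends with `if __name__ == "__main__": app()` so that `python -m` actually starts the app.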
