# Hands-On Session

## Task 1: Create the Package Skeleton

Create the following structure:
```
src/
└── csv_profiler/
    ├── __init__.py
    ├── io.py
    ├── profiling.py
    ├── render.py
    └── cli.py
main.py
```

**Note:** This is a file system task. Create these files in your project.


In [None]:
# main.py
import csv_profiler.profiling



In [None]:
# PYTHONPATH=src uv run main.py

In [None]:
# Solution
import csv
from pathlib import Path


def read_csv_rows(path: Path) -> list[dict[str, str]]:
    """Read a CSV file and return its data rows as dictionaries.

    The first row of the file supplies the keys; every following row
    becomes one dict mapping those keys to the row's string values.

    Args:
        path: Location of the CSV file to read.

    Returns:
        One dict per data row, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file yields no data rows.
    """
    if not path.exists():
        raise FileNotFoundError(f"CSV not found: {path}")

    # newline="" is what the csv module asks for: it stops the text layer
    # from translating line endings, so line breaks embedded in quoted
    # fields reach the parser unchanged.
    with path.open("r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    if not rows:
        raise ValueError("CSV has no data rows")
    return rows

## Task 2: Move CSV Reading into `io.py`

**Task:** Create the `read_csv_rows` function in `src/csv_profiler/io.py`

**Requirements:**
- Function signature: `read_csv_rows(path: Path) -> list[dict[str, str]]`
- Use `csv.DictReader`
- Raise `FileNotFoundError` if file doesn't exist
- Raise `ValueError` if CSV has no rows
- Return a list of row dictionaries


In [None]:
# Write this in src/csv_profiler/io.py
### CODE START HERE ###
import csv
from pathlib import Path


def read_csv_rows(path: Path) -> list[dict[str, str]]:
    """Read a CSV file and return a list of row dictionaries.

    Exercise stub. The task requirements ask for an implementation that
    uses ``csv.DictReader``, raises ``FileNotFoundError`` when the file
    does not exist, raises ``ValueError`` when the CSV has no rows, and
    otherwise returns the rows as a list of dictionaries.
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Test read_csv_rows
from tests import test_read_csv_rows

test_read_csv_rows()

In [None]:
# Test for: Person class
from tests import test_person_class

test_person_class()

# Test for: slugify function
from tests import test_slugify

test_slugify()

In [None]:
# Write this in src/csv_profiler/profiling.py
### CODE START HERE ###
def is_missing(value: str | None) -> bool:
    """Return whether ``value`` should be counted as a missing entry.

    Exercise stub.
    NOTE(review): the report notes rendered later in this session list the
    missing markers as '', na, n/a, null, none and nan (case-insensitive);
    confirm the exact rules against the tests.
    """
    # Your code here
    ...


def try_float(value: str) -> float | None:
    """Attempt to interpret ``value`` as a float.

    Exercise stub.
    NOTE(review): the ``float | None`` return type suggests ``None`` is the
    result for text that is not a number -- confirm against the tests.
    """
    # Your code here
    ...


def infer_type(values: list[str]) -> str:
    """Choose a type label for a column from its string values.

    Exercise stub.
    NOTE(review): the set of labels to return is not specified in this
    cell -- check the tests for the expected names.
    """
    # Your code here
    ...


def profile_rows(rows: list[dict[str, str]]) -> dict:
    """Build the profiling report for the parsed CSV rows.

    Exercise stub. The Markdown renderer and CLI in this session read the
    keys ``n_rows``, ``n_cols`` and ``columns`` from the returned dict, and
    from each entry of ``columns`` the keys ``name``, ``type``, ``missing``,
    ``missing_pct`` and ``unique``, so the report needs to provide them.
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Test for: profiling functions
from tests import test_profiling_functions

test_profiling_functions()

In [None]:
# Test for: ColumnProfile class
from tests import test_column_profile_class

test_column_profile_class()

## Task 4: Render Markdown in `render.py`

**Task:** Create the `render_markdown` function in `src/csv_profiler/render.py`

**Requirements:**
- Function signature: `render_markdown(report: dict) -> str`
- Include: title, dataset summary, a table of columns
- Return a multi-line Markdown string


In [None]:
# Write this in src/csv_profiler/render.py
### CODE START HERE ###
from datetime import datetime


def render_markdown(report: dict) -> str:
    """Render a profiling report as a multi-line Markdown string.

    Exercise stub. The task requirements ask for a title, a dataset
    summary, and a table of columns.
    """
    # Your code here
    ...
### CODE END HERE ###


In [None]:
# Solution
from datetime import datetime


def _md_cell(value: object) -> str:
    """Return ``value`` as text that is safe inside a Markdown table cell."""
    # A raw "|" would open a new cell and a line break would end the table
    # row, so escape the former and flatten the latter to a space.
    text = str(value).replace("|", "\\|")
    return text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")


def render_markdown(report: dict) -> str:
    """Render a profiling report as a Markdown document.

    The document contains a title, a generation timestamp, a summary with
    the row and column counts, one table row per profiled column, and a
    note listing which values count as missing.

    Args:
        report: Profiling report. Must provide ``n_rows``, ``n_cols`` and
            ``columns``; each entry of ``columns`` must provide ``name``,
            ``type``, ``missing``, ``missing_pct`` (a number, shown with one
            decimal place) and ``unique``.

    Returns:
        The Markdown text, with lines joined by ``"\\n"``.

    Raises:
        KeyError: If one of the keys listed above is absent.
    """
    lines: list[str] = [
        # Header with timestamp
        "# CSV Profiling Report\n",
        f"Generated: {datetime.now().isoformat(timespec='seconds')}\n",
        # Summary section
        "## Summary\n",
        f"- Rows: **{report['n_rows']}**",
        f"- Columns: **{report['n_cols']}**\n",
        # Columns table: header row, then the alignment row
        "## Columns\n",
        "| name | type | missing | missing_pct | unique |",
        "|---|---:|---:|---:|---:|",
    ]

    # Column names originate from CSV headers, so they are escaped before
    # being placed in a cell; the other values are produced by the profiler.
    lines.extend(
        f"| {_md_cell(c['name'])} | {c['type']} | {c['missing']} "
        f"| {c['missing_pct']:.1f}% | {c['unique']} |"
        for c in report["columns"]
    )

    # Notes section
    lines.append("\n## Notes\n")
    lines.append("- Missing values are: `''`, `na`, `n/a`, `null`, `none`, `nan` (case-insensitive)")

    return "\n".join(lines)

## Task 5: Wire Everything in `cli.py`

**Task:** Implement the `profile` command in `src/csv_profiler/cli.py`

**Requirements:**
- Call `read_csv_rows()`, `profile_rows()`, `render_markdown()`
- Write outputs to `out_dir`: `<report_name>.json` and `<report_name>.md`
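- Add timing information (e.g. store the elapsed milliseconds in the report as `timing_ms`)
- Handle errors gracefully: print the error message and exit with a non-zero status code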


In [None]:
# Write this in src/csv_profiler/cli.py
### CODE START HERE ###
import json
import time
import typer
from pathlib import Path

from csv_profiler.io import read_csv_rows
from csv_profiler.profiling import profile_rows
from csv_profiler.render import render_markdown

# Typer application object; commands are registered on it with @app.command.
app = typer.Typer()

@app.command(help="Profile a CSV file and write JSON + Markdown")
def profile(
    input_path: Path = typer.Argument(..., help="Input CSV file"),
    out_dir: Path = typer.Option(Path("outputs"), "--out-dir", help="Output folder"),
    report_name: str = typer.Option("report", "--report-name", help="Base name for outputs"),
    preview: bool = typer.Option(False, "--preview", help="Print a short summary"),
):
    """Profile ``input_path`` and write the report files into ``out_dir``.

    Exercise stub. The task requirements ask the implementation to call
    ``read_csv_rows()``, ``profile_rows()`` and ``render_markdown()``, write
    ``<report_name>.json`` and ``<report_name>.md`` to ``out_dir``, add
    timing information, and handle errors gracefully.
    """
    # Your implementation here
    ...

# Run the Typer app only when this module is executed as a script
# (for example with `python -m csv_profiler.cli`), not when it is imported.
if __name__ == "__main__":
    app()
### CODE END HERE ###


In [None]:
# Solution
import json
import time
import typer
from pathlib import Path

from csv_profiler.io import read_csv_rows
from csv_profiler.profiling import profile_rows
from csv_profiler.render import render_markdown

# Typer application object; commands are registered on it with @app.command.
app = typer.Typer()


@app.command(help="Profile a CSV file and write JSON + Markdown")
def profile(
    input_path: Path = typer.Argument(..., help="Input CSV file"),
    out_dir: Path = typer.Option(Path("outputs"), "--out-dir", help="Output folder"),
    report_name: str = typer.Option("report", "--report-name", help="Base name for outputs"),
    preview: bool = typer.Option(False, "--preview", help="Print a short summary"),
):
    """Profile a CSV file and write the report as JSON and Markdown.

    Reads ``input_path``, profiles its rows, records how long reading and
    profiling took under ``report["timing_ms"]``, and writes
    ``<report_name>.json`` and ``<report_name>.md`` into ``out_dir``
    (created if needed).

    Args:
        input_path: CSV file to profile.
        out_dir: Folder that receives the two report files.
        report_name: File name (without extension) shared by both outputs.
        preview: When true, also print a one-line summary with the row
            count, column count and timing.

    Raises:
        typer.Exit: With exit code 1 when any step fails; the error message
            is printed to stderr first.
    """
    try:
        # Time only the read + profile work, not the file writes.
        t0 = time.perf_counter_ns()
        rows = read_csv_rows(input_path)
        report = profile_rows(rows)
        t1 = time.perf_counter_ns()
        # Convert nanoseconds to milliseconds
        report["timing_ms"] = (t1 - t0) / 1_000_000

        # Create output directory if it doesn't exist
        out_dir.mkdir(parents=True, exist_ok=True)

        # Write JSON output; ensure_ascii=False keeps non-ASCII text readable.
        json_path = out_dir / f"{report_name}.json"
        json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
        typer.secho(f"Wrote {json_path}", fg=typer.colors.GREEN)

        # Write Markdown output
        md_path = out_dir / f"{report_name}.md"
        md_path.write_text(render_markdown(report), encoding="utf-8")
        typer.secho(f"Wrote {md_path}", fg=typer.colors.GREEN)

        # Optional preview output
        if preview:
            typer.echo(f"Rows: {report['n_rows']} | Cols: {report['n_cols']} | {report['timing_ms']:.2f}ms")

    except Exception as e:
        # Top-level CLI boundary: report the failure on stderr so it does not
        # mix with normal output, then exit non-zero with the cause chained.
        typer.secho(f"Error: {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from e

## Recap

You now have:
- a real Python package layout
- a CLI that reads CSV and writes JSON + Markdown
- timing + better error handling

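**To run your CLI:**
```bash
PYTHONPATH=src uv run python -m csv_profiler.cli data/sample.csv --preview
```

**Note:** Because the app registers only one command, Typer runs it directly, so you do not pass `profile` as a subcommand name. If you later add a second command (or an `@app.callback()`), invoke it as `python -m csv_profiler.cli profile data/sample.csv --preview`.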
