# Load Data Example (DataLoader)

This notebook demonstrates how to use `scripts/data-loader.py` to load and inspect the default National Water Plan dataset. It mirrors the optional enhancement described in `docs/DEVELOPMENT_RECOMMENDATIONS.md` and serves as a quick reference for running the loader end-to-end.



## Notebook Goals

- Configure the notebook environment so it can import the loader module
- Load the default dataset and review the generated exploration report (metadata, warnings, and errors)
- Preview the first few rows to confirm the data loaded as expected



In [None]:
from pathlib import Path
import sys
import logging
import importlib.util

# Resolve the directory this notebook lives in. `__file__` is only defined when
# the code runs as a script/module, so interactive sessions fall back to the
# current working directory.
NOTEBOOK_DIR = (
    Path(globals()["__file__"]).resolve().parent
    if "__file__" in globals()
    else Path.cwd()
)

# The project root is taken to be the grandparent of the notebook directory;
# the scripts folder and the default dataset are located relative to it.
PROJECT_ROOT = NOTEBOOK_DIR.parent.parent
SCRIPTS_DIR = PROJECT_ROOT.joinpath("scripts")
DATA_FILE = PROJECT_ROOT.joinpath("data", "national_water_plan.csv")

logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO)

# Make the project root importable, without adding a duplicate entry when the
# cell is re-run.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Echo the resolved locations so a wrong working directory is obvious early.
print(
    f"Project root: {PROJECT_ROOT}",
    f"Scripts dir:  {SCRIPTS_DIR}",
    f"Data file:    {DATA_FILE}",
    sep="\n",
)



In [None]:
def load_data_loader_module(
    module_name: str = "data_loader_demo",
    module_path: "Path | str | None" = None,
):
    """Dynamically import the hyphenated `data-loader.py` module.

    The file name contains a hyphen, so a regular ``import`` statement cannot
    reference it; the module is loaded from its file path instead.

    Args:
        module_name: Name under which the module is created and registered in
            ``sys.modules``.
        module_path: Optional path of the file to load. When omitted, it
            defaults to ``SCRIPTS_DIR / "data-loader.py"``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the path.
        Exception: Any error raised while reading or executing the file
            propagates unchanged.
    """
    if module_path is None:
        module_path = SCRIPTS_DIR / "data-loader.py"
    module_path = Path(module_path)

    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        # Without this check the failure surfaces as an opaque AttributeError
        # on None further down.
        raise ImportError(f"Cannot create an import spec for {module_path}")

    module = importlib.util.module_from_spec(spec)

    # Register the module *before* executing it. Code that looks up its own
    # module by name while it is being defined (for example `dataclasses`
    # resolving string annotations) needs the `sys.modules` entry to exist.
    not_registered = object()
    previous = sys.modules.get(module_name, not_registered)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind; restore whatever was
        # registered under this name before the call.
        if previous is not_registered:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module


# Load the script as a module, then bind its `DataConfig` and `DataLoader`
# attributes to notebook-level names for use in the cells below.
data_loader_module = load_data_loader_module()
DataConfig, DataLoader = (
    data_loader_module.DataConfig,
    data_loader_module.DataLoader,
)

print("Data loader module imported successfully.")



In [None]:
# Build a configuration that points at the default dataset and hand it to the
# loader.
config = DataConfig(filepath=str(DATA_FILE))
loader = DataLoader(config=config)

# The loader returns the loaded data together with an exploration report; the
# summary figures printed below are read from the report's metadata.
df, exploration_report = loader.load_and_explore_data()
metadata = exploration_report.metadata

print("Load complete! ✅")
print("Rows:", f"{metadata.rows:,}")
print("Columns:", f"{metadata.columns}")
print("Memory usage:", f"{metadata.memory_usage:.2f}", "MB")
print("Missing values:", f"{metadata.missing_values_percent:.2f}%")
print("Duplicate rows:", f"{metadata.duplicate_rows}")



In [None]:
# List every warning recorded in the exploration report, or state that there
# were none.
if not exploration_report.warnings:
    print("No warnings recorded.")
else:
    print("Warnings:")
    for message in exploration_report.warnings:
        print(f"  ⚠️  {message}")

# Same treatment for errors; the heading is preceded by a blank line to set it
# apart from the warnings section.
if not exploration_report.errors:
    print("No errors recorded.")
else:
    print("\nErrors:")
    for problem in exploration_report.errors:
        print(f"  ❌ {problem}")



In [None]:
# Preview the first rows of the loaded data. This is the last expression in
# the cell, so its value is what the notebook renders as output.
# NOTE(review): `df` is whatever `loader.load_and_explore_data()` returned —
# presumably a pandas DataFrame; confirm against `scripts/data-loader.py`.
df.head()



## Next Steps

- Save the report with `loader.save_exploration_report(exploration_report, "export/data_exploration_report.json")`
- Explore additional configuration options in `02_data_loading_options.ipynb`
- Move on to cleaning or downstream analysis once you are satisfied with the load

