# Exploratory Data Analysis

Minimal, repeatable EDA built from `scripts/eda.py`. Run cells top-to-bottom to generate summary tables and plots under `../reports/eda`.


In [None]:
from __future__ import annotations

from pathlib import Path
import sys

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image, display

# Put the project root on sys.path so the `scripts` package imported below
# can be found. The root is taken to be the parent of the current working
# directory, resolved to an absolute path.
# NOTE: keep the literal as ".." -- a trailing backslash ("..\") would escape
# the closing quote and leave the string unterminated (SyntaxError).
ROOT = Path("..").resolve()
if str(ROOT) not in sys.path:
    # Append only once so re-running the cell does not duplicate the entry.
    sys.path.append(str(ROOT))

from scripts.data_processing import load_and_clean_data
from scripts.eda import plot_charges_relationships, plot_distributions, run_basic_eda, summarize_dataframe

# Select the global matplotlib style sheet used for plots in this session.
# NOTE(review): the "seaborn-v0_8-*" style names appear to require
# matplotlib >= 3.6 -- confirm against the project's pinned version.
plt.style.use("seaborn-v0_8-whitegrid")


In [None]:
# Delimiter handed to the loader: None asks for auto-detection; override it
# if needed, e.g., sep="," or sep="\t".
sep = None

# Input dataset and report output directory, both anchored at the project root.
DATA_PATH = ROOT.joinpath("data/insurance.txt")  # adjust if using a different file
OUT_DIR = ROOT.joinpath("reports/eda")

# Load the dataset through the project's loading/cleaning helper.
df = load_and_clean_data(DATA_PATH, sep=sep)

# Quick sanity check: dataset dimensions followed by the first few rows.
print(f"Rows: {len(df):,} | Columns: {len(df.columns)}")
display(df.head())


In [None]:
# Run the EDA pipeline on the dataset file, passing the input path and the
# output directory as plain strings along with the delimiter chosen above.
# The result is kept in `artifacts`; the following cell looks up the keys
# "summary", "distributions" and "charges_relationships" in it.
artifacts = run_basic_eda(str(DATA_PATH), out_dir=str(OUT_DIR), sep=sep)
# Bare expression on the last line so the notebook renders the value.
artifacts


In [None]:
# Preview the first rows of the summary table written by the EDA run.
display(pd.read_csv(artifacts["summary"]).head())

# Render each plot only if the EDA run reported an artifact for it.
for plot_key in ("distributions", "charges_relationships"):
    if plot_key in artifacts:
        display(Image(filename=str(artifacts[plot_key])))
