# 🧰 BigQueryVisualizer – Quick EDA Walk-Through  
This notebook shows two workflows:

1. **Manual**: call individual helper methods for ad-hoc plots & stats.  
2. **Pipeline**: run the entire modular EDA pipeline in one line.

*Dataset used*: `bigquery-public-data.samples.natality` (public).

In [None]:
from bigquery_visualizer import BigQueryVisualizer
from pipeline import Pipeline

In [None]:
# Location of the demo data: hosting project and `dataset.table` identifier.
# NOTE(review): `bigquery-public-data` only hosts the data; if the helper also
# uses `project_id` as the project that runs query jobs, a project you own may
# be required here instead — confirm against BigQueryVisualizer.
PROJECT, TABLE = "bigquery-public-data", "samples.natality"

# Optional `credentials_path=None` is omitted on purpose
# (per the original note: uses Colab / local gcloud default).
viz = BigQueryVisualizer(table_id=TABLE, project_id=PROJECT)

### Manual plots

In [None]:
# Peek at the raw table before plotting; `limit` is set to 5.
preview_limit = 5
viz.display_table(limit=preview_limit)

In [None]:
# Categorical bar chart — births per year, vertical bars ordered by year.
# NOTE(review): confirm how the helper interprets the `record_count` key
# (output alias vs. source column); the public natality schema does not
# appear to contain a column with that name.
year_field = "year"
count_metric = {"record_count": "COUNT"}

viz.plot_categorical_chart(
    dimensions=[year_field],
    metrics=count_metric,
    limit=None,          # no cap requested
    orientation="v",
    order_by=year_field,
)

In [None]:
# Numeric histogram of `weight_pounds`: 40 bins, `probability` normalisation,
# with `log_x` switched off.
viz.plot_histogram(numeric_column="weight_pounds", log_x=False,
                   histnorm="probability", bins=40)

In [None]:
# Violin chart: `weight_pounds` split by the `is_male` category.
violin_options = {
    "numeric_column": "weight_pounds",
    "category_dimension": "is_male",
    "log_scale": False,
    "palette": "Pastel1",
}
viz.plot_violin_chart(**violin_options)

In [None]:
# Scatter / bubble chart keyed on the `state` dimension:
#   x      -> AVG of gestation_weeks
#   y      -> AVG of weight_pounds
#   bubble -> COUNT of record_count
# NOTE(review): `record_count` is passed as a column name; the public natality
# schema does not appear to include such a column — verify how the helper
# resolves it before relying on the bubble sizes.
avg_gestation = {"column": "gestation_weeks", "aggregation": "AVG"}
avg_weight = {"column": "weight_pounds", "aggregation": "AVG"}
bubble_count = {"column": "record_count", "aggregation": "COUNT"}

viz.plot_scatter_chart(
    dimension="state",
    x_metric=avg_gestation,
    y_metric=avg_weight,
    bubble_size_metric=bubble_count,
    trend_line="ols",
    remove_nulls=True,
    color_dimension=None,
)

### Run the full pipeline

In [None]:
# Build the pipeline, then run it against the visualizer created earlier.
#   sample_rows   - sampling cap for heavy stages
#   target_column - None, because this dataset has no label
pipe = Pipeline(target_column=None, sample_rows=150_000)
ctx = pipe.run(viz)

In [None]:
# Show the first six keys of the tables stored on the pipeline context.
table_names = [name for name in ctx.tables.keys()]
table_names[:6]

In [None]:
# Fetch the non-null percentage table from the context and preview its head.
non_null_pct = ctx.get_table("profiling.non_null_pct")
non_null_pct.head()

In [None]:
# Retrieve the correlation heat-map figure from the context and render it.
corr_heatmap = ctx.get_figure("bivariate.corr_heatmap")
corr_heatmap.show()