# Report Example
A convenient way to check the current status of your data is to build a report. This example builds a simple dataset report that displays summary statistics for a specific dataset, covering both the extract/raw data and the loaded/transformed data.

In [None]:
import altair as alt
import itables
import pandas as pd
from IPython.display import Markdown, display

from cfa.dataops import datacat, get_data  # noqa: F401

In [None]:
# Dotted catalog namespace of the dataset this report is built for.
dataset_namespace: str = "scenarios.covid19vax_trends"

In [None]:
# Resolve the dotted namespace against the data catalog one attribute at a
# time. This replaces eval() on a formatted string, so the namespace value is
# only ever used as attribute names and is never executed as Python code.
config = datacat
for _attr_name in dataset_namespace.split("."):
    config = getattr(config, _attr_name)

## Dataset Config

---

In [None]:
# Read the dataset's config file verbatim. The encoding is pinned to UTF-8 so
# the result does not depend on the platform's locale default (the file is
# rendered as TOML below, and TOML documents are UTF-8 by specification).
with open(config._metadata.config_path, "r", encoding="utf-8") as f:
    full_config = f.read()

# Render the raw file contents inside a fenced ```toml block.
display(Markdown(f"\n```toml\n{full_config}\n```    \n"))

---

## Extract

### Available Versions

In [None]:
# List the extract versions one per line; the first entry returned by
# get_versions() is tagged as the latest.
extract_versions = [
    version if position else version + " *latest*"
    for position, version in enumerate(config.extract.get_versions())
]
print(*extract_versions, sep="\n")

### Data Info (using latest)

In [None]:
# Fetch the latest raw version of the dataset as a pandas DataFrame.
raw_df = get_data(
    dataset_namespace,
    type="raw",
    version="latest",
    output="pandas",
)

In [None]:
# Print a structural summary of the raw frame via DataFrame.info().
raw_df.info()

### Sample (n=20 using latest)

In [None]:
# Show up to 20 random rows. The sample size is capped at the frame length
# because DataFrame.sample raises ValueError when asked for more rows than
# exist (sampling is without replacement by default).
raw_sample = raw_df.sample(min(20, len(raw_df)))
raw_styler = raw_sample.style.hide()

# Only shade the "census" column when the selected dataset actually has it;
# other datasets would otherwise fail here with a KeyError.
if "census" in raw_sample.columns:
    raw_styler = raw_styler.background_gradient(subset=["census"])

itables.show(
    raw_styler,
    allow_html=True,
    lengthMenu=[5, 20],
    pageLength=5,
)

## Load/Transformed
### Available Versions

In [None]:
# List the load versions one per line; the first entry returned by
# get_versions() is tagged as the latest.
load_versions = [
    version if position else version + " *latest*"
    for position, version in enumerate(config.load.get_versions())
]
print(*load_versions, sep="\n")

### Data Info (using latest)

In [None]:
# Fetch the latest transformed version of the dataset as a pandas DataFrame.
load_df = get_data(
    dataset_namespace,
    type="transformed",
    version="latest",
    output="pandas",
)

In [None]:
# Print a structural summary of the transformed frame via DataFrame.info().
load_df.info()

### Sample (n=20 using latest)

In [None]:
# Show up to 20 random rows. The sample size is capped at the frame length
# because DataFrame.sample raises ValueError when asked for more rows than
# exist (sampling is without replacement by default).
load_sample = load_df.sample(min(20, len(load_df)))

itables.show(
    load_sample.style.hide(),
    allow_html=True,
    lengthMenu=[5, 20],
    pageLength=5,
)

In [None]:
# The plot section is specific to one dataset, so both the heading and the
# chart are emitted from code and only appear when that dataset is selected.
if dataset_namespace == "scenarios.covid19vax_trends":
    plots_intro = """
## Plots

Here is an example of plotting in a report and breaking data into a dropdown selection.

"""
    display(Markdown(plots_intro))

    # Dropdown-bound point selection on the "dose" field, starting at dose 1.
    input_dropdown = alt.binding_select(options=[1, 2, 3], name="Dose: ")
    selection = alt.selection_point(
        fields=["dose"], bind=input_dropdown, empty=False, value=1
    )

    # Points matching the selection are colored by age group; all other
    # points receive a null color value.
    age_color = alt.Color("age:N", legend=alt.Legend(title="Age Group"))
    color = alt.when(selection).then(age_color).otherwise(alt.value(None))

    alt.data_transformers.disable_max_rows()

    # Each "total" entry is indexed positionally: positions 0, 1 and 2 become
    # the dose1_pop, dose2_pop and dose3_pop columns respectively.
    dose_cols = ["dose1_pop", "dose2_pop", "dose3_pop"]
    for position, dose_col in enumerate(dose_cols):
        load_df[dose_col] = [totals[position] for totals in load_df["total"]]
    load_df["date"] = pd.to_datetime(load_df.date)

    # Reshape wide -> long: one frame per dose with a shared "dose_pop" value
    # column and a "dose" column holding the dose number (1, 2 or 3).
    df_stack = [
        load_df[["date", "state", "age", dose_col]]
        .rename(columns={dose_col: "dose_pop"})
        .assign(dose=dose_number)
        for dose_number, dose_col in enumerate(dose_cols, start=1)
    ]
    plot_df = pd.concat(df_stack)

    # Restrict the chart to the rows whose state is "US".
    us_rows = plot_df[plot_df.state == "US"]
    chart = (
        alt.Chart(us_rows)
        .mark_point()
        .encode(x="date:T", y="dose_pop:Q", color=color)
        .add_params(selection)
        .properties(width=800, height=400, title="US")
    )

    chart.show()