# Demographics

- 回答者の基本属性を確認する
  - 1次元
  - ヒストグラムと円グラフ

In [None]:
import altair as alt
import pandas as pd
import titanite as ti

# Report the versions of the libraries this notebook relies on, one per line.
for label, module in (("Altair", alt), ("Pandas", pd), ("Titanite", ti)):
    print(f"{label} {module.__version__}")

In [None]:
# Relative paths to the TOML configuration and the prepared CSV data.
f_cfg = "../../sandbox/config.toml"
f_csv = "../../data/test_data/prepared_data.csv"
# ti.Data is given both paths; its config() and read() results are kept as
# the `config` and `data` objects used by the cells below.
d = ti.Data(read_from=f_csv, load_from=f_cfg)
config = d.config()
data = d.read()
# Uncomment either line to inspect what was loaded:
# data
# data.info()

In [None]:
def group_data(data: pd.DataFrame, x: str):
    """Count responses per category of ``x`` and add each category's share.

    Args:
        data: Frame containing the column named by ``x`` and a ``response``
            column.
        x: Name of the column to group by.

    Returns:
        A frame with one row per distinct ``x`` value and the columns
        ``x``, ``response`` (number of non-null ``response`` entries in the
        group) and ``percentage`` (that count divided by the sum of all
        counts, i.e. a fraction rather than a value out of 100).
    """
    count_col = "response"
    # Resulting layout: | x | response |
    table = data.groupby(x)[count_col].count().reset_index()
    # Divide each group's count by the grand total to get its share.
    total = table[count_col].sum()
    table["percentage"] = table[count_col] / total
    return table


def hbar(data: pd.DataFrame, x: str, y: str, w: int, h: int):
    """Build a bar chart of ``y`` against ``x`` with a value label per bar.

    Args:
        data: Frame to plot; every column is exposed in the bar tooltip.
        x: Column encoded on the X axis and used for the bar colour.
        y: Column encoded on the Y axis and shown as the text label.
        w: Chart width passed to ``properties``.
        h: Chart height passed to ``properties``.

    Returns:
        The bar layer and the text layer combined with ``+``.
    """
    tooltip_fields = list(data.columns)

    # Extend the Y domain 20 units past the largest value; the labels are
    # drawn 10px above the bars (dy=-10) and need that headroom.
    upper = data[y].max() + 20

    x_channel = alt.X(x).axis(labelFontSize=15)
    y_channel = alt.Y(y).axis(labelFontSize=15).scale(domain=[0, upper])
    base = alt.Chart(data).encode(x_channel, y_channel).properties(width=w, height=h)

    # Default colour scheme; .scale(scheme="set1") on the Color channel was
    # a previously tried alternative.
    bars = base.mark_bar().encode(alt.Color(x), alt.Tooltip(tooltip_fields))
    labels = base.mark_text(dy=-10, size=15).encode(alt.Text(y))

    return bars + labels


def pie(data: pd.DataFrame, x: str, y: str, w: int, h: int):
    """Build a pie chart of ``y`` coloured by ``x`` with percentage labels.

    Args:
        data: Frame to plot; every column is exposed in the arc tooltip.
        x: Column used for the slice colour.
        y: Column encoded as the (stacked) angle, used for the slice order,
            and printed as the label with the ``".1%"`` format.
        w: Chart width passed to ``properties``.
        h: Chart height passed to ``properties``.

    Returns:
        The arc layer and the text layer combined with ``+``.
    """
    tooltip_fields = list(data.columns)

    # Both layers share these channels so labels line up with their slices.
    shared_channels = (alt.Theta(y).stack(True), alt.Color(x), alt.Order(y))
    base = alt.Chart(data).encode(*shared_channels).properties(width=w, height=h)

    arcs = base.mark_arc(outerRadius=120).encode(alt.Tooltip(tooltip_fields))
    # Labels sit at radius 150, outside the arcs' outer radius of 120.
    labels = base.mark_text(radius=150, size=15).encode(alt.Text(y).format(".1%"))

    return arcs + labels

In [None]:
def check(data: pd.DataFrame, x: str):
    """Chart one column as a bar chart and a pie chart side by side.

    Args:
        data: Frame passed to ``group_data``.
        x: Name of the column to summarise.

    Returns:
        The ``hbar`` chart of the ``response`` counts joined with ``|`` to
        the ``pie`` chart of the ``percentage`` shares, each 400 x 400.
    """
    width = height = 400
    summary = group_data(data, x)
    bars = hbar(summary, x, "response", width, height)
    shares = pie(summary, x, "percentage", width, height)
    return bars | shares

In [None]:
# Preview a single column ("q05") before charting every header.
check(data, "q05")

In [None]:
def check_all(data: pd.DataFrame, headers: list[str], questions: dict[str, str]):
    """Build one titled ``check`` chart per header.

    Args:
        data: Frame passed to ``check`` for every header.
        headers: Column names to chart, in output order.
        questions: Titles keyed by the part of a header before its first
            ``"_"``; headers without an entry are titled ``"No Title"``.

    Returns:
        A list of charts in the same order as ``headers``.
    """

    def titled_chart(header: str):
        # "q03_regional" and "q03_subregional" both look up the title "q03".
        key, _, _ = header.partition("_")
        title = questions.get(key, "No Title")
        return check(data, header).properties(title=title)

    return [titled_chart(header) for header in headers]

In [None]:
# Columns of `data` to chart, in display order. check_all() uses the part of
# each name before the first "_" (e.g. "q03" for "q03_regional") to look up
# the chart title.
# NOTE(review): "q10" is not listed — presumably intentional; confirm.
headers = [
    "q01",
    "q02",
    "q03_regional",
    "q03_subregional",
    "q04_regional",
    "q04_subregional",
    "q05",
    "q06",
    "q07",
    "q08",
    "q09",
    "q11",
]

# Titles keyed by question id. The fallback must be a mapping, not a string:
# check_all() calls questions.get(...), which would raise AttributeError on a
# str default. With {} every chart simply falls back to "No Title".
questions = config.get("questions", {})

In [None]:
# Build a chart for every header, then render them one after another.
charts = check_all(data, headers, questions)
for chart in charts:
    chart.display()