# Contents

**Category-data Visualization**
1. Interactive Bar / Pie chart
2. 3D Categorical Mesh / Cross Heatmap (for **wide-format** data)

**Numeric-data Visualization**
1. Interactive Boxplot
2. Interactive Histogram & Density Line Graph
3. Interactive Line Graph (for Group)
4. Interactive Line Graph (for Person/Subjects)
5. Interactive Scatter Plot
6. Interactive Bar Graph

参考：\
Vega-Altair: https://altair-viz.github.io \
ipywidgets: https://ipywidgets.readthedocs.io/en/stable/examples/Widget%20List.html

In [None]:
# 日本語フォントインストール
!pip install japanize_matplotlib
import japanize_matplotlib

Collecting japanize_matplotlib
  Downloading japanize-matplotlib-1.1.3.tar.gz (4.1 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/4.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/4.1 MB[0m [31m9.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━[0m [32m2.7/4.1 MB[0m [31m36.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m4.1/4.1 MB[0m [31m48.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: japanize_matplotlib
  Building wheel for japanize_matplotlib (setup.py) ... [?25l[?25hdone
  Created wheel for japanize_matplotlib: filename=japanize_matplotlib-1.1.3-py3-none-any

# Category-data Visualization

## matplotlib ver（比較用）

In [None]:
!pip install japanize-matplotlib
import japanize_matplotlib



In [None]:
# @title matplotlibによる描画
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt


# ===== 1) CSVアップロード（Colab優先、なければファイルダイアログ） =====
def load_csv_as_str():
    read_kwargs = dict(dtype=str, na_filter=False, keep_default_na=False)

    # 1) Google Colab のアップローダ
    try:
        from google.colab import files  # Colab環境でのみ存在
        uploaded = files.upload()
        if not uploaded:
            raise RuntimeError("No file uploaded.")
        filename = next(iter(uploaded))
        print(f"[INFO] Colabから '{filename}' を読み込みます")
        return pd.read_csv(filename, **read_kwargs)
    except Exception as e:
        print(f"[INFO] Colabアップロードはスキップ: {e}")

    # 2) ローカルJupyter/Pythonのファイルダイアログ
    try:
        import tkinter as tk
        from tkinter import filedialog
        root = tk.Tk(); root.withdraw()
        filepath = filedialog.askopenfilename(
            title="CSVファイルを選択してください",
            filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
        )
        if not filepath:
            raise RuntimeError("ファイルが選択されませんでした。")
        print(f"[INFO] ローカルから '{filepath}' を読み込みます")
        return pd.read_csv(filepath, **read_kwargs)
    except Exception as e:
        raise RuntimeError(
            "ファイルの読み込みに失敗しました。Colabではfiles.upload()、ローカルではtkinterが必要です。"
        ) from e

df = load_csv_as_str()

# ===== 2) 必須列の存在チェック =====
required_cols = ["試験名", "時期", "科目", "スコア"]
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"必須列が見つかりません: {missing}\n現在の列名: {list(df.columns)}")

# 空白の混入対策（両端の空白を除去）※保持したい場合はこのブロックを無効化
for c in ["試験名", "時期", "科目", "スコア"]:
    df[c] = df[c].map(lambda x: x.strip())

# ===== 3) プルダウンUI（連動） =====
import ipywidgets as widgets
from IPython.display import display, clear_output

exam_options = sorted(df["試験名"].unique())
exam_dd = widgets.Dropdown(options=exam_options, description="試験名", layout=widgets.Layout(width="350px"))

def periods_for(exam):
    return sorted(df.loc[df["試験名"] == exam, "時期"].unique())

def subjects_for(exam, period):
    m = (df["試験名"] == exam) & (df["時期"] == period)
    return sorted(df.loc[m, "科目"].unique())

# 初期値
period_dd = widgets.Dropdown(options=periods_for(exam_dd.value), description="時期", layout=widgets.Layout(width="350px"))
subject_dd = widgets.Dropdown(options=subjects_for(exam_dd.value, period_dd.value), description="科目", layout=widgets.Layout(width="350px"))

# 連動ハンドラ
def on_exam_change(change):
    if change["name"] == "value":
        new_periods = periods_for(change["new"])
        period_dd.options = new_periods
        period_dd.value = new_periods[0] if new_periods else None

def on_period_change(change):
    if change["name"] == "value":
        new_subjects = subjects_for(exam_dd.value, change["new"])
        subject_dd.options = new_subjects
        subject_dd.value = new_subjects[0] if new_subjects else None

exam_dd.observe(on_exam_change, names="value")
period_dd.observe(on_period_change, names="value")

# 描画ボタン & 出力領域
plot_btn = widgets.Button(description="描画", button_style="primary")
out = widgets.Output()

def plot_category_counts(_):
    with out:
        clear_output()
        exam = exam_dd.value
        period = period_dd.value
        subject = subject_dd.value

        if (exam is None) or (period is None) or (subject is None):
            print("選択が不足しています。『試験名』『時期』『科目』を選んでください。")
            return

        mask = (df["試験名"] == exam) & (df["時期"] == period) & (df["科目"] == subject)
        df_sub = df.loc[mask].copy()

        if df_sub.empty:
            print("該当データが見つかりませんでした。選択を変更してください。")
            return

        counts = df_sub["スコア"].value_counts(dropna=False)

        # テキスト概要
        print(f"選択: 試験名={exam} / 時期={period} / 科目={subject}")
        display(counts.to_frame("件数"))

        # 棒グラフ
        plt.figure(figsize=(8, 5))
        counts.plot(kind="bar")
        plt.title("カテゴリ件数（スコア）")
        plt.xlabel("スコア（カテゴリ）")
        plt.ylabel("件数")
        plt.xticks(rotation=45, ha="right")
        plt.tight_layout()
        plt.show()

        # 円グラフ
        plt.figure(figsize=(6, 6))
        counts.plot(kind="pie", autopct="%1.1f%%", startangle=90)
        plt.ylabel("")
        plt.title("カテゴリ構成比（スコア）")
        plt.tight_layout()
        plt.show()

plot_btn.on_click(plot_category_counts)

# UIの表示
control_box = widgets.VBox([
    widgets.HBox([exam_dd, period_dd, subject_dd]),
    plot_btn,
    out
])
display(control_box)


Saving 英語学習スタイルに関する調査LSS_long.csv to 英語学習スタイルに関する調査LSS_long (18).csv
[INFO] Colabから '英語学習スタイルに関する調査LSS_long (18).csv' を読み込みます


VBox(children=(HBox(children=(Dropdown(description='試験名', layout=Layout(width='350px'), options=('LSS',), valu…

## Interactive Bar / Pie chart

In [2]:
# @title Interactive Bar / Pie chart（層別分析対応）
# -*- coding: utf-8 -*-
import pandas as pd

# ===== 1) CSVアップロード（Colab優先、なければローカル） =====
def load_csv_as_str():
    read_kwargs = dict(dtype=str, na_filter=False, keep_default_na=False)
    try:
        from google.colab import files
        uploaded = files.upload()
        if not uploaded:
            raise RuntimeError("No file uploaded.")
        filename = next(iter(uploaded))
        print(f"[INFO] Colabから '{filename}' を読み込みます")
        return pd.read_csv(filename, **read_kwargs)
    except Exception as e:
        print(f"[INFO] Colabアップロードはスキップ: {e}")
    try:
        import tkinter as tk
        from tkinter import filedialog
        root = tk.Tk(); root.withdraw()
        filepath = filedialog.askopenfilename(
            title="CSVファイルを選択してください",
            filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
        )
        if not filepath:
            raise RuntimeError("ファイルが選択されませんでした。")
        print(f"[INFO] ローカルから '{filepath}' を読み込みます")
        return pd.read_csv(filepath, **read_kwargs)
    except Exception as e:
        raise RuntimeError("ファイルの読み込みに失敗しました。") from e

df = load_csv_as_str()

# ===== 2) 列前処理：固定列は可視化に使わない（無視） =====
FIXED = ["ID", "Name", "スコア", "試験名", "時期", "科目"]
for c in df.columns:
    df[c] = df[c].map(lambda x: x.strip())

# ラベル列候補（固定列以外すべて）
label_cols = [c for c in df.columns if c not in FIXED]
if not label_cols:
    raise ValueError("ラベル列が見つかりません。『ID』『Name』『スコア』『試験名』『時期』『科目』以外の列を1つ以上含めてください。")

# ===== 3) UI =====
import ipywidgets as widgets
from IPython.display import display, clear_output
import altair as alt

try:
    alt.renderers.enable('colab')   # Colab
except Exception:
    alt.renderers.enable('default') # Jupyter
alt.data_transformers.disable_max_rows()

mode_tb = widgets.ToggleButtons(
    options=[("デフォルト（単純カウント）","default"), ("層別分析モード","stratified")],
    value="default",
    description="モード",
    layout=widgets.Layout(width="520px")
)

# デフォルト：カウント対象ラベル列
label_dd_default = widgets.Dropdown(options=sorted(label_cols), description="ラベル列", layout=widgets.Layout(width="320px"))

# 層別：層別基準 / カウントするラベル
stratum_dd = widgets.Dropdown(options=sorted(label_cols), description="層別基準", layout=widgets.Layout(width="280px"))
label_dd_strat = widgets.Dropdown(
    options=[c for c in sorted(label_cols) if c != stratum_dd.value],
    description="カウントするラベル",
    layout=widgets.Layout(width="320px")
)
def on_stratum_change(ch):
    if ch["name"] == "value":
        label_dd_strat.options = [c for c in sorted(label_cols) if c != ch["new"]]
        if label_dd_strat.value == ch["new"]:
            label_dd_strat.value = next((c for c in label_dd_strat.options), None)
stratum_dd.observe(on_stratum_change, names="value")

mode_area = widgets.VBox([])
plot_btn = widgets.Button(description="描画（Altair）", button_style="primary")
out = widgets.Output()

def rebuild_mode_area():
    if mode_tb.value == "default":
        mode_area.children = [widgets.HBox([label_dd_default])]
    else:
        mode_area.children = [widgets.HBox([stratum_dd, label_dd_strat])]
mode_tb.observe(lambda ch: rebuild_mode_area(), names="value")
rebuild_mode_area()

# ===== 4) ユーティリティ =====
MAX_NAMES = 25  # ツールチップに表示する最大人数
def summarize_names(series, max_n=MAX_NAMES):
    vals = [str(v).strip() for v in series if str(v).strip() != ""]
    uniq = sorted(set(vals))
    if not uniq:
        return "(名前情報なし)"
    if len(uniq) <= max_n:
        return "、".join(uniq)
    return "、".join(uniq[:max_n]) + f"  ほか{len(uniq)-max_n}名"

# ===== 5) デフォルト（単純カウント） =====
def make_charts_default(df_in, label_col):
    name_source = "Name" if "Name" in df_in.columns else ("ID" if "ID" in df_in.columns else None)
    lab = df_in[label_col].replace({"": "(空白)"})
    grp = df_in.assign(__LABEL__=lab).groupby("__LABEL__", dropna=False)

    counts_df = grp.size().rename("件数").reset_index().rename(columns={"__LABEL__": label_col})
    if name_source is not None:
        names_df = grp[name_source].apply(summarize_names).reset_index(name="名前一覧").rename(columns={"__LABEL__": label_col})
    else:
        names_df = counts_df[[label_col]].copy(); names_df["名前一覧"] = "(名前情報なし)"
    counts_df = counts_df.merge(names_df, on=label_col, how="left")
    counts_df["割合"] = counts_df["件数"] / counts_df["件数"].sum()

    if counts_df.empty:
        return alt.LayerChart()

    sel = alt.selection_single(fields=[label_col], empty="none")

    bar = (
        alt.Chart(counts_df, title=f"カテゴリ件数（{label_col}）")
        .mark_bar()
        .encode(
            x=alt.X(f"{label_col}:N", sort=alt.SortField(field="件数", order="descending"), title=f"{label_col}（カテゴリ）"),
            y=alt.Y("件数:Q", title="件数"),
            tooltip=[alt.Tooltip(f"{label_col}:N", title="カテゴリ"),
                     alt.Tooltip("件数:Q", title="件数"),
                     alt.Tooltip("割合:Q", title="構成比", format=".1%"),
                     alt.Tooltip("名前一覧:N", title="Name/ID一覧")],
            opacity=alt.condition(sel, alt.value(1.0), alt.value(0.6)),
            color=alt.Color(f"{label_col}:N", legend=alt.Legend(title=label_col))
        )
        .add_params(sel)
        .properties(width=520, height=360)
        .interactive()
    )
    bar_text = (
        alt.Chart(counts_df).mark_text(dy=-5)
        .encode(x=alt.X(f"{label_col}:N", sort=alt.SortField(field="件数", order="descending")),
                y="件数:Q", text="件数:Q",
                opacity=alt.condition(sel, alt.value(1.0), alt.value(0.85)))
        .properties(width=520, height=360)
    )
    pie = (
        alt.Chart(counts_df, title=f"カテゴリ構成比（{label_col}）")
        .mark_arc()
        .encode(
            theta=alt.Theta("件数:Q", stack=True),
            color=alt.Color(f"{label_col}:N", legend=alt.Legend(title=label_col)),
            tooltip=[alt.Tooltip(f"{label_col}:N", title="カテゴリ"),
                     alt.Tooltip("件数:Q", title="件数"),
                     alt.Tooltip("割合:Q", title="構成比", format=".1%"),
                     alt.Tooltip("名前一覧:N", title="Name/ID一覧")],
            opacity=alt.condition(sel, alt.value(1.0), alt.value(0.6)),
        )
        .properties(width=420, height=420)
    )
    return alt.hconcat(bar + bar_text, pie).resolve_scale(color="independent")

# ===== 6) 層別分析：層ごとに分割（上段=棒を横並び、下段=円を横並び） =====
def make_charts_stratified(df_in, stratum_col, label_col):
    name_source = "Name" if "Name" in df_in.columns else ("ID" if "ID" in df_in.columns else None)

    # 空文字を明示化
    s = df_in[stratum_col].replace({"": "(空白)"})
    l = df_in[label_col].replace({"": "(空白)"})

    # 集計（層×ラベル）
    grp = df_in.assign(__STRAT__=s, __LABEL__=l).groupby(["__STRAT__", "__LABEL__"], dropna=False)
    counts = grp.size().rename("件数").reset_index().rename(columns={"__STRAT__": stratum_col, "__LABEL__": label_col})

    # 層ごとの総数→割合
    sums = counts.groupby(stratum_col, as_index=False)["件数"].sum().rename(columns={"件数": "総数"})
    counts = counts.merge(sums, on=stratum_col, how="left")
    counts["割合(層内)"] = counts["件数"] / counts["総数"]

    # 名前一覧（層×ラベル）
    if name_source is not None:
        names = grp[name_source].apply(summarize_names).reset_index(name="名前一覧").rename(
            columns={"__STRAT__": stratum_col, "__LABEL__": label_col}
        )
        counts = counts.merge(names, on=[stratum_col, label_col], how="left")
    else:
        counts["名前一覧"] = "(名前情報なし)"

    if counts.empty:
        return alt.LayerChart()

    # 全カテゴリの順序（色の対応を固定）
    label_domain = sorted(counts[label_col].unique())
    # 層の順序（棒の並び順）
    strata_domain = list(counts.groupby(stratum_col)["総数"].sum().sort_values(ascending=False).index)
    n_cols = len(strata_domain) if len(strata_domain) > 0 else 1

    # 上段：各層の棒グラフ（横に並べる＝facet column）
    bar_base = (
        alt.Chart(counts, title=f"層別：{stratum_col} ごとの {label_col} 件数")
        .mark_bar()
        .encode(
            x=alt.X(f"{label_col}:N",
                    sort=alt.SortField(field="件数", order="descending"),
                    title=f"{label_col}（カテゴリ）"),
            y=alt.Y("件数:Q", title="件数"),
            color=alt.Color(f"{label_col}:N", legend=alt.Legend(title=label_col),
                            scale=alt.Scale(domain=label_domain)),
            tooltip=[
                alt.Tooltip(f"{stratum_col}:N", title="層"),
                alt.Tooltip(f"{label_col}:N",   title="カテゴリ"),
                alt.Tooltip("件数:Q",           title="件数"),
                alt.Tooltip("割合(層内):Q",     title="層内比", format=".1%"),
                alt.Tooltip("名前一覧:N",       title="Name/ID一覧"),
            ],
        )
        .properties(width=240, height=280)
        .interactive()
    )

    bars_row = bar_base.facet(
        column=alt.Column(f"{stratum_col}:N",
                          sort=strata_domain,
                          header=alt.Header(title="層", labelOrient="bottom"))
        ,
        columns=n_cols
    )

    # 下段：各層の円グラフ（横に並べる＝facet column）
    pie_base = (
        alt.Chart(counts, title=f"層別：{stratum_col} ごとの {label_col} 構成比")
        .mark_arc()
        .encode(
            theta=alt.Theta("件数:Q", stack=True),
            color=alt.Color(f"{label_col}:N", legend=alt.Legend(title=label_col),
                            scale=alt.Scale(domain=label_domain)),
            tooltip=[
                alt.Tooltip(f"{stratum_col}:N", title="層"),
                alt.Tooltip(f"{label_col}:N",   title="カテゴリ"),
                alt.Tooltip("件数:Q",           title="件数"),
                alt.Tooltip("割合(層内):Q",     title="層内比", format=".1%"),
                alt.Tooltip("名前一覧:N",       title="Name/ID一覧"),
            ],
        )
        .properties(width=240, height=240)
    )

    pies_row = pie_base.facet(
        column=alt.Column(f"{stratum_col}:N",
                          sort=strata_domain,
                          header=alt.Header(title=None, labelOrient="bottom"))
        ,
        columns=n_cols
    )

    # 上段（棒 横並び）＋ 下段（円 横並び）
    return alt.vconcat(bars_row, pies_row).resolve_scale(color="shared")

# ===== 7) 実行ハンドラ =====
def plot(_):
    with out:
        clear_output()
        if mode_tb.value == "default":
            label_col = label_dd_default.value
            if label_col is None:
                print("ラベル列を選んでください。"); return
            print(f"モード=デフォルト｜ラベル列={label_col}｜総行数={len(df)}")
            chart = make_charts_default(df.copy(), label_col)
        else:
            stratum_col = stratum_dd.value
            label_col   = label_dd_strat.value
            if (stratum_col is None) or (label_col is None):
                print("層別基準とカウントするラベルを選んでください。"); return
            if stratum_col == label_col:
                print("層別基準とカウントするラベルは別の列を選んでください。"); return
            print(f"モード=層別分析｜層別基準={stratum_col}｜ラベル列={label_col}｜総行数={len(df)}")
            chart = make_charts_stratified(df.copy(), stratum_col, label_col)
        display(chart)

plot_btn.on_click(plot)

# ===== 8) 画面表示 =====
ui = widgets.VBox([
    mode_tb,
    mode_area,
    plot_btn,
    out
])
display(ui)


Saving 英語学習スタイルに関する調査Sample_wide.csv to 英語学習スタイルに関する調査Sample_wide.csv
[INFO] Colabから '英語学習スタイルに関する調査Sample_wide.csv' を読み込みます


VBox(children=(ToggleButtons(description='モード', layout=Layout(width='520px'), options=(('デフォルト（単純カウント）', 'defa…

## 3D Categorical Mesh / Cross Heatmap

In [None]:
# @title 2軸クロス：3Dメッシュ + ヒートマップ
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np

# ===== 1) CSVアップロード（Colab優先、なければローカル） =====
def load_csv_as_str():
    read_kwargs = dict(dtype=str, na_filter=False, keep_default_na=False)
    try:
        from google.colab import files
        uploaded = files.upload()
        if not uploaded:
            raise RuntimeError("No file uploaded.")
        filename = next(iter(uploaded))
        print(f"[INFO] Colabから '{filename}' を読み込みます")
        return pd.read_csv(filename, **read_kwargs)
    except Exception as e:
        print(f"[INFO] Colabアップロードはスキップ: {e}")
    try:
        import tkinter as tk
        from tkinter import filedialog
        root = tk.Tk(); root.withdraw()
        filepath = filedialog.askopenfilename(
            title="CSVファイルを選択してください",
            filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
        )
        if not filepath:
            raise RuntimeError("ファイルが選択されませんでした。")
        print(f"[INFO] ローカルから '{filepath}' を読み込みます")
        return pd.read_csv(filepath, **read_kwargs)
    except Exception as e:
        raise RuntimeError("ファイルの読み込みに失敗しました。") from e

df = load_csv_as_str()

# ===== 2) 列前処理：固定列は可視化に使わない（無視） =====
FIXED = ["ID", "Name", "スコア", "試験名", "時期", "科目"]
for c in df.columns:
    df[c] = df[c].map(lambda x: x.strip())

label_cols = [c for c in df.columns if c not in FIXED]
if not label_cols:
    raise ValueError("ラベル列が見つかりません。『ID』『Name』『スコア』『試験名』『時期』『科目』以外の列を1つ以上含めてください。")

# ===== 3) 可視化ユーティリティ =====
import altair as alt
from IPython.display import display, clear_output
import ipywidgets as w
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (3D登録用)

alt.data_transformers.disable_max_rows()

def cross_tab(df, x_col, y_col, sort_mode="freq"):
    """Y(行)×X(列)のクロステーブル（件数）を返す。"""
    ct = (df.groupby([y_col, x_col]).size()
            .unstack(fill_value=0)
            .astype(int))
    if sort_mode == "alpha":
        ct = ct.sort_index(axis=0).sort_index(axis=1)
    else:
        ct = ct.loc[ct.sum(axis=1).sort_values(ascending=False).index,
                    ct.sum(axis=0).sort_values(ascending=False).index]
    return ct

def make_long(ct):
    """クロステーブルから件数と割合（全体/行/列）を付与したロングを作成。"""
    grand = ct.values.sum()
    row_sum = ct.sum(axis=1).replace(0, np.nan)
    col_sum = ct.sum(axis=0).replace(0, np.nan)
    long = ct.stack().rename("count").reset_index()
    long["pct_all"] = (long["count"] / grand * 100.0) if grand > 0 else 0.0
    long["pct_row"] = long.apply(lambda r: (r["count"] / row_sum.loc[r[long.columns[0]]])*100.0
                                 if row_sum.loc[r[long.columns[0]]] > 0 else 0.0, axis=1)
    long["pct_col"] = long.apply(lambda r: (r["count"] / col_sum.loc[r[long.columns[1]]])*100.0
                                 if col_sum.loc[r[long.columns[1]]] > 0 else 0.0, axis=1)
    return long

def plot_heatmap(long_df, x_name, y_name, color_metric="count", pct_mode="pct_all",
                 show_text=True, width=520, height=380):
    """Altairヒートマップ（件数または割合で着色、数値ラベル重ね）"""
    val_col = "count" if color_metric == "count" else pct_mode
    base = alt.Chart(long_df).encode(
        x=alt.X(f"{x_name}:N", sort=list(long_df[x_name].drop_duplicates())),
        y=alt.Y(f"{y_name}:N", sort=list(long_df[y_name].drop_duplicates())[::-1]),
    )
    hm = base.mark_rect().encode(
        color=alt.Color(f"{val_col}:Q", title=("件数" if val_col=="count" else "割合(%)"),
                        scale=alt.Scale(scheme='blues')),
        tooltip=[
            alt.Tooltip(f"{x_name}:N", title=x_name),
            alt.Tooltip(f"{y_name}:N", title=y_name),
            alt.Tooltip("count:Q", title="件数"),
            alt.Tooltip("pct_all:Q", format=".1f", title="全体比(%)"),
            alt.Tooltip("pct_row:Q", format=".1f", title="行内比(%)"),
            alt.Tooltip("pct_col:Q", format=".1f", title="列内比(%)"),
        ]
    ).properties(width=width, height=height)

    if show_text:
        if "__label" not in long_df.columns:
            long_df = long_df.copy()
            long_df["__label"] = long_df.apply(
                lambda r: f'{r["count"]} ({r["pct_all"]:.1f}%)' if r["count"]>0 else "", axis=1
            )
        text = alt.Chart(long_df).encode(
            x=alt.X(f"{x_name}:N", sort=list(long_df[x_name].drop_duplicates())),
            y=alt.Y(f"{y_name}:N", sort=list(long_df[y_name].drop_duplicates())[::-1]),
            text="__label:N",
            color=alt.value("#000000")
        ).mark_text(baseline='middle', fontSize=10)
        return hm + text
    return hm

def plot_3d(ct, z_mode="count", pct_mode="pct_all",
            surface=True, wire=True, elev=35, azim=-60,
            surface_color="#1f77b4", surface_alpha=0.35,
            show_points=True, point_size=40, point_color="#111111", point_alpha=0.9,
            fig_w=6.2, fig_h=5.0):
    """
    Matplotlib 3D図を生成（単色・透過サーフェス＋ワイヤー＋交点ドット）。
    フォールバック：X or Y が1カテゴリの場合は3Dバー。
    """
    ys = list(ct.index)
    xs = list(ct.columns)
    Zc = ct.values.astype(float)

    # 高さZ：件数 or 割合
    if z_mode == "percent":
        grand = Zc.sum()
        row_sum = Zc.sum(axis=1, keepdims=True)
        col_sum = Zc.sum(axis=0, keepdims=True)
        if pct_mode == "pct_all":
            Z = (Zc / grand * 100.0) if grand > 0 else np.zeros_like(Zc)
        elif pct_mode == "pct_row":
            Z = np.divide(Zc, row_sum, out=np.zeros_like(Zc), where=row_sum>0) * 100.0
        else:
            Z = np.divide(Zc, col_sum, out=np.zeros_like(Zc), where=col_sum>0) * 100.0
    else:
        Z = Zc

    X_idx = np.arange(len(xs))
    Y_idx = np.arange(len(ys))
    XX, YY = np.meshgrid(X_idx, Y_idx)

    fig = plt.figure(figsize=(fig_w, fig_h), constrained_layout=True)
    ax = fig.add_subplot(111, projection='3d')

    can_surface = (len(xs) >= 2 and len(ys) >= 2)
    drew_any = False
    if can_surface:
        if surface:
            ax.plot_surface(XX, YY, Z, color=surface_color, alpha=surface_alpha,
                            edgecolor='none', antialiased=True, rstride=1, cstride=1)
            drew_any = True
        if wire:
            ax.plot_wireframe(XX, YY, Z, rstride=1, cstride=1,
                              linewidth=0.6, color='k', alpha=0.7)
            drew_any = True
    else:
        # フォールバック：3Dバー（XまたはYが1カテゴリ）
        dx, dy = 0.8, 0.8
        if len(xs) == 1 and len(ys) >= 1:
            for yi in range(len(ys)):
                ax.bar3d(0 - dx/2, yi - dy/2, 0, dx, dy, float(Z[yi, 0]),
                         shade=True, color=surface_color, alpha=surface_alpha)
            drew_any = True
        elif len(ys) == 1 and len(xs) >= 1:
            for xi in range(len(xs)):
                ax.bar3d(xi - dx/2, 0 - dy/2, 0, dx, dy, float(Z[0, xi]),
                         shade=True, color=surface_color, alpha=surface_alpha)
            drew_any = True

    # 交点ドット（格子上の各 (x,y) に対応する Z）
    if show_points and Z.size:
        ax.scatter(XX.ravel(), YY.ravel(), Z.ravel(),
                   s=point_size, c=point_color, alpha=point_alpha,
                   depthshade=True, linewidths=0.3, edgecolors='k')

    # 軸・ラベル
    ax.set_xticks(X_idx); ax.set_xticklabels(xs, rotation=45, ha='right')
    ax.set_yticks(Y_idx); ax.set_yticklabels(ys)
    ax.set_zlabel("件数" if z_mode=="count" else "割合(%)")
    ax.set_xlabel(ct.columns.name if ct.columns.name else "Xカテゴリ")
    ax.set_ylabel(ct.index.name if ct.index.name else "Yカテゴリ")

    # Z範囲（全ゼロでも見えるように確保）
    zmax = np.nanmax(Z) if Z.size else 0.0
    ax.set_zlim(0, (zmax * 1.08) if (np.isfinite(zmax) and zmax > 0) else 1)

    ax.view_init(elev=elev, azim=azim)
    ax.grid(False)
    ax.set_title("Cross Categories 3D-surface")
    return fig

# ===== 4) UI（横並び & 自動更新） =====
def build_ui():
    # --- コントロール ---
    x_dd = w.Dropdown(options=label_cols, value=label_cols[0], description="X軸列")
    y_dd = w.Dropdown(options=label_cols, value=(label_cols[1] if len(label_cols)>1 else label_cols[0]), description="Y軸列")
    sort_dd = w.Dropdown(options=[("頻度順","freq"),("アルファベット順","alpha")], value="freq", description="並び順")

    color_dd = w.Dropdown(options=[("高さ/色=件数","count"),("高さ/色=割合","percent")], value="count", description="指標")
    pct_dd = w.Dropdown(options=[("全体比","pct_all"),("行内比","pct_row"),("列内比","pct_col")], value="pct_all", description="割合種別")
    label_cb = w.Checkbox(value=True, description="ヒートマップに数値表示")

    # 3D表示設定
    surf_cb = w.Checkbox(value=True, description="3Dサーフェス")
    wire_cb = w.Checkbox(value=True, description="3Dワイヤー")
    elev_sl = w.IntSlider(value=35, min=0, max=90, step=1, description="俯角", continuous_update=True)
    azim_sl = w.IntSlider(value=-60, min=-180, max=180, step=5, description="方位", continuous_update=True)
    color_pk = w.ColorPicker(value="#1f77b4", description="面カラー")
    alpha_sl = w.FloatSlider(value=0.35, min=0.05, max=0.95, step=0.05, readout_format=".2f",
                             description="面の透明度", continuous_update=True)

    # 交点ドット設定
    dots_cb = w.Checkbox(value=True, description="交点ドット")
    dot_size_sl = w.IntSlider(value=40, min=5, max=150, step=5, description="ドットサイズ", continuous_update=True)
    dot_color_pk = w.ColorPicker(value="#111111", description="ドット色")
    dot_alpha_sl = w.FloatSlider(value=0.9, min=0.1, max=1.0, step=0.05, description="ドット透明度", continuous_update=True)

    # --- 出力領域（横並び） ---
    out_left = w.Output(layout=w.Layout(width="50%", border='1px solid #444', padding='4px'))
    out_right = w.Output(layout=w.Layout(width="50%", border='1px solid #444', padding='4px'))
    row = w.HBox([out_left, out_right], layout=w.Layout(width="100%"))
    out_below = w.Output()

    # --- レイアウト ---
    box1 = w.HBox([x_dd, y_dd, sort_dd])
    box2 = w.HBox([color_dd, pct_dd, label_cb])
    box3 = w.HBox([surf_cb, wire_cb, elev_sl, azim_sl])
    box4 = w.HBox([color_pk, alpha_sl])
    box5 = w.HBox([dots_cb, dot_size_sl, dot_color_pk, dot_alpha_sl])
    ui = w.VBox([box1, box2, box3, box4, box5, row, out_below])

    # --- レンダリング関数（自動更新） ---
    render_lock = {"busy": False}
    def render(_=None):
        if render_lock["busy"]:
            return
        render_lock["busy"] = True
        try:
            with out_left:
                clear_output(wait=True)
            with out_right:
                clear_output(wait=True)
            with out_below:
                clear_output(wait=True)

            x_col = x_dd.value
            y_col = y_dd.value
            if x_col == y_col:
                with out_below:
                    print("同じ列は選べません。X軸とY軸に別の列を選んでください。")
                return

            ct = cross_tab(df, x_col, y_col, sort_mode=sort_dd.value)
            long = make_long(ct)
            long.columns = [y_col, x_col, "count", "pct_all", "pct_row", "pct_col"]

            # 右：ヒートマップ
            hm = plot_heatmap(long, x_col, y_col,
                              color_metric=color_dd.value,
                              pct_mode=pct_dd.value,
                              show_text=label_cb.value,
                              width=520, height=380)
            with out_right:
                display(hm.properties(title="2軸クロス・ヒートマップ"))

            # 左：3D（単色透過サーフェス＋ワイヤー＋交点ドット）
            z_mode = "count" if color_dd.value == "count" else "percent"
            fig = plot_3d(ct, z_mode=z_mode, pct_mode=pct_dd.value,
                          surface=surf_cb.value, wire=wire_cb.value,
                          elev=elev_sl.value, azim=azim_sl.value,
                          surface_color=color_pk.value, surface_alpha=alpha_sl.value,
                          show_points=dots_cb.value, point_size=dot_size_sl.value,
                          point_color=dot_color_pk.value, point_alpha=dot_alpha_sl.value,
                          fig_w=6.2, fig_h=5.0)
            with out_left:
                display(fig)
                plt.close(fig)

            # 下：件数テーブル
            with out_below:
                print("件数クロステーブル")
                display(ct)
        finally:
            render_lock["busy"] = False

    # 主要コントロールの変更を監視して自動レンダリング
    for wdg in [x_dd, y_dd, sort_dd, color_dd, pct_dd, label_cb,
                surf_cb, wire_cb, elev_sl, azim_sl, color_pk, alpha_sl,
                dots_cb, dot_size_sl, dot_color_pk, dot_alpha_sl]:
        wdg.observe(render, names='value')

    display(ui)
    render()  # 初回描画

build_ui()


Saving 英語学習スタイルに関する調査LSS_raw.csv to 英語学習スタイルに関する調査LSS_raw (1).csv
[INFO] Colabから '英語学習スタイルに関する調査LSS_raw (1).csv' を読み込みます


VBox(children=(HBox(children=(Dropdown(description='X軸列', options=('Eng-QAdvice', 'Eng-AI', 'Eng-GroupStyle', …

# Numeric-data Visualization

In [None]:
# @title Interactive Boxplot
import pandas as pd
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np

# --- CSVのアップロードとデータ読み込み ---
uploaded = files.upload()
filename = list(uploaded.keys())[0]
data = pd.read_csv(filename, index_col='Name').reset_index()

# --- 試験ID列の作成 ---
data['試験ID'] = data['科目'].astype(str) + "_" + data['試験名'].astype(str) + "_" + data['時期'].astype(str)

# --- 試験IDの選択肢 ---
testid_options = sorted(data['試験ID'].unique())

# --- ウィジェットの作成 ---
plot_count = widgets.BoundedIntText(value=1, min=1, max=20, description='箱ひげ図数:')

def create_testid_dropdowns(n):
    dropdowns = []
    for i in range(n):
        dd = widgets.Dropdown(options=testid_options, description=f'試験ID {i+1}:')
        dropdowns.append(dd)
    return dropdowns

testid_dropdowns = create_testid_dropdowns(plot_count.value)
testid_container = widgets.VBox(testid_dropdowns)

required_cols = ['Name', 'ID', '試験名', '時期', '科目', 'スコア']
label_cols = [col for col in data.columns if col not in required_cols + ['試験ID']]
stratify_dropdown1 = widgets.Dropdown(options=["なし"] + label_cols, description="層別因子1:")
stratify_dropdown2 = widgets.Dropdown(options=["なし"] + label_cols, description="層別因子2:")

color_palette_dropdown = widgets.Dropdown(
    options=["category10", "tableau10", "accent", "dark2", "set1", "set2", "set3"],
    value="category10", description="色パレット:")
shape_palette_dropdown = widgets.Dropdown(
    options=["Default", "Set 1", "Set 2", "Set 3"],
    value="Default", description="形パレット:")

filled_checkbox = widgets.Checkbox(value=True, description="Filledマーカー")

boxplot_opacity_slider = widgets.FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05, description='箱ひげ透明度:')
jitter_opacity_slider = widgets.FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05, description='データ点透明度:')
jitter_slider = widgets.FloatSlider(value=0.3, min=0.0, max=1.0, step=0.05, description='Jitter幅:')
point_size_slider = widgets.IntSlider(value=100, min=10, max=200, step=10, description='データ点サイズ:')
facet_spacing_slider = widgets.IntSlider(value=20, min=0, max=100, step=1, description='テスト間スペース:')

output = widgets.Output()

def update_testid_dropdowns(change):
    n = change['new']
    new_dropdowns = create_testid_dropdowns(n)
    testid_container.children = new_dropdowns
    update_chart()

plot_count.observe(update_testid_dropdowns, names='value')

shape_palettes = {
    "Set 1": ["square", "diamond", "cross", "triangle-up", "triangle-down"],
    "Set 2": ["square", "cross", "triangle-up", "diamond"],
    "Set 3": ["diamond", "triangle-up", "triangle-down", "square", "star"]
}

def update_chart(*args):
    with output:
        clear_output(wait=True)
        selected_testids = [dd.value for dd in testid_container.children]
        filt = data[data['試験ID'].isin(selected_testids)]
        if filt.empty:
            print("選択された試験IDに一致するデータがありません。")
            return

        max_jitter = jitter_slider.value
        jitter_scale = 20
        filt = filt.copy()
        filt['jitter_offset'] = np.random.uniform(-max_jitter, max_jitter, size=len(filt)) * jitter_scale

        # 層別因子の設定
        factor1 = stratify_dropdown1.value
        factor2 = stratify_dropdown2.value

        if factor1 == "なし" and factor2 == "なし":
            filt['dummy'] = "全体"
            x_field = alt.X('dummy:N', title="")
            groupby_fields = ['dummy', '試験ID']
            # ダミーの場合でもパレットを反映
            color_enc = alt.Color('dummy:N', legend=None, scale=alt.Scale(scheme=color_palette_dropdown.value))
            if shape_palette_dropdown.value == "Default":
                shape_enc = alt.Shape('dummy:N', legend=None, scale=alt.Scale(range=["circle"]))
            else:
                shape_enc = alt.Shape('dummy:N', legend=None, scale=alt.Scale(range=shape_palettes[shape_palette_dropdown.value]))
        elif factor1 != "なし" and factor2 == "なし":
            x_field = alt.X(f'{factor1}:N', title=factor1)
            groupby_fields = [factor1, '試験ID']
            color_enc = alt.Color(f'{factor1}:N', legend=alt.Legend(title=factor1),
                                  scale=alt.Scale(scheme=color_palette_dropdown.value))
            if shape_palette_dropdown.value == "Default":
                shape_enc = alt.Shape(f'{factor1}:N', legend=alt.Legend(title=factor1),
                                      scale=alt.Scale(range=["circle"]))
            else:
                shape_enc = alt.Shape(f'{factor1}:N', legend=alt.Legend(title=factor1),
                                      scale=alt.Scale(range=shape_palettes[shape_palette_dropdown.value]))
        elif factor1 == "なし" and factor2 != "なし":
            x_field = alt.X(f'{factor2}:N', title=factor2)
            groupby_fields = [factor2, '試験ID']
            color_enc = alt.Color(f'{factor2}:N', legend=alt.Legend(title=factor2),
                                  scale=alt.Scale(scheme=color_palette_dropdown.value))
            if shape_palette_dropdown.value == "Default":
                shape_enc = alt.Shape(f'{factor2}:N', legend=alt.Legend(title=factor2),
                                      scale=alt.Scale(range=["circle"]))
            else:
                shape_enc = alt.Shape(f'{factor2}:N', legend=alt.Legend(title=factor2),
                                      scale=alt.Scale(range=shape_palettes[shape_palette_dropdown.value]))
        else:
          # 両方指定されている場合
          # 2つの因子を連結して combined_strata を作成
          filt['combined_strata'] = filt[factor1].astype(str) + "_" + filt[factor2].astype(str)
          # x軸のドメインを、実際に存在する組み合わせのみに限定する
          unique_strata = sorted(filt['combined_strata'].unique())
          x_field = alt.X('combined_strata:N', title=f"{factor1} & {factor2}",
                          scale=alt.Scale(domain=unique_strata))
          groupby_fields = [factor1, factor2, '試験ID']
          # 色は factor1、形は factor2 とする
          color_enc = alt.Color(f'{factor1}:N', legend=alt.Legend(title=factor1),
                                scale=alt.Scale(scheme=color_palette_dropdown.value))
          if shape_palette_dropdown.value == "Default":
              shape_enc = alt.Shape(f'{factor2}:N', legend=alt.Legend(title=factor2),
                                    scale=alt.Scale(range=["circle"]))
          else:
              shape_enc = alt.Shape(f'{factor2}:N', legend=alt.Legend(title=factor2),
                                    scale=alt.Scale(range=shape_palettes[shape_palette_dropdown.value]))

        charts = []

        # --- 箱ひげ図 ---
        if boxplot_opacity_slider.value > 0:
            bp = alt.Chart(filt).mark_boxplot(extent='min-max', opacity=boxplot_opacity_slider.value).encode(
                x=x_field,
                y=alt.Y('スコア:Q', scale=alt.Scale(domain=[0, 100]), title='スコア')
            )
            if color_enc is not None:
                bp = bp.encode(color=color_enc)
            charts.append(bp)

        # --- jittered plot（データ点） ---
        if jitter_opacity_slider.value > 0:
            jp = alt.Chart(filt).mark_point(filled=filled_checkbox.value, size=point_size_slider.value, opacity=jitter_opacity_slider.value).encode(
                x=x_field,
                xOffset=alt.X('jitter_offset:Q', title=None),
                y=alt.Y('スコア:Q', scale=alt.Scale(domain=[0, 100]), title='スコア'),
                tooltip=[
                    alt.Tooltip('ID:N', title='受験者ID'),
                    alt.Tooltip('Name:N', title='Name'),
                    alt.Tooltip('スコア:Q', title='スコア')
                ]
            )
            if color_enc is not None:
                jp = jp.encode(color=color_enc, shape=shape_enc)
            charts.append(jp)

        layered = alt.layer(*charts)

        if len(selected_testids) > 1:
            final_chart = layered.facet(
                column=alt.Facet('試験ID:N', title='試験ID'),
                spacing=facet_spacing_slider.value
            ).configure_view(strokeWidth=0)
        else:
            final_chart = layered.configure_view(strokeWidth=0)

        display(final_chart)

for dd in testid_container.children:
    dd.observe(update_chart, names='value')

boxplot_opacity_slider.observe(update_chart, names='value')
jitter_opacity_slider.observe(update_chart, names='value')
stratify_dropdown1.observe(update_chart, names='value')
stratify_dropdown2.observe(update_chart, names='value')
jitter_slider.observe(update_chart, names='value')
facet_spacing_slider.observe(update_chart, names='value')
color_palette_dropdown.observe(update_chart, names='value')
shape_palette_dropdown.observe(update_chart, names='value')
point_size_slider.observe(update_chart, names='value')
filled_checkbox.observe(update_chart, names='value')

update_chart()

display(widgets.VBox([
    plot_count,
    testid_container,
    stratify_dropdown1,
    stratify_dropdown2,
    color_palette_dropdown,
    shape_palette_dropdown,
    filled_checkbox,
    boxplot_opacity_slider,
    jitter_opacity_slider,
    jitter_slider,
    point_size_slider,
    facet_spacing_slider,
    output
]))


Saving Sample_data(multi-wide-sheets)_long.csv to Sample_data(multi-wide-sheets)_long.csv


VBox(children=(BoundedIntText(value=1, description='箱ひげ図数:', max=20, min=1), VBox(children=(Dropdown(descripti…

In [None]:
# @title Interactive Histogram & Density Line Graph
import pandas as pd
import numpy as np
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- CSVのアップロードとデータ読み込み ---
uploaded = files.upload()
filename = list(uploaded.keys())[0]
data = pd.read_csv(filename, index_col='Name').reset_index()

# --- 試験ID列の作成 ---
data['試験ID'] = data['科目'].astype(str) + "_" + data['試験名'].astype(str) + "_" + data['時期'].astype(str)
testid_options = sorted(data['試験ID'].unique())

# --- ウィジェットの作成 ---
plot_count = widgets.BoundedIntText(value=1, min=1, max=20, description='ヒストグラム数:')

def create_testid_dropdowns(n):
    dropdowns = []
    for i in range(n):
        dd = widgets.Dropdown(options=testid_options, description=f'試験ID {i+1}:')
        dropdowns.append(dd)
    return dropdowns

testid_dropdowns = create_testid_dropdowns(plot_count.value)
testid_container = widgets.VBox(testid_dropdowns)

# 層別因子を2つ選択（どちらも「なし」の場合は全体扱い）
required_cols = ['Name', 'ID', '試験名', '時期', '科目', 'スコア']
label_cols = [col for col in data.columns if col not in required_cols + ['試験ID']]
stratify_dropdown1 = widgets.Dropdown(options=["なし"] + label_cols, description="因子1:")
stratify_dropdown2 = widgets.Dropdown(options=["なし"] + label_cols, description="因子2:")

# 色パレット
color_palette_dropdown = widgets.Dropdown(
    options=["category10", "tableau10", "accent", "dark2", "set1", "set2", "set3"],
    value="category10", description="色パレット:")

bin_count_slider = widgets.IntSlider(value=20, min=5, max=50, step=1, description='ビン数:')

hist_mode_dropdown = widgets.Dropdown(
    options=["積み上げ", "重ね合わせ", "密度曲線"],
    value="積み上げ",
    description="ヒストグラムモード:")

facet_spacing_slider = widgets.IntSlider(value=20, min=0, max=100, step=1, description='テスト間スペース:')

output = widgets.Output()

def update_testid_dropdowns(change):
    n = change['new']
    new_dropdowns = create_testid_dropdowns(n)
    testid_container.children = new_dropdowns
    update_chart()

plot_count.observe(update_testid_dropdowns, names='value')

def update_chart(*args):
    with output:
        clear_output(wait=True)
        selected_testids = [dd.value for dd in testid_container.children]
        # スコアの欠損値を除去
        filt = data[data['試験ID'].isin(selected_testids)].dropna(subset=["スコア"])
        if filt.empty:
            print("選択された試験IDに一致する有効なデータがありません。")
            return

        # 2つの因子を組み合わせたグループ分け
        strat1 = stratify_dropdown1.value
        strat2 = stratify_dropdown2.value
        if strat1 == "なし" and strat2 == "なし":
            filt['combined_strat'] = "全体"
            strat_field = "combined_strat"
        elif strat1 != "なし" and strat2 == "なし":
            strat_field = strat1
        elif strat1 == "なし" and strat2 != "なし":
            strat_field = strat2
        else:
            filt['combined_strat'] = filt[strat1].astype(str) + " | " + filt[strat2].astype(str)
            strat_field = "combined_strat"

        mode = hist_mode_dropdown.value

        # strat_field が「全体」の場合はハイライトなし
        if strat_field != "全体":
            # クリックで複数選択できる凡例の選択
            highlight = alt.selection_multi(fields=[strat_field], bind='legend')
        else:
            highlight = None

        if mode in ["積み上げ", "重ね合わせ"]:
            hist_base = alt.Chart(filt).transform_bin(
                "binned_score",
                field="スコア",
                bin=alt.Bin(maxbins=bin_count_slider.value)
            )
            y_stack = "zero" if mode == "積み上げ" else None
            # 色エンコードにハイライトの条件を組み込み
            if highlight is not None:
                color_enc = alt.condition(
                    highlight,
                    alt.Color(f"{strat_field}:N",
                              scale=alt.Scale(scheme=color_palette_dropdown.value),
                              legend=alt.Legend(title=strat_field)),
                    alt.value("lightgray")
                )
            else:
                color_enc = alt.Color(f"{strat_field}:N",
                                      scale=alt.Scale(scheme=color_palette_dropdown.value),
                                      legend=alt.Legend(title=strat_field))
            hist = hist_base.mark_bar(opacity=0.7).encode(
                x=alt.X("binned_score:Q", title="スコア", bin=alt.Bin(maxbins=bin_count_slider.value)),
                y=alt.Y("count():Q", title="件数", stack=y_stack),
                tooltip=[alt.Tooltip("count():Q", title="件数")],
                color=color_enc
            )
            final_chart = hist
            if highlight is not None:
                final_chart = final_chart.add_selection(highlight)

        elif mode == "密度曲線":
            if strat_field != "全体":
                density = alt.Chart(filt).transform_density(
                    "スコア",
                    as_=["スコア", "density"],
                    extent=[0, 100],
                    groupby=[strat_field]
                ).mark_line().encode(
                    x=alt.X("スコア:Q", title="スコア"),
                    y=alt.Y("density:Q", title="密度"),
                    color=alt.condition(
                        highlight,
                        alt.Color(f"{strat_field}:N",
                                  scale=alt.Scale(scheme=color_palette_dropdown.value),
                                  legend=alt.Legend(title=strat_field)),
                        alt.value("lightgray")
                    )
                )
                mean_rule = alt.Chart(filt).transform_aggregate(
                    mean_score='mean(スコア)',
                    groupby=[strat_field]
                ).mark_rule(strokeDash=[5,5]).encode(
                    x=alt.X("mean_score:Q", title="平均値"),
                    color=alt.condition(
                        highlight,
                        alt.Color(f"{strat_field}:N",
                                  scale=alt.Scale(scheme=color_palette_dropdown.value),
                                  legend=alt.Legend(title=strat_field)),
                        alt.value("lightgray")
                    ),
                    tooltip=[alt.Tooltip("mean_score:Q", title="平均値")]
                )
                final_chart = alt.layer(density, mean_rule)
                final_chart = final_chart.add_selection(highlight)
            else:
                density = alt.Chart(filt).transform_density(
                    "スコア",
                    as_=["スコア", "density"],
                    extent=[0, 100]
                ).mark_line(color="red").encode(
                    x=alt.X("スコア:Q", title="スコア"),
                    y=alt.Y("density:Q", title="密度")
                )
                mean_rule = alt.Chart(filt).transform_aggregate(
                    mean_score='mean(スコア)'
                ).mark_rule(strokeDash=[5,5], color="red").encode(
                    x=alt.X("mean_score:Q", title="平均値"),
                    tooltip=[alt.Tooltip("mean_score:Q", title="平均値")]
                )
                final_chart = alt.layer(density, mean_rule)

        # 複数の試験IDが選択された場合は facet で各試験の図を表示
        if len(selected_testids) > 1:
            final_chart = final_chart.facet(
                column=alt.Facet('試験ID:N', title='試験ID'),
                spacing=facet_spacing_slider.value
            ).configure_view(strokeWidth=0).resolve_scale(y='independent')
        else:
            final_chart = final_chart.configure_view(strokeWidth=0)

        display(final_chart)

for dd in testid_container.children:
    dd.observe(update_chart, names='value')
stratify_dropdown1.observe(update_chart, names='value')
stratify_dropdown2.observe(update_chart, names='value')
color_palette_dropdown.observe(update_chart, names='value')
bin_count_slider.observe(update_chart, names='value')
hist_mode_dropdown.observe(update_chart, names='value')
facet_spacing_slider.observe(update_chart, names='value')

update_chart()

display(widgets.VBox([
    plot_count,
    testid_container,
    stratify_dropdown1,
    stratify_dropdown2,
    color_palette_dropdown,
    bin_count_slider,
    hist_mode_dropdown,
    facet_spacing_slider,
    output
]))


Saving longdata.csv to longdata (6).csv


VBox(children=(BoundedIntText(value=1, description='ヒストグラム数:', max=20, min=1), VBox(children=(Dropdown(descrip…

In [None]:
# @title Interactive Line Graph (for Group)
import pandas as pd
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
from scipy.stats import t

# --- CSVファイルのアップロードと読み込み ---
uploaded = files.upload()
if uploaded:
    filename = list(uploaded.keys())[0]
    df = pd.read_csv(filename)
else:
    print("ファイルがアップロードされませんでした。")

# --- 標準列 ---
standard_cols = {"ID", "Name", "試験名", "時期", "科目", "スコア", "試験ID"}
# --- 標準列以外の列をラベル候補として抽出 ---
extra_columns = [col for col in df.columns if col not in standard_cols]

# ============================
# 解析モードの選択ウィジェット（先頭に配置）
# ============================
mode_dropdown = widgets.Dropdown(
    options=["集団分析モード", "層別分析モード"],
    description="解析モード:",
    value="集団分析モード"
)
display(mode_dropdown)

# ============================
# 集団分析用ウィジェット（フィルタ用）
# ============================
if extra_columns:
    label_column_dropdown = widgets.Dropdown(
        options=extra_columns,
        description="ラベル列:",
        value=extra_columns[0]
    )

    label_value_dropdown = widgets.Dropdown(
        options=["全て"],
        description="ラベル値:",
        value="全て"
    )

    def update_label_value_options(change=None):
        current_label = label_column_dropdown.value
        values = sorted(df[current_label].dropna().unique().tolist())
        options = ["全て"] + values
        label_value_dropdown.options = options
        label_value_dropdown.value = "全て"

    update_label_value_options()
    label_column_dropdown.observe(update_label_value_options, names='value')
else:
    label_column_dropdown = None
    label_value_dropdown = None

# ============================
# 集団分析用ウィジェット（配色因子用）
# ============================
if extra_columns:
    color_factor1 = widgets.Dropdown(
        options=["なし"] + extra_columns,
        description="配色因子1:",
        value="なし"
    )
    color_factor2 = widgets.Dropdown(
        options=["なし"] + extra_columns,
        description="配色因子2:",
        value="なし"
    )
else:
    color_factor1 = None
    color_factor2 = None

# ----------------------------
# 集団分析用：色パレット選択ウィジェット
# ----------------------------
palette_dropdown = widgets.Dropdown(
    options=["category10", "tableau10", "set1", "set2", "set3"],
    description="色パレット:",
    value="category10"
)

# 集団分析用コンテナ：フィルタ用、配色因子用、色パレットをまとめて表示
individual_box = widgets.VBox([label_column_dropdown, label_value_dropdown, color_factor1, color_factor2, palette_dropdown])

# ============================
# 層別分析用ウィジェット
# ============================
if extra_columns:
    strat_label1 = widgets.Dropdown(
        options=extra_columns,
        description="層別属性1:",
        value=extra_columns[0]
    )
    strat_label2 = widgets.Dropdown(
        options=["なし"] + extra_columns,
        description="層別属性2:",
        value="なし"
    )
else:
    strat_label1 = None
    strat_label2 = None

stratified_box = widgets.VBox([strat_label1, strat_label2])

# ----------------------------
# 層別分析用：エラーバー表示のチェックボックスと信頼度αスライダー
# ----------------------------
error_bar_checkbox = widgets.Checkbox(
    description="エラーバー表示",
    value=False
)
alpha_slider = widgets.FloatSlider(
    min=0.01, max=0.2, step=0.01, value=0.05,
    description="信頼度 α"
)
error_bar_box = widgets.HBox([error_bar_checkbox, alpha_slider])

# ----------------------------
# 「幅」と「高さ」のスライダーをグラフ更新ボタンの上側に配置
# ----------------------------
chart_width_slider = widgets.IntSlider(
    min=100, max=1000, step=50, value=300,
    description="幅:"
)
chart_height_slider = widgets.IntSlider(
    min=100, max=800, step=50, value=300,
    description="高さ:"
)
size_box = widgets.VBox([chart_width_slider, chart_height_slider])
display(size_box)

# ============================
# モードに応じたウィジェット表示切替
# ============================
def update_mode_visibility(change):
    if mode_dropdown.value == "集団分析モード":
        individual_box.layout.display = 'block'
        stratified_box.layout.display = 'none'
        error_bar_box.layout.display = 'none'
    else:
        individual_box.layout.display = 'none'
        stratified_box.layout.display = 'block'
        error_bar_box.layout.display = 'block'

mode_dropdown.observe(update_mode_visibility, names='value')
update_mode_visibility(None)

display(individual_box)
display(stratified_box)
display(error_bar_box)

# ============================
# 試験IDの作成とテスト数選択用ウィジェット
# ============================
df["試験ID"] = df["試験名"].astype(str) + "_" + df["時期"].astype(str) + "_" + df["科目"].astype(str)
unique_tests = list(df["試験ID"].unique())

names_df = df[['ID', 'Name']].drop_duplicates().sort_values("ID")
ordered_names = names_df["Name"].tolist()

test_count = 3
test_count_label = widgets.Label(value=f"テスト数: {test_count}")
plus_button = widgets.Button(description="テスト追加")
minus_button = widgets.Button(description="テスト削除")

def create_dropdowns(n):
    dropdown_list = []
    for i in range(n):
        dd = widgets.Dropdown(
            options=unique_tests,
            description=f'{i+1}番目:',
            value=unique_tests[i] if i < len(unique_tests) else unique_tests[0]
        )
        dropdown_list.append(dd)
    return dropdown_list

current_dropdowns = create_dropdowns(test_count)
dropdowns_container = widgets.VBox(current_dropdowns)

def on_plus_button_clicked(b):
    global test_count, current_dropdowns
    if test_count < len(unique_tests):
        test_count += 1
        test_count_label.value = f"テスト数: {test_count}"
        current_dropdowns = create_dropdowns(test_count)
        dropdowns_container.children = current_dropdowns

def on_minus_button_clicked(b):
    global test_count, current_dropdowns
    if test_count > 1:
        test_count -= 1
        test_count_label.value = f"テスト数: {test_count}"
        current_dropdowns = create_dropdowns(test_count)
        dropdowns_container.children = current_dropdowns

plus_button.on_click(on_plus_button_clicked)
minus_button.on_click(on_minus_button_clicked)

display(widgets.HBox([minus_button, test_count_label, plus_button]))
display(dropdowns_container)

# ============================
# グラフ更新用ボタンと出力領域
# ============================
update_button = widgets.Button(description="グラフ更新")
chart_out = widgets.Output()
display(update_button, chart_out)

def update_chart(b):
    selected_tests = [dd.value for dd in current_dropdowns]
    current_mode = mode_dropdown.value
    width = chart_width_slider.value
    height = chart_height_slider.value

    if current_mode == "集団分析モード":
        if label_value_dropdown and label_value_dropdown.value != "全て":
            current_label = label_column_dropdown.value
            df_filtered = df[df[current_label] == label_value_dropdown.value]
        else:
            df_filtered = df.copy()
        filtered_df = df_filtered[df_filtered["試験ID"].isin(selected_tests)]
        filtered_names_df = names_df[names_df["Name"].isin(filtered_df["Name"])]
        filtered_ordered_names = filtered_names_df["Name"].tolist()

        cf1 = color_factor1.value if color_factor1 is not None else "なし"
        cf2 = color_factor2.value if color_factor2 is not None else "なし"
        if cf1 != "なし" and cf2 != "なし":
            filtered_df["ColorFactor"] = filtered_df[cf1].astype(str) + " / " + filtered_df[cf2].astype(str)
        elif cf1 != "なし":
            filtered_df["ColorFactor"] = filtered_df[cf1].astype(str)
        else:
            filtered_df["ColorFactor"] = filtered_df["Name"]
        # もし配色因子がName（すなわち cf1=="なし"）なら、凡例の順番はID順に設定
        if cf1 == "なし":
            color_scale = alt.Scale(domain=filtered_ordered_names, scheme=palette_dropdown.value)
        else:
            color_scale = alt.Scale(scheme=palette_dropdown.value)

        # shiftキーを用いた複数選択
        highlight = alt.selection_multi(fields=["ColorFactor"], bind="legend", on="click", toggle="event.shiftKey")
        chart = (
            alt.Chart(filtered_df)
            .mark_line(point=True, strokeWidth=1)
            .encode(
                x=alt.X("試験ID:N", sort=selected_tests, title="試験ID"),
                y=alt.Y("スコア:Q", title="スコア(%)", scale=alt.Scale(domain=[0, 100])),
                color=alt.Color("ColorFactor:N",
                                title="配色因子",
                                scale=color_scale,
                                legend=alt.Legend(columns=2)),
                detail=alt.Detail("Name:N"),
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1)),
                tooltip=["Name", "試験ID", "スコア", "ColorFactor"]
            )
            .add_selection(highlight)
            .properties(width=width, height=height, title="テストスコア推移（集団分析）")
            .interactive()
        )
    else:
        current_label1 = strat_label1.value if strat_label1 else None
        current_label2 = strat_label2.value if strat_label2 and strat_label2.value != "なし" else None

        df_filtered = df[df["試験ID"].isin(selected_tests)]
        if current_label1 and current_label2:
            group_cols = ["試験ID", current_label1, current_label2]
        elif current_label1:
            group_cols = ["試験ID", current_label1]
        else:
            group_cols = ["試験ID"]

        if error_bar_checkbox.value:
            grouped = df_filtered.groupby(group_cols).agg(
                mean_score=("スコア", "mean"),
                count=("スコア", "count"),
                std=("スコア", "std")
            ).reset_index()
            def calc_margin(row):
                if row["count"] > 1 and not np.isnan(row["std"]):
                    return t.ppf(1 - alpha_slider.value/2, row["count"] - 1) * (row["std"] / np.sqrt(row["count"]))
                else:
                    return 0
            grouped["margin"] = grouped.apply(calc_margin, axis=1)
            grouped["lower"] = grouped["mean_score"] - grouped["margin"]
            grouped["upper"] = grouped["mean_score"] + grouped["margin"]

            if current_label1 and current_label2:
                grouped["属性"] = grouped[current_label1].astype(str) + " / " + grouped[current_label2].astype(str)
            elif current_label1:
                grouped["属性"] = grouped[current_label1].astype(str)
            else:
                grouped["属性"] = "全体"
            unique_attr = list(grouped["属性"].unique())

            highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
            line_chart = alt.Chart(grouped).mark_line(point=True, strokeWidth=1).encode(
                x=alt.X("試験ID:N", sort=selected_tests, title="試験ID"),
                y=alt.Y("mean_score:Q", title="平均スコア(%)", scale=alt.Scale(domain=[0, 100])),
                color=alt.Color("属性:N", title="属性", legend=alt.Legend(values=unique_attr, columns=2)),
                tooltip=["属性", "試験ID", "mean_score"],
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
            ).add_selection(highlight)

            vertical_rule = alt.Chart(grouped).mark_rule().encode(
                x=alt.X("試験ID:N", sort=selected_tests),
                y=alt.Y("lower:Q"),
                y2=alt.Y2("upper:Q"),
                color=alt.Color("属性:N", title="属性"),
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
            )
            top_caps = alt.Chart(grouped).mark_tick(thickness=2, size=10).encode(
                x=alt.X("試験ID:N", sort=selected_tests),
                y=alt.Y("upper:Q"),
                color=alt.Color("属性:N", title="属性"),
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
            )
            bottom_caps = alt.Chart(grouped).mark_tick(thickness=2, size=10).encode(
                x=alt.X("試験ID:N", sort=selected_tests),
                y=alt.Y("lower:Q"),
                color=alt.Color("属性:N", title="属性"),
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
            )
            error_bars = vertical_rule + top_caps + bottom_caps
            chart = (line_chart + error_bars).properties(width=width, height=height, title="テストスコア推移（層別分析）")
        else:
            grouped = df_filtered.groupby(group_cols).agg(mean_score=("スコア", "mean")).reset_index()
            if current_label1 and current_label2:
                grouped["属性"] = grouped[current_label1].astype(str) + " / " + grouped[current_label2].astype(str)
            elif current_label1:
                grouped["属性"] = grouped[current_label1].astype(str)
            else:
                grouped["属性"] = "全体"
            unique_attr = list(grouped["属性"].unique())
            highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
            chart = (
                alt.Chart(grouped)
                .mark_line(point=True, strokeWidth=1)
                .encode(
                    x=alt.X("試験ID:N", sort=selected_tests, title="試験ID"),
                    y=alt.Y("mean_score:Q", title="平均スコア(%)", scale=alt.Scale(domain=[0, 100])),
                    color=alt.Color("属性:N", title="属性", legend=alt.Legend(values=unique_attr, columns=2)),
                    tooltip=["属性", "試験ID", "mean_score"],
                    opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
                )
                .add_selection(highlight)
                .properties(width=width, height=height, title="テストスコア推移（層別分析）")
                .interactive()
            )

    with chart_out:
        clear_output(wait=True)
        display(chart)

update_button.on_click(update_chart)


Saving 28th_R6_EngTerm_long.csv to 28th_R6_EngTerm_long.csv


Dropdown(description='解析モード:', options=('集団分析モード', '層別分析モード'), value='集団分析モード')

VBox(children=(IntSlider(value=300, description='幅:', max=1000, min=100, step=50), IntSlider(value=300, descri…

VBox(children=(Dropdown(description='ラベル列:', options=('Class', 'Gender', 'Eng-QAdvice', 'Eng-AI', 'Eng-GroupSt…

VBox(children=(Dropdown(description='層別属性1:', options=('Class', 'Gender', 'Eng-QAdvice', 'Eng-AI', 'Eng-GroupS…

HBox(children=(Checkbox(value=False, description='エラーバー表示'), FloatSlider(value=0.05, description='信頼度 α', max=…

HBox(children=(Button(description='テスト削除', style=ButtonStyle()), Label(value='テスト数: 3'), Button(description='テ…

VBox(children=(Dropdown(description='1番目:', options=('AiGROW_4月_思考力/論理性', 'AiGROW_4月_判断力/決断力', 'AiGROW_4月_表現力'…

Button(description='グラフ更新', style=ButtonStyle())

Output()

Deprecated since `altair=5.0.0`. Use selection_point instead.
  highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(highlight)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(highlight)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(highlight)
Deprecated since `altair=5.0.0`. Use selection_point instead.
  highlight = alt.selection_multi(fields=["属性"], bind="legend", on="click", toggle="event.shiftKey")
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(highlight)
Deprecat

In [None]:
# @title Interactive Line Graph (for Person/Subjects)
import pandas as pd
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
from scipy.stats import t

# --- CSVファイルのアップロードと読み込み ---
uploaded = files.upload()
if uploaded:
    filename = list(uploaded.keys())[0]
    df = pd.read_csv(filename)
else:
    print("ファイルがアップロードされませんでした。")

# --- 共通：モード選択（プルダウン） ---
mode_dropdown = widgets.Dropdown(
    options=["個人分析モード", "科目分析モード"],
    description="モード:"
)

# --- 個人分析モード用ウィジェット ---
# Nameは単一選択（ID順にソート）
names_df = df[['ID', 'Name']].drop_duplicates().sort_values('ID')
names_ordered = names_df['Name'].tolist()
student_dropdown = widgets.Dropdown(
    options=names_ordered,
    description="生徒名:"
)
# 個人分析モードでは、科目は複数選択可能
subject_selector_personal = widgets.SelectMultiple(
    options=sorted(df["科目"].unique()),
    description="科目:",
    value=(sorted(df["科目"].unique())[0],)
)

personal_controls = widgets.VBox([student_dropdown, subject_selector_personal])

# --- 科目分析モード用ウィジェット ---
# こちらは科目を複数選択
subject_selector_subject = widgets.SelectMultiple(
    options=sorted(df["科目"].unique()),
    description="科目:",
    value=(sorted(df["科目"].unique())[0],)
)
# エラーバー表示のチェックボックス
error_bar_checkbox = widgets.Checkbox(
    description="エラーバー表示",
    value=False
)
# 信頼度スライダー（α、例えば0.05なら95%信頼区間）
confidence_slider = widgets.FloatSlider(
    value=0.05,
    min=0.01,
    max=0.2,
    step=0.01,
    description='信頼度 α:',
    readout_format='.2f'
)
subject_controls = widgets.VBox([subject_selector_subject, error_bar_checkbox, confidence_slider])

# --- 共通：テスト数・試験種別選択 ---
# exam_typeは「試験名」と「時期」の組み合わせ
exam_df = df[["試験名", "時期"]].drop_duplicates()
exam_df["exam_type"] = exam_df["試験名"].astype(str) + "_" + exam_df["時期"].astype(str)
exam_options = sorted(exam_df["exam_type"].unique())

test_count_widget = widgets.IntText(
    value=3,
    description="テスト数:"
)

def create_exam_dropdowns(n):
    dropdowns = []
    for i in range(n):
        default_value = exam_options[i] if i < len(exam_options) else exam_options[0]
        dd = widgets.Dropdown(
            options=exam_options,
            description=f"試験 {i+1}:",
            value=default_value
        )
        dropdowns.append(dd)
    return dropdowns

current_exam_dropdowns = create_exam_dropdowns(test_count_widget.value)
exam_dropdowns_container = widgets.VBox(current_exam_dropdowns)

def update_exam_dropdowns(change):
    n = change['new']
    new_dropdowns = create_exam_dropdowns(n)
    exam_dropdowns_container.children = new_dropdowns
    # テスト数が1なら棒グラフ用の棒の太さスライダーを表示
    if n == 1:
        bar_thickness_slider.layout.display = 'flex'
    else:
        bar_thickness_slider.layout.display = 'none'

test_count_widget.observe(update_exam_dropdowns, names='value')

# --- 共通：図サイズ調整スライダー ---
chart_width_slider = widgets.IntSlider(
    min=100,
    max=1000,
    step=10,
    value=600,
    description="幅:"
)
chart_height_slider = widgets.IntSlider(
    min=100,
    max=1000,
    step=10,
    value=400,
    description="高さ:"
)

# --- 共通：棒グラフ用の棒の太さ調整スライダー ---
bar_thickness_slider = widgets.IntSlider(
    min=1,
    max=100,
    value=30,
    description="棒の太さ:"
)
bar_thickness_slider.layout.display = 'none'  # 初期はテスト数が3なので非表示

# --- グラフ更新ボタンと出力領域 ---
update_button = widgets.Button(description="グラフ更新")
chart_out = widgets.Output()

# --- モード切替により表示ウィジェットを切り替え ---
def update_mode_visibility(change):
    if mode_dropdown.value == "個人分析モード":
        personal_controls.layout.display = 'block'
        subject_controls.layout.display = 'none'
    else:
        personal_controls.layout.display = 'none'
        subject_controls.layout.display = 'block'

mode_dropdown.observe(update_mode_visibility, names='value')
update_mode_visibility(None)

# --- グラフ更新処理 ---
def update_chart(b):
    mode = mode_dropdown.value
    selected_exams = [dd.value for dd in exam_dropdowns_container.children]
    chart_width = chart_width_slider.value
    chart_height = chart_height_slider.value

    if mode == "個人分析モード":
        selected_student = student_dropdown.value
        selected_subjects = list(subject_selector_personal.value)
        records = []
        for exam in selected_exams:
            try:
                exam_name, exam_period = exam.split("_", 1)
            except Exception:
                continue
            for subj in selected_subjects:
                row = df[
                    (df["Name"] == selected_student) &
                    (df["科目"] == subj) &
                    (df["試験名"] == exam_name) &
                    (df["時期"] == exam_period)
                ]
                if not row.empty:
                    score = row.iloc[0]["スコア"]
                    records.append({
                        "exam_type": exam,
                        "科目": subj,
                        "スコア": score,
                        "試験名": exam_name,
                        "時期": exam_period
                    })
                else:
                    records.append({
                        "exam_type": exam,
                        "科目": subj,
                        "スコア": None,
                        "試験名": exam_name,
                        "時期": exam_period
                    })
        plot_df = pd.DataFrame(records)
        # 個別分析モードでも、各レコードに試験ID（exam_type + "_" + 科目）を追加
        plot_df["試験ID"] = plot_df["exam_type"] + "_" + plot_df["科目"]

        highlight = alt.selection_multi(fields=["科目"], bind="legend", on="click", toggle="event.shiftKey")

        if len(selected_exams) == 1:
            # 個別分析モードの棒グラフ：x軸は試験ID
            chart = alt.Chart(plot_df).mark_bar(size=bar_thickness_slider.value).encode(
                x=alt.X("試験ID:N", title="", sort="ascending"),
                y=alt.Y("スコア:Q", title="スコア(%)", scale=alt.Scale(domain=[0, 100])),
                color=alt.Color("科目:N", title="科目", scale=alt.Scale(scheme='category10')),
                tooltip=["科目", "試験名", "時期", "スコア"],
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.3))
            ).add_selection(highlight).properties(
                width=chart_width,
                height=chart_height,
                title=f"{selected_student} のスコア（棒グラフ）"
            )
        else:
            chart = alt.Chart(plot_df).mark_line(point=True).encode(
                x=alt.X("exam_type:N", title="", sort=selected_exams),
                y=alt.Y("スコア:Q", title="スコア(%)", scale=alt.Scale(domain=[0,100])),
                color=alt.Color("科目:N", title="科目", scale=alt.Scale(scheme='category10')),
                tooltip=["科目", "試験名", "時期", "スコア"],
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.3))
            ).add_selection(highlight).properties(
                width=chart_width,
                height=chart_height,
                title=f"{selected_student} のスコア推移"
            )

    else:  # 科目分析モード
        selected_subjects = list(subject_selector_subject.value)
        records = []
        for exam in selected_exams:
            try:
                exam_name, exam_period = exam.split("_", 1)
            except Exception:
                continue
            df_filtered = df[
                (df["試験名"] == exam_name) &
                (df["時期"] == exam_period) &
                (df["科目"].isin(selected_subjects))
            ]
            if not df_filtered.empty:
                grp = df_filtered.groupby("科目").agg(
                    mean_score=("スコア", "mean"),
                    count=("スコア", "count"),
                    std=("スコア", "std")
                ).reset_index()
                grp["exam_type"] = exam
                alpha = confidence_slider.value
                grp["margin"] = grp.apply(lambda r: t.ppf(1 - alpha/2, r["count"]-1) * (r["std"]/np.sqrt(r["count"]))
                                          if r["count"] > 1 and pd.notnull(r["std"]) else 0, axis=1)
                grp["lower"] = grp["mean_score"] - grp["margin"]
                grp["upper"] = grp["mean_score"] + grp["margin"]
                records.append(grp)
            else:
                for subj in selected_subjects:
                    records.append(pd.DataFrame({
                        "科目": [subj],
                        "mean_score": [None],
                        "count": [0],
                        "std": [None],
                        "margin": [0],
                        "lower": [None],
                        "upper": [None],
                        "exam_type": [exam]
                    }))
        if records:
            plot_df = pd.concat(records, ignore_index=True)
        else:
            plot_df = pd.DataFrame()

        highlight = alt.selection_multi(fields=["科目"], bind="legend", on="click", toggle="event.shiftKey")

        if len(selected_exams) == 1:
            plot_df = plot_df.copy()
            plot_df["試験ID"] = plot_df["exam_type"] + "_" + plot_df["科目"]
            if error_bar_checkbox.value:
                vertical = alt.Chart(plot_df).mark_rule(color='black').encode(
                    x=alt.X("試験ID:N", sort="ascending"),
                    y=alt.Y("lower:Q"),
                    y2=alt.Y2("upper:Q")
                )
                top_cap = alt.Chart(plot_df).mark_tick(thickness=2, size=10, color='black').encode(
                    x=alt.X("試験ID:N", sort="ascending"),
                    y=alt.Y("upper:Q")
                )
                bottom_cap = alt.Chart(plot_df).mark_tick(thickness=2, size=10, color='black').encode(
                    x=alt.X("試験ID:N", sort="ascending"),
                    y=alt.Y("lower:Q")
                )
                errorbars = vertical + top_cap + bottom_cap
            else:
                errorbars = None
            base = alt.Chart(plot_df).mark_bar(size=bar_thickness_slider.value).encode(
                x=alt.X("試験ID:N", title="", sort="ascending"),
                y=alt.Y("mean_score:Q", title="平均スコア(%)", scale=alt.Scale(domain=[0,100])),
                color=alt.Color("科目:N", scale=alt.Scale(scheme='category10')),
                tooltip=["科目", "mean_score", "std"],
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.3))
            ).add_selection(highlight)
            chart = base
            if errorbars is not None:
                chart = base + errorbars
            chart = chart.properties(width=chart_width, height=chart_height, title="科目別平均スコア（棒グラフ）")
        else:
            base = alt.Chart(plot_df).mark_line(point=True).encode(
                x=alt.X("exam_type:N", title="", sort=selected_exams),
                y=alt.Y("mean_score:Q", title="平均スコア(%)", scale=alt.Scale(domain=[0,100])),
                color=alt.Color("科目:N", scale=alt.Scale(scheme='category10')),
                tooltip=["科目", "mean_score", "std", "exam_type"],
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.3))
            ).add_selection(highlight)
            if error_bar_checkbox.value:
                vertical = alt.Chart(plot_df).mark_rule().encode(
                    x=alt.X("exam_type:N", sort=selected_exams),
                    y=alt.Y("lower:Q"),
                    y2=alt.Y2("upper:Q"),
                    color=alt.Color("科目:N", scale=alt.Scale(scheme='category10'))
                )
                top_cap = alt.Chart(plot_df).mark_tick(thickness=2, size=10).encode(
                    x=alt.X("exam_type:N", sort=selected_exams),
                    y=alt.Y("upper:Q"),
                    color=alt.Color("科目:N", scale=alt.Scale(scheme='category10'))
                )
                bottom_cap = alt.Chart(plot_df).mark_tick(thickness=2, size=10).encode(
                    x=alt.X("exam_type:N", sort=selected_exams),
                    y=alt.Y("lower:Q"),
                    color=alt.Color("科目:N", scale=alt.Scale(scheme='category10'))
                )
                errorbars = vertical + top_cap + bottom_cap
                chart = base + errorbars
            else:
                chart = base
            chart = chart.properties(width=chart_width, height=chart_height, title="科目別平均スコア")

    with chart_out:
        clear_output(wait=True)
        display(chart)

update_button.on_click(update_chart)

# --- 全体レイアウト ---
controls_box = widgets.VBox([mode_dropdown, personal_controls, subject_controls])
exam_box = widgets.VBox([test_count_widget, exam_dropdowns_container])
size_box = widgets.HBox([chart_width_slider, chart_height_slider])
action_box = widgets.VBox([bar_thickness_slider, update_button])

display(controls_box, exam_box, size_box, action_box, chart_out)


Saving Sample_data(multi-wide-sheets)_long.csv to Sample_data(multi-wide-sheets)_long (1).csv


VBox(children=(Dropdown(description='モード:', options=('個人分析モード', '科目分析モード'), value='個人分析モード'), VBox(children=(D…

VBox(children=(IntText(value=3, description='テスト数:'), VBox(children=(Dropdown(description='試験 1:', options=('A…

HBox(children=(IntSlider(value=600, description='幅:', max=1000, min=100, step=10), IntSlider(value=400, descri…

VBox(children=(IntSlider(value=30, description='棒の太さ:', layout=Layout(display='none'), min=1), Button(descript…

Output()

In [None]:
# @title Interactive Scatter Plot
import pandas as pd
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import statsmodels.api as sm
from statsmodels.nonparametric.smoothers_lowess import lowess

# --- CSVファイルのアップロードと読み込み ---
uploaded = files.upload()
if uploaded:
    filename = list(uploaded.keys())[0]
    df = pd.read_csv(filename)
else:
    print("ファイルがアップロードされませんでした。")

# --- 試験IDの作成 ---
df["試験ID"] = df["試験名"].astype(str) + "_" + df["時期"].astype(str) + "_" + df["科目"].astype(str)

# --- データのワイド化 ---
pivot_df = df.pivot_table(index=["ID", "Name"], columns="試験ID", values="スコア").reset_index()

# --- 試験IDの選択肢 ---
test_ids = sorted([col for col in pivot_df.columns if col not in ["ID", "Name"]])
if not test_ids:
    print("試験IDの候補が見つかりません。")

# --- 層別属性の候補設定 ---
exclude_cols = {"ID", "Name", "スコア", "試験名", "時期", "科目", "試験ID"}
strat_candidates = [col for col in df.columns if col not in exclude_cols]

# ============================
# ウィジェットの作成
# ============================
x_test_dropdown = widgets.Dropdown(options=test_ids, description="X軸試験ID:")
y_test_dropdown = widgets.Dropdown(options=test_ids, description="Y軸試験ID:",
                                     value=test_ids[0] if len(test_ids) > 0 else None)
strat_dropdown1 = widgets.Dropdown(options=["なし"]+strat_candidates, description="層別属性1:", value="なし")
strat_dropdown2 = widgets.Dropdown(options=["なし"]+strat_candidates, description="層別属性2:", value="なし")
line_type_dropdown = widgets.Dropdown(options=["なし", "回帰直線", "LOESS"], description="ライン種別:")
conf_checkbox = widgets.Checkbox(description="信頼区間描画（回帰直線のみ）", value=False)
alpha_slider = widgets.FloatSlider(min=0.01, max=0.2, step=0.01, value=0.05, description="信頼度 α")
marginal_checkbox = widgets.Checkbox(description="マージナルヒストグラム描画", value=False)
corr_checkbox = widgets.Checkbox(description="相関係数・ヒートマップ描画", value=False)

display(x_test_dropdown, y_test_dropdown, strat_dropdown1, strat_dropdown2,
        line_type_dropdown, conf_checkbox, alpha_slider, marginal_checkbox, corr_checkbox)

# ----------------------------
# グラフ更新用ボタンと出力領域
# ----------------------------
update_button = widgets.Button(description="グラフ更新")
chart_out = widgets.Output()
display(update_button, chart_out)

def compute_regression(df_sub, x_var, y_var, conf_flag, alpha):
    if len(df_sub) < 2:
        return pd.DataFrame()
    x_min = df_sub[x_var].min()
    x_max = df_sub[x_var].max()
    x_pred = np.linspace(x_min, x_max, 100)
    X_pred = sm.add_constant(x_pred)
    X = sm.add_constant(df_sub[x_var])
    model = sm.OLS(df_sub[y_var], X).fit()
    predictions = model.get_prediction(X_pred)
    pred_summary = predictions.summary_frame(alpha=alpha)
    result = pd.DataFrame({ x_var: x_pred, "pred": pred_summary["mean"] })
    if conf_flag:
        result["ci_lower"] = pred_summary["mean_ci_lower"]
        result["ci_upper"] = pred_summary["mean_ci_upper"]
    return result

def compute_loess(df_sub, x_var, y_var, frac=0.3):
    if len(df_sub) < 2:
        return pd.DataFrame()
    loess_result = lowess(df_sub[y_var], df_sub[x_var], frac=frac)
    result = pd.DataFrame(loess_result, columns=[x_var, "pred"])
    return result

def update_chart(b):
    with chart_out:
        clear_output(wait=True)
        x_test = x_test_dropdown.value
        y_test = y_test_dropdown.value
        line_type = line_type_dropdown.value  # "なし", "回帰直線", "LOESS"
        conf_flag = conf_checkbox.value
        alpha = alpha_slider.value
        show_marginal = marginal_checkbox.value
        show_corr = corr_checkbox.value

        # 対象の2つの試験スコアが欠損していない学生を抽出
        plot_df = pivot_df.dropna(subset=[x_test, y_test]).copy()
        plot_df = plot_df.rename(columns={x_test: "x_score", y_test: "y_score"})

        # --- 層別属性のマージ ---
        group_cols = []
        if strat_dropdown1.value != "なし":
            group_cols.append(strat_dropdown1.value)
        if strat_dropdown2.value != "なし" and strat_dropdown2.value != strat_dropdown1.value:
            group_cols.append(strat_dropdown2.value)
        if group_cols:
            unique_attrs = df.drop_duplicates(subset=["ID"] + group_cols)[["ID"] + group_cols]
            plot_df = pd.merge(plot_df, unique_attrs, on="ID", how="left")
            if len(group_cols) > 1:
                # 2因子の場合は連結して1つの属性として扱う（散布図用）
                plot_df["strat_comb"] = plot_df[group_cols[0]].astype(str) + " / " + plot_df[group_cols[1]].astype(str)
                color_field = "strat_comb"
                tooltip_fields = ["ID", "Name", "x_score", "y_score", group_cols[0], group_cols[1]]
            else:
                color_field = group_cols[0]
                tooltip_fields = ["ID", "Name", "x_score", "y_score", group_cols[0]]
        else:
            color_field = None
            tooltip_fields = ["ID", "Name", "x_score", "y_score"]

        # --- ズーム選択の追加 ---
        zoom = alt.selection_interval(bind='scales')

        # --- 散布図の作成 ---
        if color_field:
            highlight = alt.selection_multi(fields=[color_field], bind="legend",
                                            on="click")
            scatter = alt.Chart(plot_df).mark_point().encode(
                x=alt.X("x_score:Q", title=x_test + " スコア", scale=alt.Scale(domain=[0,100])),
                y=alt.Y("y_score:Q", title=y_test + " スコア", scale=alt.Scale(domain=[0,100])),
                color=alt.Color(f"{color_field}:N", title=color_field),
                tooltip=tooltip_fields,
                opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
            ).add_selection(highlight, zoom)
        else:
            scatter = alt.Chart(plot_df).mark_point().encode(
                x=alt.X("x_score:Q", title=x_test + " スコア", scale=alt.Scale(domain=[0,100])),
                y=alt.Y("y_score:Q", title=y_test + " スコア", scale=alt.Scale(domain=[0,100])),
                tooltip=tooltip_fields
            ).add_selection(zoom)

        chart_main = scatter

        # --- ライン（回帰直線／LOESS）の描画 ---
        if line_type != "なし":
            line_dfs = []
            if color_field:
                for grp, sub_df in plot_df.groupby(color_field):
                    if line_type == "回帰直線":
                        line_df = compute_regression(sub_df, "x_score", "y_score", conf_flag, alpha)
                    elif line_type == "LOESS":
                        line_df = compute_loess(sub_df, "x_score", "y_score", frac=0.3)
                    else:
                        line_df = pd.DataFrame()
                    if line_df.empty:
                        continue
                    line_df[color_field] = grp
                    line_dfs.append(line_df)
            else:
                if line_type == "回帰直線":
                    line_df = compute_regression(plot_df, "x_score", "y_score", conf_flag, alpha)
                elif line_type == "LOESS":
                    line_df = compute_loess(plot_df, "x_score", "y_score", frac=0.3)
                else:
                    line_df = pd.DataFrame()
                if not line_df.empty:
                    line_dfs.append(line_df)
            if line_dfs:
                lines_df = pd.concat(line_dfs, ignore_index=True)
                if color_field:
                    line_chart = alt.Chart(lines_df).mark_line().encode(
                        x=alt.X("x_score:Q", scale=alt.Scale(domain=[0,100])),
                        y=alt.Y("pred:Q", scale=alt.Scale(domain=[0,100])),
                        color=alt.Color(f"{color_field}:N", title=color_field),
                        opacity=alt.condition(highlight, alt.value(1), alt.value(0.1))
                    )
                else:
                    line_chart = alt.Chart(lines_df).mark_line(color="red").encode(
                        x=alt.X("x_score:Q", scale=alt.Scale(domain=[0,100])),
                        y=alt.Y("pred:Q", scale=alt.Scale(domain=[0,100]))
                    )
                chart_main = chart_main + line_chart
                if line_type == "回帰直線" and conf_flag:
                    if color_field:
                        conf_band = alt.Chart(lines_df).mark_area(opacity=0.2).encode(
                            x=alt.X("x_score:Q", scale=alt.Scale(domain=[0,100])),
                            y=alt.Y("ci_lower:Q", scale=alt.Scale(domain=[0,100])),
                            y2=alt.Y2("ci_upper:Q"),
                            color=alt.Color(f"{color_field}:N", title=color_field),
                            opacity=alt.condition(highlight, alt.value(0.3), alt.value(0.1))
                        )
                    else:
                        conf_band = alt.Chart(lines_df).mark_area(opacity=0.2, color="red").encode(
                            x=alt.X("x_score:Q", scale=alt.Scale(domain=[0,100])),
                            y=alt.Y("ci_lower:Q", scale=alt.Scale(domain=[0,100])),
                            y2=alt.Y2("ci_upper:Q")
                        )
                    chart_main = chart_main + conf_band

        # --- マージナルヒストグラムの追加 ---
        if show_marginal:
            # x軸のヒストグラム（上側）— zoomフィルタ適用、軸ラベル非表示、色エンコード追加
            if color_field:
                chart_x = alt.Chart(plot_df).mark_bar().encode(
                    x=alt.X("x_score:Q", bin=alt.Bin(maxbins=30), scale=alt.Scale(domain=[0,100]), axis=alt.Axis(title="")),
                    y=alt.Y("count()", stack="zero", axis=alt.Axis(title="")),
                    color=alt.Color(f"{color_field}:N", title=color_field),
                    tooltip=["count()"]
                ).transform_filter(zoom).properties(height=60)
            else:
                chart_x = alt.Chart(plot_df).mark_bar().encode(
                    x=alt.X("x_score:Q", bin=alt.Bin(maxbins=30), scale=alt.Scale(domain=[0,100]), axis=alt.Axis(title="")),
                    y=alt.Y("count()", stack="zero", axis=alt.Axis(title="")),
                    tooltip=["count()"]
                ).transform_filter(zoom).properties(height=60)

            # y軸のヒストグラム（右側）— zoomフィルタ適用、軸ラベル非表示
            if color_field:
                chart_y = alt.Chart(plot_df).mark_bar().encode(
                    y=alt.Y("y_score:Q", bin=alt.Bin(maxbins=30), scale=alt.Scale(domain=[0,100]), axis=alt.Axis(title="")),
                    x=alt.X("count()", stack="zero", axis=alt.Axis(title="")),
                    color=alt.Color(f"{color_field}:N", title=color_field),
                    tooltip=["count()"]
                ).transform_filter(zoom).properties(width=60)
            else:
                chart_y = alt.Chart(plot_df).mark_bar().encode(
                    y=alt.Y("y_score:Q", bin=alt.Bin(maxbins=30), scale=alt.Scale(domain=[0,100]), axis=alt.Axis(title="")),
                    x=alt.X("count()", stack="zero", axis=alt.Axis(title="")),
                    tooltip=["count()"]
                ).transform_filter(zoom).properties(width=60)

            scatter_with_y = alt.hconcat(chart_main, chart_y).resolve_scale(y='shared')
            final_chart = alt.vconcat(chart_x, scatter_with_y).resolve_scale(x='shared')
        else:
            final_chart = chart_main

        # --- 相関係数・ヒートマップの追加 ---
        if show_corr:
            if not group_cols:
                corr_value = plot_df["x_score"].corr(plot_df["y_score"])
                corr_df = pd.DataFrame({'dummy': [''], 'corr': [corr_value]})
                heatmap = alt.Chart(corr_df).mark_rect().encode(
                    x=alt.X('dummy:N', axis=alt.Axis(labels=False, ticks=False, title="")),
                    y=alt.Y('dummy:N', axis=alt.Axis(labels=False, ticks=False, title="")),
                    # ここでscaleのdomainとrangeを3点指定
                    color=alt.Color('corr:Q', scale=alt.Scale(domain=[-1, 0, 1], range=["blue", "white", "red"]))
                )
                text_chart = alt.Chart(corr_df).mark_text(color='black').encode(
                    text=alt.Text('corr:Q', format=".2f")
                )
                corr_chart = (heatmap + text_chart).properties(width=100, height=100)
            else:
                if len(group_cols) == 1:
                    corr_series = plot_df.groupby(color_field).apply(lambda d: d["x_score"].corr(d["y_score"]))
                    corr_df = corr_series.reset_index()
                    corr_df.columns = [color_field, "corr"]
                    corr_df["dummy"] = "Correlation"
                    heatmap = alt.Chart(corr_df).mark_rect().encode(
                        x=alt.X("dummy:N", axis=alt.Axis(labels=False, ticks=False, title="")),
                        y=alt.Y(f"{color_field}:N", axis=alt.Axis(title=color_field)),
                        color=alt.Color("corr:Q", scale=alt.Scale(domain=[-1, 0, 1], range=["blue", "white", "red"]))
                    )
                    text_chart = alt.Chart(corr_df).mark_text(color="black").encode(
                        x=alt.X("dummy:N", axis=alt.Axis(labels=False, ticks=False, title="")),
                        y=alt.Y(f"{color_field}:N", axis=alt.Axis(title=color_field)),
                        text=alt.Text("corr:Q", format=".2f")
                    )
                    corr_chart = (heatmap + text_chart).properties(width=100, height=200)
                elif len(group_cols) == 2:
                    corr_series = plot_df.groupby(group_cols).apply(lambda d: d["x_score"].corr(d["y_score"]))
                    corr_df = corr_series.reset_index()
                    corr_df.columns = [group_cols[0], group_cols[1], "corr"]
                    heatmap = alt.Chart(corr_df).mark_rect().encode(
                        x=alt.X(f"{group_cols[1]}:N", title=group_cols[1]),
                        y=alt.Y(f"{group_cols[0]}:N", title=group_cols[0]),
                        color=alt.Color("corr:Q", scale=alt.Scale(domain=[-1, 0, 1], range=["blue", "white", "red"]))
                    )
                    text_chart = alt.Chart(corr_df).mark_text(color="black").encode(
                        x=alt.X(f"{group_cols[1]}:N"),
                        y=alt.Y(f"{group_cols[0]}:N"),
                        text=alt.Text("corr:Q", format=".2f")
                    )
                    corr_chart = (heatmap + text_chart).properties(width=150, height=150)
            final_chart = alt.hconcat(final_chart, corr_chart)

        display(final_chart)

update_button.on_click(update_chart)


Saving longdata2.csv to longdata2.csv


Dropdown(description='X軸試験ID:', options=('AiGrow_R6.5月_主体性', 'AiGrow_R6.5月_思考力', 'ベ駿マ_9月_全', 'ベ駿マ_9月_国語', 'ベ駿マ…

Dropdown(description='Y軸試験ID:', options=('AiGrow_R6.5月_主体性', 'AiGrow_R6.5月_思考力', 'ベ駿マ_9月_全', 'ベ駿マ_9月_国語', 'ベ駿マ…

Dropdown(description='層別属性1:', options=('なし', 'Class', 'Gender'), value='なし')

Dropdown(description='層別属性2:', options=('なし', 'Class', 'Gender'), value='なし')

Dropdown(description='ライン種別:', options=('なし', '回帰直線', 'LOESS'), value='なし')

Checkbox(value=False, description='信頼区間描画（回帰直線のみ）')

FloatSlider(value=0.05, description='信頼度 α', max=0.2, min=0.01, step=0.01)

Checkbox(value=False, description='マージナルヒストグラム描画')

Checkbox(value=False, description='相関係数・ヒートマップ描画')

Button(description='グラフ更新', style=ButtonStyle())

Output()

In [1]:
# @title Interactive Bar Graph
import pandas as pd
import altair as alt
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np

# ---------------------------
# CSVファイルのアップロードと読み込み
# ---------------------------
uploaded = files.upload()
if uploaded:
    filename = list(uploaded.keys())[0]
    df = pd.read_csv(filename)
    display(df.head())
else:
    print("ファイルがアップロードされませんでした。")

# ---------------------------
# 必要な列と試験IDの作成
# ---------------------------
standard_cols = {"ID", "Name", "試験名", "時期", "科目", "スコア", "試験ID"}
# 試験IDは「試験名」「時期」「科目」を連結して作成
df["試験ID"] = df["試験名"].astype(str) + "_" + df["時期"].astype(str) + "_" + df["科目"].astype(str)

# ---------------------------
# 因子属性候補の抽出（標準列以外）
# ---------------------------
extra_columns = [col for col in df.columns if col not in standard_cols]

# ---------------------------
# 試験選択用ウィジェット（単一選択）
# ---------------------------
unique_tests = list(df["試験ID"].unique())
exam_dropdown = widgets.Dropdown(
    options=unique_tests,
    description='試験ID:',
    value=unique_tests[0],
    disabled=False,
)

# ---------------------------
# 並び順選択用ウィジェット
# ---------------------------
sort_method_dropdown = widgets.Dropdown(
    options=["ID順", "順位順"],
    description="並び順:",
    value="ID順",
    disabled=False,
)

# ---------------------------
# 因子属性選択用ウィジェット（最大2種類）
# ---------------------------
# 「None」を選択肢に追加（未選択状態）
factor_options = ["None"] + extra_columns
factor_dropdown1 = widgets.Dropdown(
    options=factor_options,
    description='因子属性1:',
    value="None",
    disabled=False,
)
factor_dropdown2 = widgets.Dropdown(
    options=factor_options,
    description='因子属性2:',
    value="None",
    disabled=False,
)

# ---------------------------
# その他のウィジェット
# ---------------------------
threshold_text = widgets.Text(
    value='',
    description='閾値:',
    placeholder='半角数字を入力'
)
mean_button = widgets.Button(description='平均値を自動入力')
show_labels_checkbox = widgets.Checkbox(value=False, description='スコア表示')
# 文字サイズ変更用ウィジェット（デフォルト値10）
font_size_text = widgets.IntText(value=10, description='文字サイズ:', min=1)
update_chart_button = widgets.Button(description='グラフを更新')

# ---------------------------
# 受験者名の順序（ID順）を保持
# ---------------------------
names_df = df[['ID', 'Name']].drop_duplicates().sort_values("ID")
ordered_names = names_df["Name"].tolist()

# ---------------------------
# 各試験内での順位計算用関数
# ---------------------------
def compute_rank(grp):
    grp = grp.copy()
    valid_count = grp['スコア'].notna().sum()  # 有効なスコアの件数
    grp.loc[grp['スコア'].notna(), '順位_num'] = grp.loc[grp['スコア'].notna(), 'スコア'].rank(ascending=False, method='min')
    grp.loc[grp['スコア'].isna(), '順位_num'] = valid_count + 1  # 欠損値は最後尾
    grp['順位_disp'] = grp.apply(lambda row: row['順位_num'] if pd.notna(row['スコア']) else "NA", axis=1)
    return grp

# ---------------------------
# グラフ描画関数
# ---------------------------
def update_chart(b):
    clear_output(wait=True)

    # ウィジェットの再表示
    ui_items = [exam_dropdown, sort_method_dropdown, factor_dropdown1, factor_dropdown2,
                show_labels_checkbox, font_size_text, threshold_text, mean_button, update_chart_button]
    for w in ui_items:
        display(w)

    # 選択された試験IDのデータを抽出
    selected_exam = exam_dropdown.value
    filtered_df = df[df["試験ID"] == selected_exam].copy()

    # 単一試験内でスコア降順（欠損値は最後）にソートし、順位を計算
    filtered_df = filtered_df.sort_values(by='スコア', ascending=False, na_position='last')
    filtered_df = compute_rank(filtered_df)

    # 色分けのため、因子属性の選択状況を判定
    factor1 = factor_dropdown1.value
    factor2 = factor_dropdown2.value

    if factor1 != "None" and factor2 != "None":
        # 両方選択された場合、2つの属性の値を結合した列を作成
        filtered_df['CompositeFactor'] = filtered_df[factor1].astype(str) + "_" + filtered_df[factor2].astype(str)
        color_field = alt.Color('CompositeFactor:N', legend=alt.Legend(title=f'{factor1} + {factor2}'))
        selection_field = 'CompositeFactor'
    elif factor1 != "None":
        color_field = alt.Color(f"{factor1}:N", legend=alt.Legend(title=factor1))
        selection_field = factor1
    elif factor2 != "None":
        color_field = alt.Color(f"{factor2}:N", legend=alt.Legend(title=factor2))
        selection_field = factor2
    else:
        color_field = alt.Color('Name:N', scale=alt.Scale(domain=ordered_names),
                                legend=alt.Legend(title='生徒名'))
        selection_field = 'Name'

    # Ctrlキーを押しながら複数選択できるように設定(, toggle="event.ctrlKey")
    selection = alt.selection_multi(fields=[selection_field], bind='legend')

    # 並び順の設定
    if sort_method_dropdown.value == "ID順":
        sort_order = ordered_names
    else:
        sort_order = alt.EncodingSortField(field='順位_num', order='ascending')

    base = alt.Chart(filtered_df).encode(
        x=alt.X('Name:N', sort=sort_order, title='Name'),
        y=alt.Y('スコア:Q', title='スコア'),
        tooltip=[
            alt.Tooltip('Name:N', title='Name'),
            alt.Tooltip('スコア:Q', title='スコア'),
            alt.Tooltip('順位_disp:N', title='順位')
        ]
    )

    bars = base.mark_bar().encode(
        color=alt.condition(selection,
                            color_field,
                            alt.value('lightgray'))
    ).add_selection(selection)

    layered = bars

    if show_labels_checkbox.value:
        text = base.mark_text(dy=-5, color='black', fontSize=font_size_text.value).encode(
            text=alt.Text('スコア:Q', format=".1f")
        )
        layered = alt.layer(bars, text)

    try:
        threshold = float(threshold_text.value)
        threshold_df = pd.DataFrame({'y': [threshold]})
        rule = alt.Chart(threshold_df).mark_rule(strokeDash=[5, 5], color='red').encode(
            y=alt.Y('y:Q')
        )
        layered = layered + rule
    except ValueError:
        pass

    chart = layered.properties(
        title=f'試験ID: {selected_exam} の成績'
    )

    display(chart)

# ---------------------------
# ボタン・ウィジェットのコールバック
# ---------------------------
def set_mean_value(b):
    selected_exam = exam_dropdown.value
    filtered_df = df[df["試験ID"] == selected_exam]
    if not filtered_df.empty:
        mean_val = filtered_df['スコア'].mean()
        threshold_text.value = str(round(mean_val, 2))
    update_chart(None)

mean_button.on_click(set_mean_value)
update_chart_button.on_click(update_chart)
exam_dropdown.observe(update_chart, names='value')
sort_method_dropdown.observe(update_chart, names='value')
factor_dropdown1.observe(update_chart, names='value')
factor_dropdown2.observe(update_chart, names='value')
show_labels_checkbox.observe(update_chart, names='value')
font_size_text.observe(update_chart, names='value')

# ---------------------------
# 初期ウィジェットの表示
# ---------------------------
ui_items = [exam_dropdown, sort_method_dropdown, factor_dropdown1, factor_dropdown2,
            show_labels_checkbox, font_size_text, threshold_text, mean_button, update_chart_button]
for w in ui_items:
    display(w)

update_chart(None)


Dropdown(description='試験ID:', index=6, options=('AiGROW_4月_思考力/論理性', 'AiGROW_4月_判断力/決断力', 'AiGROW_4月_表現力', 'Ai…

Dropdown(description='並び順:', index=1, options=('ID順', '順位順'), value='順位順')

Dropdown(description='因子属性1:', index=2, options=('None', 'Class', 'Gender', 'Eng-QAdvice', 'Eng-AI', 'Eng-Grou…

Dropdown(description='因子属性2:', options=('None', 'Class', 'Gender', 'Eng-QAdvice', 'Eng-AI', 'Eng-GroupStyle', …

Checkbox(value=False, description='スコア表示')

IntText(value=10, description='文字サイズ:')

Text(value='', description='閾値:', placeholder='半角数字を入力')

Button(description='平均値を自動入力', style=ButtonStyle())

Button(description='グラフを更新', style=ButtonStyle())

Deprecated since `altair=5.0.0`. Use selection_point instead.
  selection = alt.selection_multi(fields=[selection_field], bind='legend')
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(selection)
