In [None]:
# ==========================================
# SETUP BLOCK 
# ==========================================

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency

# ---- Imports from project files ----
sys.path.append(os.path.abspath(".."))
from Helper_funtions import (
    clean_up_subjects,
    calculate_true_false_score,
    calculate_Internet_terms_understanding_score,
    group_internet_understanding
)
from lists import (
    multiple_choice_questions,
    likert_mapping,
    comparison_pairs_by_AI_questions,
    cross_tab_titles_and_colors
    
)
from answer_categories import question_orders

# ---- General plot style ----
# Notebook-wide defaults; individual plots below pass their own figsize.
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# ---- Data loading ----
# The path is relative to the current working directory.
DATA_FILE = os.path.join("..", "Data", "Fertige Tabelle.xlsx")
df = pd.read_excel(DATA_FILE)
# Strip surrounding whitespace from the header names so the lookups below match.
df.columns = df.columns.str.strip()

# Clean up multi-subject columns (only those present in the sheet)
for col in ["Most used subjects", "Preferred Subjects", "Least preferred Subjects"]:
    if col in df.columns:
        df = clean_up_subjects(df, col)

# Calculate additional scores
# The true/false score is only computed when both source columns exist.
if all(q in df.columns for q in ["True/False_1", "True/False_2"]):
    df = calculate_true_false_score(df)

# The internet-terms scores are computed when at least one "Internet terms_*" column exists.
if any(col.startswith("Internet terms_") for col in df.columns):
    df = calculate_Internet_terms_understanding_score(df)
    df = group_internet_understanding(df)

print("✅ Setup complete – DataFrame loaded and preprocessed")
print(f"Rows: {len(df)}, Columns: {len(df.columns)}")

In [None]:
# ==========================================
# BUILD COUNT COLUMNS FROM MULTI-CHOICE TEXT
# ==========================================

def build_count_from_multichoice(df_in: pd.DataFrame, source_col: str, new_col: str) -> pd.DataFrame:
    """Add `new_col` holding the number of distinct options selected in `source_col`.

    Each cell of `source_col` is read as a ","- or ";"-separated list (square
    brackets and quote characters are ignored). Missing cells, blank cells, the
    literal texts "nan"/"none" and cells without any non-empty option yield a
    missing count. The result is stored with the nullable "Int64" dtype.

    `df_in` is modified in place and also returned. When `source_col` is not a
    column of `df_in`, a warning is printed and `df_in` is returned unchanged.
    """
    if source_col not in df_in.columns:
        print(f"⚠️ Source column '{source_col}' not in DataFrame; cannot build '{new_col}'.")
        return df_in

    def _count_items(cell):
        if pd.isna(cell):
            return np.nan
        text = str(cell).strip()
        if not text or text.lower() in ("nan", "none"):
            return np.nan

        # unify the separator, then drop list/quote punctuation
        cleaned = text.replace(";", ",")
        for junk in ("[", "]", "'", '"'):
            cleaned = cleaned.replace(junk, "")

        # a set removes repeated options; the empty string is not an option
        distinct = {piece.strip() for piece in cleaned.split(",")} - {""}
        return len(distinct) or np.nan

    counts = df_in[source_col].apply(_count_items)
    df_in[new_col] = counts.astype("Int64")
    print(f"✅ Built count column '{new_col}' from '{source_col}'.")
    return df_in


# Derive the "(Count)" columns: number of distinct options ticked per respondent.
df = build_count_from_multichoice(df, "Reasons to use AI", "Reasons to use AI (Count)")

df = build_count_from_multichoice(df, "Purposes to use AI", "Purposes to use AI (Count)")


# Register the observed range of the reasons count as its category order.
# The known failure cases (column missing, no non-missing value) are checked
# explicitly and reported instead of being swallowed by a blanket try/except.
_reason_counts = df.get("Reasons to use AI (Count)")
if _reason_counts is None or not _reason_counts.notna().any():
    print("⚠️ No values in 'Reasons to use AI (Count)'; category order not registered.")
else:
    min_c = int(_reason_counts.min())
    max_c = int(_reason_counts.max())
    if "question_orders" in globals():
        question_orders["Reasons to use AI (Count)"] = list(range(min_c, max_c + 1))

In [None]:
# ------ bar charts ------

def build_pair_dataframe(df_in: pd.DataFrame, left: str, right: str) -> pd.DataFrame:
    """Return a two-column frame of stripped strings for cross-tabulating `left` × `right`.

    Rows with a missing value in either column are dropped first. A column
    listed in `multiple_choice_questions` is split on "," and exploded, giving
    one row per selected option (the original row index is repeated). Rows
    that end up with an empty string in either column are removed.
    """
    pair = df_in[[left, right]].dropna().copy()

    for column in (left, right):
        if column in multiple_choice_questions:
            pair[column] = pair[column].astype(str).str.split(",")
            pair = pair.explode(column)
        # applies to single-choice values and to exploded options alike
        pair[column] = pair[column].astype(str).str.strip()

    has_left = pair[left] != ""
    has_right = pair[right] != ""
    return pair[has_left & has_right]


def order_rows_cols(ct: pd.DataFrame, rows_key: str, cols_key: str) -> pd.DataFrame:
    """Return `ct` with rows and columns in a meaningful order.

    For each axis: if its key is present in `question_orders`, labels are put
    in that declared order, followed by any labels the declaration does not
    mention (in their current order). Declared entries are matched to labels
    by their string form, so an order declared as integers (e.g. [1, 2, 3])
    also applies to the string labels "1", "2", "3".

    Without a declared order, columns are sorted by numeric value when every
    label parses as a number; the row index is converted to numbers and
    sorted under the same condition. Otherwise the axis is left as it is.

    The input frame is not modified.
    """
    def _declared_first(labels, declared):
        # map the text of each label to the label itself (first one wins)
        by_text = {}
        for label in labels:
            by_text.setdefault(str(label), label)
        front = []
        for entry in declared:
            text = str(entry)
            if text in by_text and by_text[text] not in front:
                front.append(by_text[text])
        return front + [label for label in labels if label not in front]

    # columns
    if cols_key in question_orders:
        ct = ct[_declared_first(list(ct.columns), question_orders[cols_key])]
    else:
        try:
            ct = ct[sorted(ct.columns, key=float)]
        except (TypeError, ValueError):
            pass  # labels are not numeric: keep the current column order

    # rows
    if rows_key in question_orders:
        ct = ct.reindex(_declared_first(list(ct.index), question_orders[rows_key]))
    else:
        try:
            numeric_index = pd.to_numeric(ct.index)
        except (TypeError, ValueError):
            pass  # labels are not numeric: keep the current row order
        else:
            # work on a copy so the caller's frame keeps its original index
            renumbered = ct.copy()
            renumbered.index = numeric_index
            ct = renumbered.sort_index()
    return ct


# For every configured pair: cross-tabulate, print the table and draw a
# 100% stacked bar chart (one bar per base-question category).
for base_question, compare_list in comparison_pairs_by_AI_questions.items():
    for compare_question in compare_list:
        # prepare data
        data = build_pair_dataframe(df, base_question, compare_question)
        if data.empty:
            print(f"⚠️ No overlapping data for '{base_question}' × '{compare_question}'. Skipping.")
            continue

        # crosstabulations
        ct = pd.crosstab(data[base_question], data[compare_question])

        # merge rows/columns that carry the same label
        if not ct.columns.is_unique:
            ct = ct.T.groupby(level=0).sum().T
        if not ct.index.is_unique:
            ct = ct.groupby(level=0).sum()

        # order
        ct = order_rows_cols(ct, rows_key=base_question, cols_key=compare_question)

        # Row-normalise to percentages. The chart is titled "100% stacked",
        # labelled "Percentage (%)" and limited to 0–100, so it must show
        # shares within each row rather than raw counts.
        row_totals = ct.sum(axis=1)
        ct_pct = ct.div(row_totals.where(row_totals > 0), axis=0).mul(100).fillna(0)

        # table output
        print(f"\n📊 {compare_question} within each {base_question} (rows sum to 100%)")
        print("Counts:\n", ct)
        print("Row percentages (%):\n", ct_pct.round(1))

        # custom titles: first entry is the title, any further entries are bar colours
        title_and_colors = cross_tab_titles_and_colors.get(
            (base_question, compare_question),
            [f"{compare_question} within each {base_question} (100% stacked)"]
        )
        plot_title = title_and_colors[0]

        # blue palette: 5 shades, or more when there are more categories,
        # so that no two stacked segments share a colour
        blue_palette = sns.color_palette("Blues", n_colors=max(5, ct_pct.shape[1]))

        if len(title_and_colors) > 1:
            colors = title_and_colors[1:]
        else:
            colors = blue_palette

        ax = ct_pct.plot(kind="bar", stacked=True, figsize=(10, 6), color=colors, width=0.9)

        ax.set_title(plot_title)
        ax.set_ylabel("Percentage (%)")
        ax.set_xlabel(base_question)
        ax.set_ylim(0, 100)
        plt.xticks(rotation=45, ha="right")
        plt.legend(title=compare_question, bbox_to_anchor=(1.01, 1), loc="upper left")
        plt.tight_layout()
        plt.show()
        # release the figure; this loop can create many of them
        plt.close(ax.figure)


In [None]:
# ==========================================
# SIGNIFICANCE TESTS for AI×AI crosstabs
# ==========================================

def cramers_v_corrected(chi2, ct):
    """Bias-corrected Cramér’s V (Bergsma, 2013).

    Parameters
    ----------
    chi2 : chi-square statistic of the contingency table.
    ct   : contingency table (DataFrame of counts).

    Returns NaN when the table is empty, when it holds a single observation,
    or when the corrected smaller dimension is not positive.
    """
    total = ct.values.sum()
    if total == 0:
        return np.nan

    n_rows, n_cols = ct.shape
    if total > 1:
        bias = ((n_cols - 1) * (n_rows - 1)) / (total - 1)
        phi2_adj = max(0, chi2 / total - bias)
        rows_adj = n_rows - ((n_rows - 1) ** 2) / (total - 1)
        cols_adj = n_cols - ((n_cols - 1) ** 2) / (total - 1)
    else:
        # a single observation: the correction terms are undefined
        phi2_adj, rows_adj, cols_adj = np.nan, n_rows, n_cols

    smaller_dim = min(rows_adj - 1, cols_adj - 1)
    if smaller_dim > 0:
        return np.sqrt(phi2_adj / smaller_dim)
    return np.nan

# One result record (dict) per successfully tested pair.
ai_ai_tests = []

# main loop: chi-square test of independence for every configured pair
for base_question, compare_list in comparison_pairs_by_AI_questions.items():
    for compare_question in compare_list:
        try:
            # prepare paired data
            data = build_pair_dataframe(df, base_question, compare_question)
            if data.empty:
                print(f"⚠️ No overlapping data for '{base_question}' × '{compare_question}'. Skipping.")
                continue

            # contingency table
            ct = pd.crosstab(data[base_question], data[compare_question])

            # ensure unique index/columns
            if not ct.columns.is_unique:
                ct = ct.T.groupby(level=0).sum().T
            if not ct.index.is_unique:
                ct = ct.groupby(level=0).sum()

            # order rows/cols
            ct = order_rows_cols(ct, rows_key=base_question, cols_key=compare_question)

            # drop zero-sum rows/cols (safety); the test needs at least a 2×2 table
            ct = ct.loc[ct.sum(axis=1) > 0, ct.sum(axis=0) > 0]
            if ct.shape[0] < 2 or ct.shape[1] < 2:
                print(f"⚠️ Contingency too small for '{base_question}' × '{compare_question}'. Skipping.")
                continue

            # Chi-square test (no continuity correction)
            chi2, p, dof, expected = chi2_contingency(ct.values, correction=False)
            expected_df = pd.DataFrame(expected, index=ct.index, columns=ct.columns)

            # effect size + assumption checks (smallest expected count, share of cells below 5)
            v = cramers_v_corrected(chi2, ct)
            min_exp = float(expected_df.values.min())
            prop_lt5 = float((expected_df.values < 5).mean())
            n_total = int(ct.values.sum())

            # console summary
            print(f"\n🧪 Significance: {base_question} × {compare_question}")
            print(f"χ²({dof}) = {chi2:.3f}, p = {p:.4f},  Cramér’s V = {v:.3f}")
            print(f"Assumptions: min(expected) = {min_exp:.2f}, %cells<5 = {100*prop_lt5:.1f}%")

            # collect results
            ai_ai_tests.append({
                "base_question": base_question,
                "compare_question": compare_question,
                "n": n_total,
                "rows": ct.shape[0],
                "cols": ct.shape[1],
                "chi2": chi2,
                "dof": dof,
                "p": p,
                "cramers_v": v,
                "min_expected": min_exp,
                "prop_expected_lt5": prop_lt5,
            })

        except Exception as e:
            # one failing pair must not stop the remaining tests
            print(f" Failed significance for {base_question} × {compare_question}: {e}")

# results dataframe (explicit columns keep the layout stable even with zero results)
ai_ai_tests_df = (
    pd.DataFrame(
        ai_ai_tests,
        columns=["base_question","compare_question","n","rows","cols",
                 "chi2","dof","p","cramers_v","min_expected","prop_expected_lt5"]
    )
    .sort_values(["base_question","compare_question"])
    .reset_index(drop=True)
)

print("\n✅ AI×AI significance finished.")
print(f"Total tests: {len(ai_ai_tests_df)}")

# optional Excel export
EXPORT = True
EXPORT_PATH = os.path.join("..", "Data/test_results", "ai_ai_crosstabs_significance.xlsx")
if EXPORT:
    # create the target folder first so the export cannot fail on a missing directory
    os.makedirs(os.path.dirname(EXPORT_PATH), exist_ok=True)
    with pd.ExcelWriter(EXPORT_PATH, engine="xlsxwriter") as writer:
        ai_ai_tests_df.to_excel(writer, index=False, sheet_name="chi2_summary")
    print(f"💾 Exported to: {EXPORT_PATH}")



In [None]:
# ==========================================
# SPEARMAN TREND TESTS (ordinal × ordinal)
# ==========================================

from scipy.stats import spearmanr

# flexible mapping to numeric
def map_to_numeric_flexible(series: pd.Series, varname: str) -> pd.Series:
    """Map an ordinal answer column to numbers, trying several strategies in order.

    1. `likert_mapping[varname]`, if defined: values are mapped through it
       (keys compared as strings).
    2. `question_orders[varname]`, if defined: each value becomes its position
       in that order; used only when at least one value matches.
    3. The built-in German answer scales below: the first scale that contains
       every non-missing value of the column is used (value -> position).
    4. Plain numeric conversion; unparseable values become NaN.

    Missing values stay missing in every strategy and are ignored when the
    built-in scales are matched.
    """
    s = series.astype(str)

    # project-specific likert mapping
    if 'likert_mapping' in globals() and varname in likert_mapping:
        mp = {str(k): v for k, v in likert_mapping[varname].items()}
        return s.map(mp)

    # project-specific category order
    if 'question_orders' in globals() and varname in question_orders:
        order = [str(x) for x in question_orders[varname]]
        mapper = {cat: i for i, cat in enumerate(order)}
        out = s.map(mapper)
        if out.notna().any():
            return out.astype(float)

    # orders
    orders = [
        ["Nie","Selten","Manchmal","Oft","Sehr oft"],
        ["Nie","Seltener","Etwa 1 Mal pro Woche","Mehrmals pro Woche","Täglich"],
        ["Gar nicht","Eher wenig","Teils/teils","Eher gut","Sehr gut"],
        ["Gar nicht verlässlich","Wenig verlässlich","Unsicher / Ich habe keine Meinung",
         "Teils/teils","Eher verlässlich","Sehr verlässlich"],
        ["Kein Verständnis","Schlechtes Verständnis","Mittelmässiges Verständnis",
         "Gutes Verständnis","Völliges Verständnis"],
        ["Stört mich sehr","Stört mich ein wenig","Neutral / Mir egal","Finde ich gut"],
    ]
    # Only real answers take part in the scale match: the string form of a
    # missing value ("nan", "None", ...) would otherwise make every scale fail.
    observed = set(s[series.notna().to_numpy()].unique())
    if observed:
        for order in orders:
            if observed.issubset(order):
                mapper = {cat: i for i, cat in enumerate(order)}
                # texts of missing values are not in the mapper and map to NaN
                return s.map(mapper).astype(float)

    # fallback, numeric conversion
    return pd.to_numeric(series, errors="coerce")


# order categories
def make_ordered_group(series: pd.Series, varname: str) -> pd.Categorical:
    """Turn `series` into an ordered categorical of its string values.

    Category order:
    - `question_orders[varname]` if defined (entries compared as strings,
      only those present in the data), followed by the remaining values in
      alphabetical order;
    - otherwise ascending numeric order when every value parses as a number;
    - otherwise alphabetical order.

    Categories are always the string values as they occur in the data, so
    every element of the result belongs to a category.
    """
    s = series.astype(str)
    present = list(s.unique())
    uniq = sorted(present)

    # project-specific order if available
    if 'question_orders' in globals() and varname in question_orders:
        # dict.fromkeys removes repeated entries (categories must be unique)
        desired = list(dict.fromkeys(str(x) for x in question_orders[varname]))
        present_set = set(present)
        cats = [c for c in desired if c in present_set] + [c for c in uniq if c not in desired]
    else:
        # try numeric sort, fallback alphabetic; the labels themselves are
        # kept unchanged ("1.0" stays "1.0") so they still match the data
        try:
            cats = sorted(present, key=float)
        except (TypeError, ValueError):
            cats = uniq
    return pd.Categorical(s, categories=cats, ordered=True)


#run Spearman test 
def run_spearman_pair(df_in: pd.DataFrame, x: str, y: str):
    """Spearman rank correlation between columns `x` and `y` of `df_in`.

    Both columns are converted with `map_to_numeric_flexible`; rows where
    either numeric value or either raw value is missing are dropped.

    Returns None (after printing a message) when one of the variables is a
    text multiple-choice question without "(Count)" in its name, or when no
    complete rows remain. Otherwise returns a dict with:
      x, y          variable names
      n             number of complete rows
      rho, p        Spearman coefficient and p-value
      direction     "positive", "negative", "zero", or "undefined" when rho
                    is NaN (e.g. one variable is constant)
      median_by_x   median of numeric y per category of x
      median_by_y   median of numeric x per category of y
    """
    # skip text MC
    if 'multiple_choice_questions' in globals():
        for name in (x, y):
            if (name in multiple_choice_questions) and "(Count)" not in name:
                print(f"Spearman skipped for {x} × {y}: '{name}' is MC-text, use (Count) column.")
                return None

    X_raw = df_in[x]
    Y_raw = df_in[y]

    # map to numeric
    X = map_to_numeric_flexible(X_raw, x)
    Y = map_to_numeric_flexible(Y_raw, y)

    # build data; the raw values are kept for the grouped medians below
    data = pd.DataFrame({x: X, y: Y, f"{x}__raw": X_raw, f"{y}__raw": Y_raw}).dropna()
    if data.empty:
        print(f"No data for Spearman {x} × {y}.")
        return None

    # Spearman test
    rho, p = spearmanr(data[x].values, data[y].values)

    # medians grouped by x
    x_lab = make_ordered_group(data[f"{x}__raw"], x)
    data["_x_lab"] = x_lab
    median_by_x = data.groupby("_x_lab", observed=True)[y].median().reset_index()
    median_by_x.columns = [x, f"median_{y}"]

    # medians grouped by y
    y_lab = make_ordered_group(data[f"{y}__raw"], y)
    data["_y_lab"] = y_lab
    median_by_y = data.groupby("_y_lab", observed=True)[x].median().reset_index()
    median_by_y.columns = [y, f"median_{x}"]

    # a NaN coefficient is neither positive, negative nor zero
    if np.isnan(rho):
        direction = "undefined"
    elif rho > 0:
        direction = "positive"
    elif rho < 0:
        direction = "negative"
    else:
        direction = "zero"

    res = {
        "x": x, "y": y,
        "n": int(len(data)),
        "rho": float(rho),
        "p": float(p),
        "direction": direction,
        "median_by_x": median_by_x,
        "median_by_y": median_by_y,
    }
    print(f"Spearman {x} × {y}: ρ={rho:.3f}, p={p:.4g}, n={len(data)}")
    return res


# run multiple pairs
def run_spearman_pairs(df_in: pd.DataFrame, pairs, export_path=None):
    """Run `run_spearman_pair` for every (x, y) in `pairs` and collect the results.

    A pair that is skipped (returns None) or raises is reported and left out;
    it never stops the remaining pairs.

    Returns (summary, med_x, med_y):
      summary  one row per successful pair (x, y, n, rho, p, direction),
               sorted by x then y
      med_x    concatenated "median by x" tables, tagged with a "pair" column
      med_y    concatenated "median by y" tables, tagged with a "pair" column
    The median frames are empty DataFrames when no pair succeeded.

    When `export_path` is given, the three tables are written to that Excel
    file (its directory is created if necessary).
    """
    all_rows = []
    all_median_x = []
    all_median_y = []

    for (x, y) in pairs:
        try:
            out = run_spearman_pair(df_in, x, y)
            if out is None:
                continue
            all_rows.append({
                "x": out["x"], "y": out["y"],
                "n": out["n"], "rho": out["rho"], "p": out["p"],
                "direction": out["direction"]
            })
            pair_label = f"{x} × {y}"
            tmpx = out["median_by_x"].copy()
            tmpx["pair"] = pair_label
            tmpy = out["median_by_y"].copy()
            tmpy["pair"] = pair_label
            all_median_x.append(tmpx)
            all_median_y.append(tmpy)
        except Exception as e:
            print(f"Spearman failed for {x} × {y}: {e}")

    # build outputs (explicit columns keep the layout stable with zero results)
    summary = (pd.DataFrame(all_rows, columns=["x","y","n","rho","p","direction"])
               .sort_values(["x","y"]).reset_index(drop=True))
    med_x = pd.concat(all_median_x, ignore_index=True) if all_median_x else pd.DataFrame()
    med_y = pd.concat(all_median_y, ignore_index=True) if all_median_y else pd.DataFrame()

    print("\n✅ Spearman run finished.")
    if len(summary):
        print(summary.to_string(index=False))
    else:
        print("No successful pairs.")

    # export results
    if export_path:
        # make sure the target folder exists; otherwise the export would fail
        # only after all tests have already been computed
        export_dir = os.path.dirname(export_path)
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)
        with pd.ExcelWriter(export_path, engine="xlsxwriter") as writer:
            summary.to_excel(writer, index=False, sheet_name="spearman_summary")
            if not med_x.empty:
                med_x.to_excel(writer, index=False, sheet_name="medians_by_x")
            if not med_y.empty:
                med_y.to_excel(writer, index=False, sheet_name="medians_by_y")
        print(f"Exported to: {export_path}")

    return summary, med_x, med_y

# (x, y) column pairs to test. The "(Count)" columns are the ones built by
# build_count_from_multichoice in an earlier cell.
spearman_pairs = [
    ("Use AI school and freetime", "Usefullness AI"),
    ("Use AI school and freetime", "Reliability AI"),
    ("Use AI school and freetime", "Mates using AI"),
    ("Use AI school and freetime", "Deal with AI"),
    ("Reliability AI", "Teachers preparing lessons"),
    ("Reliability AI", "Teachers giving grades"),
    ("Frequency use of AI_school", "Help of AI"),
    ("Frequency use of AI_school", "Reasons to use AI (Count)"),
    ("Frequency use of AI_school", "Purposes to use AI (Count)")
]

# Run all pairs and write the summary plus the grouped medians to Excel.
summary_df, medx_df, medy_df = run_spearman_pairs(
    df, spearman_pairs,
    export_path=os.path.join("..","Data/test_results","spearman_results.xlsx")
)



In [None]:
# ============================================================
# PAIRWISE 2×2 TESTS (auto Fisher or Chi-square)
# ============================================================

from itertools import combinations
from scipy.stats import fisher_exact


# helpers, fallbacks 

if 'build_pair_dataframe' not in globals():
    def build_pair_dataframe(df_in: pd.DataFrame, left: str, right: str) -> pd.DataFrame:
        """Fallback definition, used only when no `build_pair_dataframe` exists yet.

        Builds a two-column frame of stripped strings: rows with a missing
        value are dropped, columns listed in `multiple_choice_questions` are
        split on "," and exploded to one row per option, and rows with an
        empty string in either column are removed.
        """
        tmp = df_in[[left, right]].dropna().copy()

        for name in (left, right):
            if name in multiple_choice_questions:
                tmp[name] = tmp[name].astype(str).str.split(",")
                tmp = tmp.explode(name)
            tmp[name] = tmp[name].astype(str).str.strip()

        non_blank = (tmp[left] != "") & (tmp[right] != "")
        return tmp[non_blank]

if 'order_rows_cols' not in globals():
    def order_rows_cols(ct: pd.DataFrame, rows_key: str, cols_key: str) -> pd.DataFrame:
        """Fallback definition, used only when no `order_rows_cols` exists yet.

        Orders rows/columns by the project-defined order in `question_orders`
        when available (entries are matched to labels by their string form,
        unlisted labels follow in their current order). Without a declared
        order, an axis whose labels all parse as numbers is sorted
        numerically (the row index is converted to numbers); otherwise it is
        kept as it is. The input frame is not modified.
        """
        def _declared_first(labels, declared):
            # map the text of each label to the label itself (first one wins)
            by_text = {}
            for label in labels:
                by_text.setdefault(str(label), label)
            front = []
            for entry in declared:
                text = str(entry)
                if text in by_text and by_text[text] not in front:
                    front.append(by_text[text])
            return front + [label for label in labels if label not in front]

        have_orders = 'question_orders' in globals()

        # columns
        if have_orders and cols_key in question_orders:
            ct = ct[_declared_first(list(ct.columns), question_orders[cols_key])]
        else:
            try:
                ct = ct[sorted(ct.columns, key=float)]
            except (TypeError, ValueError):
                pass  # labels are not numeric: keep the current column order

        # rows
        if have_orders and rows_key in question_orders:
            ct = ct.reindex(_declared_first(list(ct.index), question_orders[rows_key]))
        else:
            try:
                numeric_index = pd.to_numeric(ct.index)
            except (TypeError, ValueError):
                pass  # labels are not numeric: keep the current row order
            else:
                # work on a copy so the caller's frame keeps its original index
                renumbered = ct.copy()
                renumbered.index = numeric_index
                ct = renumbered.sort_index()
        return ct

def test_2x2(a, b, c, d, alternative="two-sided", prefer="auto", yates=False):
    """
    Perform a 2×2 test
    returns dict with method, p, chi2, dof, odds_ratio, min_expected
    skips when margins are degenerate
    """
    table = np.array([[int(a), int(b)], [int(c), int(d)]], dtype=int)

    # skip if any row/column sum is zero
    row_sums = table.sum(axis=1)
    col_sums = table.sum(axis=0)
    if (row_sums[0] == 0) or (row_sums[1] == 0) or (col_sums[0] == 0) or (col_sums[1] == 0):
        return {
            "method": "skip", "reason": "degenerate margins (zero row/column total)",
            "p": np.nan, "chi2": np.nan, "dof": 1, "odds_ratio": np.nan, "min_expected": 0.0,
            "table": table,
        }

    # expected counts
    N = table.sum()
    expected = np.outer(row_sums, col_sums) / N
    min_exp = expected.min()

    # fisher for low expectations, else Chi-square
    method = "chi2"
    if prefer == "fisher" or (prefer == "auto" and (min_exp < 5 or (table == 0).any())):
        method = "fisher"

    if method == "fisher":
        orat, p = fisher_exact(table, alternative=alternative)
        chi2_val, dof = np.nan, 1
        odds_ratio = float(orat) if np.isfinite(orat) else np.nan
    else:
        try:
            chi2_val, p, dof, _ = chi2_contingency(table, correction=yates)
        except Exception:
            # fallback to fisher if chi-square fails
            orat, p = fisher_exact(table, alternative=alternative)
            chi2_val, dof = np.nan, 1
            odds_ratio = float(orat) if np.isfinite(orat) else np.nan
            return {
                "method": "fisher", "p": float(p), "chi2": float(chi2_val), "dof": int(dof),
                "odds_ratio": float(odds_ratio), "min_expected": float(min_exp), "table": table,
            }
        # odds ratio correction
        a_, b_, c_, d_ = table.astype(float).ravel()
        if 0 in (a_, b_, c_, d_):
            a_, b_, c_, d_ = a_+0.5, b_+0.5, c_+0.5, d_+0.5
        odds_ratio = (a_ * d_) / (b_ * c_)

    return {
        "method": method, "p": float(p), "chi2": float(chi2_val), "dof": int(dof),
        "odds_ratio": float(odds_ratio), "min_expected": float(min_exp), "table": table,
    }

def pairwise_2x2_from_crosstab(ct: pd.DataFrame, row_cat: str,
                               prefer="auto", alternative="two-sided", yates=False):
    """Compare every pair of column groups on one row category of `ct`.

    For groups g1 and g2 the 2×2 table is
        [[hits in g1, non-hits in g1],
         [hits in g2, non-hits in g2]]
    where "hits" is the count in row `row_cat` and "non-hits" is the rest of
    the column total. Each table is passed to `test_2x2`.

    Returns one row per group pair. Raises ValueError when `row_cat` is not
    in the crosstab index.
    """
    if row_cat not in ct.index:
        raise ValueError(f"'{row_cat}' not in crosstab index")

    totals = ct.sum(axis=0)

    def _compare(g1, g2):
        n1, n2 = int(totals[g1]), int(totals[g2])
        a = int(ct.loc[row_cat, g1])    # hits in g1
        c = int(ct.loc[row_cat, g2])    # hits in g2
        b = int(totals[g1] - a)         # non-hits in g1
        d = int(totals[g2] - c)         # non-hits in g2
        res = test_2x2(a, b, c, d, alternative=alternative, prefer=prefer, yates=yates)
        record = {"row_cat": row_cat, "g1": g1, "g2": g2,
                  "a": a, "b": b, "c": c, "d": d,
                  "n_g1": n1, "n_g2": n2}
        for key in ("method", "p", "chi2", "dof", "odds_ratio", "min_expected"):
            record[key] = res[key]
        return record

    groups = ct.columns.tolist()
    return pd.DataFrame([_compare(g1, g2) for g1, g2 in combinations(groups, 2)])

def pairwise_2x2_all_rows(ct: pd.DataFrame, prefer="auto", alternative="two-sided", yates=False):
    """Run `pairwise_2x2_from_crosstab` for every row category of `ct`.

    Each per-row result gets a leading "row_cat_all" column holding the row
    category; the results are stacked into one frame. An empty DataFrame is
    returned when `ct` has no rows.
    """
    def _tests_for(row_category):
        per_row = pairwise_2x2_from_crosstab(
            ct, row_category, prefer=prefer, alternative=alternative, yates=yates
        )
        per_row.insert(0, "row_cat_all", row_category)
        return per_row

    frames = [_tests_for(rc) for rc in ct.index]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# build crosstabs + run pairwise 2×2 for each pair ----------
def run_pairwise_2x2(df_in: pd.DataFrame, pairs, prefer="auto", yates=False,
                     only_show_sig=True, alpha=0.05,
                     export=True, export_path=os.path.join("..","Data","pairwise_2x2_results.xlsx")):
    """Build a crosstab for each (left, right) pair and run all pairwise 2×2 tests.

    For every pair the crosstab of `left` (rows) × `right` (columns) is built,
    and each row category is compared between every two column groups.

    Parameters
    ----------
    df_in          source data
    pairs          iterable of (left, right) column names
    prefer, yates  passed on to the 2×2 test
    only_show_sig  print only results with p < alpha (all results are still
                   returned and exported)
    alpha          threshold used for printing
    export         write one Excel sheet per pair to `export_path`
    export_path    target workbook; its directory is created if necessary

    Returns the concatenated results of all pairs (empty DataFrame if none).
    A pair that fails is reported and skipped.
    """
    # stdlib no-op context manager, imported here so the function does not
    # depend on a helper defined elsewhere in the notebook
    from contextlib import nullcontext as _no_writer

    if export:
        export_dir = os.path.dirname(export_path)
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)

    # characters that Excel does not allow in sheet names
    invalid_sheet_chars = str.maketrans({ch: "_" for ch in "[]:*?/\\"})

    all_results = []
    with (pd.ExcelWriter(export_path, engine="xlsxwriter") if export else _no_writer()) as writer:
        for (left, right) in pairs:
            try:
                data = build_pair_dataframe(df_in, left, right)
                if data.empty:
                    print(f"No data for {left} × {right}. Skipping.")
                    continue

                ct = pd.crosstab(data[left], data[right])

                # collapse duplicate labels
                if not ct.columns.is_unique:
                    ct = ct.T.groupby(level=0).sum().T
                if not ct.index.is_unique:
                    ct = ct.groupby(level=0).sum()

                ct = order_rows_cols(ct, rows_key=left, cols_key=right)

                # drop zero-sum rows/columns; at least two column groups are needed
                ct = ct.loc[ct.sum(axis=1) > 0, ct.sum(axis=0) > 0]
                if ct.shape[0] < 1 or ct.shape[1] < 2:
                    print(f"Contingency too small for {left} × {right}. Skipping.")
                    continue

                res = pairwise_2x2_all_rows(ct, prefer=prefer, alternative="two-sided", yates=yates)
                if res.empty:
                    print(f"No pairwise results for {left} × {right}.")
                    continue
                res.insert(0, "left_var", left)
                res.insert(1, "right_var", right)
                all_results.append(res)

                # console output: at most the 20 smallest p-values
                show = res if not only_show_sig else res[res["p"] < alpha]
                print(f"\nPairwise 2×2: {left} × {right}  —  tests={len(res)}  (showing {'all' if not only_show_sig else f'p<{alpha}'})")
                if show.empty:
                    print("  (no significant pairs)")
                else:
                    print(show.sort_values("p").head(20).to_string(index=False))

                # export sheet per pair (15 + 1 + 15 characters stays within
                # Excel's 31-character limit for sheet names)
                if export:
                    sheet_name = (left[:15] + "×" + right[:15]).translate(invalid_sheet_chars)
                    res.to_excel(writer, index=False, sheet_name=sheet_name)

            except Exception as e:
                print(f"Failed pairwise 2×2 for {left} × {right}: {e}")

    combined = pd.concat(all_results, ignore_index=True) if all_results else pd.DataFrame()
    if export:
        print(f"\nExported to: {export_path}")
    return combined

# Helper for 'with' when export=False
from contextlib import contextmanager
@contextmanager
def nullcontext():
    """Context manager that does nothing; `with nullcontext() as x` binds x to None."""
    placeholder = None
    yield placeholder

# --------- 2x2 pairs ---------
# (left, right) column pairs: `left` gives the crosstab rows, `right` the column
# groups that are compared pairwise.
pairwise_2x2_pairs = [
    ("Use AI school and freetime", "Concerns AI"),
    ("Used AI", "Reliability AI")
]

# --------- Run ---------
pairwise_results = run_pairwise_2x2(
    df, pairwise_2x2_pairs,
    prefer="auto",   # "auto" picks Fisher or chi-square per table; "fisher" forces Fisher
    yates=False,     # no continuity correction for chi-square
    only_show_sig=True,   # print only results with p < alpha
    alpha=0.05,
    export=True,
    export_path=os.path.join("..","Data/test_results","pairwise_2x2_results.xlsx")
)


In [None]:
# individual graph

# Row labels shared by both tables below.
usage = ["Daily", "Several times a week", "About once a week", "Rarely", "Never"]

# Hard-coded counts: one row per usage category, one column per answer category.
df_under = pd.DataFrame([
    [8,14,22,10,2],   # Daily
    [7,21,29,7,4],    # Several times a week
    [3,6,19,10,0],    # About once a week
    [0,12,15,3,1],    # Rarely
    [0,5,3,2,0],      # Never
], index=usage, columns=["Very good","Rather good","Neither good nor bad","Rather little","Not at all"])

# Same layout as df_under (rows follow `usage`), with its own answer labels.
df_deal = pd.DataFrame([
    [24,26,5,1,0],
    [15,35,18,0,0],
    [6,16,15,1,0],
    [2,12,12,4,1],
    [0,6,3,1,0],
], index=usage, columns=["Very good","Rather good","Neither good nor bad","Rather poor","Poor"])

#  Likert mapping to scores (1–5); keys must equal the column labels of the matching table
map_under = {
    "Very good": 5, "Rather good": 4, "Neither good nor bad": 3,
    "Rather little": 2, "Not at all": 1
}
map_deal  = {
    "Very good": 5, "Rather good": 4, "Neither good nor bad": 3,
    "Rather poor": 2, "Poor": 1
}

def mean_score(df, mapping):
    """Weighted mean score per row of a count table.

    `df` holds counts (one column per answer category) and `mapping` assigns
    a score to each column label. For every row the result is
    sum(count * score) / sum(count).
    """
    weights = pd.Series(mapping)
    weighted_total = df.multiply(weights, axis="columns").sum(axis=1)
    respondents = df.sum(axis=1)
    return weighted_total / respondents

# Mean score (1–5) per usage category for each of the two questions.
mean_under = mean_score(df_under, map_under)   # Understanding
mean_deal  = mean_score(df_deal,  map_deal)    # Dealing with AI

# Combine + plot: one row per usage category, one column (bar series) per question
means = pd.DataFrame({
    "Understanding": mean_under,
    "Dealing with AI": mean_deal
}, index=usage)

ax = means.plot(kind="bar", figsize=(9,4), width=0.8)

ax.set_title("Mean score (1–5): Understanding vs. Dealing with AI by usage", fontsize=14)
ax.set_ylabel("Mean score (1–5)", fontsize=12)
ax.set_xlabel("AI usage (school + free time)", fontsize=12)
# The y-axis starts at 1, the lowest possible score.
ax.set_ylim(1, 5)
plt.xticks(rotation=0, fontsize=11)
plt.yticks(fontsize=11)
plt.grid(False)
ax.legend(title=None, fontsize=11)

# Write each bar's value just above the bar.
for p in ax.patches:
    h = p.get_height()
    ax.annotate(f"{h:.2f}", (p.get_x() + p.get_width()/2, h),
                ha="center", va="bottom", fontsize=10, xytext=(0,2), textcoords="offset points")

plt.tight_layout()
plt.show()

# Same numbers as a table.
print(means.round(2))




In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# INDIVIDUAL GRAPH ---------------------

# data by hand
# Hard-coded counts with the German labels: rows = frequency of use,
# columns = rated helpfulness.
df_de = pd.DataFrame(
    {
        "Sehr gut":       [2, 5, 18, 7],
        "Eher gut":       [13, 33, 37, 6],
        "Neutral":        [13, 22, 14, 0],
        "Eher schlecht":  [9,  2,  1,  0],
    },
    index=["Selten", "Manchmal", "Häufig", "Immer"]
)

# map German labels to their English equivalents
col_map = {
    "Sehr gut": "Very good",
    "Eher gut": "Rather good",
    "Neutral": "Neutral",
    "Eher schlecht": "Rather poor",
}
index_map = {
    "Selten": "Rarely",
    "Manchmal": "Sometimes",
    "Häufig": "Often",
    "Immer": "Always",
}

# Stored under its own name: `df` is the survey DataFrame that other cells
# (e.g. the reasons/purposes count charts) still read, so it must not be
# rebound to this small table.
help_by_freq = df_de.rename(columns=col_map).rename(index=index_map)

# make order: most frequent use first
order = ["Always", "Often", "Sometimes", "Rarely"]
help_by_freq = help_by_freq.reindex(order)

# colours
colors = ["#08519c", "#3182bd", "#6baed6", "#c6dbef"]  # Very good ... Rather poor

# plot: stacked counts per usage frequency
ax = help_by_freq.plot(
    kind="bar",
    stacked=True,
    figsize=(10, 6),
    width=0.85,
    color=colors
)

ax.set_title("Helpfulness of AI tools by frequency of AI usage in school context", fontsize=14, weight="bold")
ax.set_xlabel("Frequency of AI usage in school context")
ax.set_ylabel("Count")
# counts are whole numbers, so only integer ticks are shown
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
plt.xticks(rotation=20, ha="right")

# legend: reversed so its entries run top-to-bottom like the stacked segments
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], title="Help of AI",
          loc="upper right", framealpha=0.9)

plt.tight_layout()
plt.show()




In [None]:
import re
import pandas as pd
import matplotlib.pyplot as plt

# configuration
COL_FREQ     = "Frequency use of AI_school"   # grouping column (x-axis of the charts)
COL_REASONS  = "Reasons to use AI"     # multi-select answer column
COL_PURPOSES = "Purposes to use AI"    # multi-select answer column

# helper
def count_multiselect_unique(val):
    """Count the distinct, non-empty options in one multi-select answer.

    `val` is either a string with options separated by "," or ";" or a
    list/tuple/set of options. Options are whitespace-stripped before
    counting. A missing value counts as 0.
    """
    # Collections are handled first: pd.isna() on a list returns an array,
    # which cannot be used as a single truth value.
    if isinstance(val, (list, tuple, set)):
        tokens = [str(x).strip() for x in val]
    elif pd.isna(val):
        return 0
    else:
        tokens = [t.strip() for t in re.split(r"\s*[;,]\s*", str(val))]
    return len({t for t in tokens if t})

def pick_freq_order(index_values):
    """Return the labels in `index_values` arranged along a known frequency scale.

    The scale sharing the most labels with `index_values` is chosen (the
    first one on a tie). If it shares at least two labels, the labels present
    are returned in that scale's order; labels outside the scale are left
    out. Otherwise the labels are returned in their given order.
    """
    known_scales = (
        ["Always", "Often", "Sometimes", "Rarely", "Never"],
        ["Daily", "Several times a week", "About once a week", "Rarely", "Never"],
        ["Immer", "Häufig", "Manchmal", "Selten", "Nie"],
        ["Täglich","Mehrmals pro Woche","Etwa 1 Mal pro Woche","Seltener","Nie"],
    )
    present = set(index_values)

    def _overlap(scale):
        return len([label for label in scale if label in present])

    # max() keeps the first scale among equally good ones
    best_scale = max(known_scales, key=_overlap)
    if _overlap(best_scale) < 2:
        # fallback
        return list(index_values)
    return [label for label in best_scale if label in present]

def make_mean_count_chart(df_in, count_col, freq_col, title):
    """Plot and print the mean of `count_col` for every `freq_col` group.

    Groups are arranged with `pick_freq_order`, drawn as a bar chart with the
    group size written above each bar, and the underlying mean/n table is
    printed after the figure.
    """
    grouped = df_in.groupby(freq_col)[count_col]
    stats = grouped.agg(mean="mean", n="count").sort_index()
    stats = stats.reindex(pick_freq_order(stats.index))

    # Bar chart of the group means
    ax = stats["mean"].plot(kind="bar", figsize=(9,4), width=0.8, color="cornflowerblue")
    ax.set_title(title, fontsize=16, weight="bold")
    ax.set_xlabel("AI usage frequency", fontsize=12)
    ax.set_ylabel("Mean number of selections", fontsize=12)
    plt.xticks(rotation=0)
    plt.yticks(fontsize=11)
    plt.grid(False)

    # Write the group size just above the top centre of each bar
    group_sizes = stats["n"].astype(int).values
    for bar, size in zip(ax.patches, group_sizes):
        top_centre = (bar.get_x() + bar.get_width()/2, bar.get_height())
        ax.annotate(
            f"n={size}",
            top_centre,
            xytext=(0,3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=9,
        )

    plt.tight_layout()
    plt.show()

    # Same numbers as a table
    print("\nTable –", title)
    print(stats.round({"mean":2}))

# Calculation: per respondent, the number of distinct options ticked in each
# multi-select question. Note that this adds two columns to the shared `df`.
df["reasons_count"]  = df[COL_REASONS].apply(count_multiselect_unique)
df["purposes_count"] = df[COL_PURPOSES].apply(count_multiselect_unique)

# Keep only respondents who answered the usage-frequency question
# (both subsets are built the same way and hold the same rows).
sub_rea = df.dropna(subset=[COL_FREQ]).copy()
sub_pur = df.dropna(subset=[COL_FREQ]).copy()

# One chart + printed table per question
make_mean_count_chart(
    sub_rea, "reasons_count", COL_FREQ,
    "Mean number of selected reasons by AI usage frequency"
)

make_mean_count_chart(
    sub_pur, "purposes_count", COL_FREQ,
    "Mean number of selected purposes by AI usage frequency"
)


In [None]:
import re, unicodedata
import pandas as pd
import matplotlib.pyplot as plt

# -------------------- CONFIG --------------------
# Names of the survey columns used in this cell.
COL_FREQ     = "Frequency use of AI_school"  # raw frequency answers, canonicalised into "freq4_en" below
COL_REASONS  = "Reasons to use AI"           # multi-select answers, counted per respondent
COL_PURPOSES = "Purposes to use AI"          # multi-select answers, counted per respondent

# -------------------- Helpers --------------------
def _nf(s: str) -> str:
    """Lowercase + ASCII-Folding (ä->a, ö->o, ß->ss) + nur a-z0-9."""
    s = str(s).strip().lower()
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return re.sub(r"[^a-z0-9]+", "", s)

# Lookup from normalised German frequency labels to English display labels.
# canon_freq() looks labels up by their `_nf` key, so the keys here are
# lowercase ASCII ("haufig" is presumably the key for "Häufig" - confirm
# against the raw data).
# "nie" maps to None: canon_freq() returns that None, so such rows receive no
# frequency label.
FREQ_MAP = {
    "immer": "Always",
    "haufig": "Often",
    "manchmal": "Sometimes",
    "selten": "Rarely",
    "nie": None,
}

def canon_freq(x):
    """Translate one raw frequency answer into its English label via FREQ_MAP.

    Returns None for missing values and for labels whose normalised key is not
    in FREQ_MAP (unknown labels are thereby excluded).
    """
    return None if pd.isna(x) else FREQ_MAP.get(_nf(x))

def count_multiselect_unique(val):
    """Count the distinct, non-empty options in one multi-select answer.

    Parameters
    ----------
    val : str, list, tuple, set or missing value
        One cell of a multi-select column. Strings are split on commas and
        semicolons; list-like values are taken element by element.

    Returns
    -------
    int
        Number of unique options after trimming whitespace and discarding
        empty entries; 0 for a missing value (None / NaN).
    """
    # Containers must be handled before the missing-value check: pd.isna() on
    # a list returns an array, and using that array in an `if` raises
    # ValueError unless it has exactly one element.
    if isinstance(val, (list, tuple, set)):
        tokens = [str(x).strip() for x in val]
    elif pd.isna(val):
        return 0
    else:
        tokens = [t.strip() for t in re.split(r"\s*[;,]\s*", str(val))]
    return len({t for t in tokens if t})

def make_mean_count_chart(df_in, count_col, freq_col, title):
    """Plot and print the mean of `count_col` for the four fixed usage groups.

    Rows without a value in `freq_col` are ignored. The result is always laid
    out as Always / Often / Sometimes / Rarely; each bar is labelled with its
    group size, and the mean/n table is printed after the figure.
    """
    display_order = ["Always", "Often", "Sometimes", "Rarely"]
    answered = df_in.dropna(subset=[freq_col]).copy()
    per_group = answered.groupby(freq_col)[count_col].agg(mean="mean", n="count")
    stats = per_group.reindex(display_order)

    # Bar chart of the group means
    ax = stats["mean"].plot(kind="bar", figsize=(9,4), width=0.8, color="cornflowerblue")
    ax.set_title(title, fontsize=16, weight="bold")
    ax.set_xlabel("AI usage frequency", fontsize=12)
    ax.set_ylabel("Mean number of selections", fontsize=12)
    plt.xticks(rotation=0)
    plt.yticks(fontsize=11)
    plt.grid(False)

    # Group sizes above the bars; groups absent from the data count as 0
    group_sizes = stats["n"].fillna(0).astype(int).values
    for bar, size in zip(ax.patches, group_sizes):
        top_centre = (bar.get_x() + bar.get_width()/2, bar.get_height())
        ax.annotate(
            f"n={size}",
            top_centre,
            xytext=(0,3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=9,
        )

    plt.tight_layout()
    plt.show()

    # Same numbers as a table
    print("\nTable –", title)
    print(stats.round({"mean": 2}))

# -------------------- Calculation --------------------
# English frequency label per respondent; None where canon_freq() finds no label.
# Note: this and the two count columns below are added to the shared `df`.
df["freq4_en"] = df[COL_FREQ].apply(canon_freq)

# Number of distinct options ticked in each multi-select question
df["reasons_count"]  = df[COL_REASONS].apply(count_multiselect_unique)
df["purposes_count"] = df[COL_PURPOSES].apply(count_multiselect_unique)

# One chart + printed table per question
make_mean_count_chart(
    df, "reasons_count", "freq4_en",
    "Mean number of selected reasons by AI usage frequency"
)

make_mean_count_chart(
    df, "purposes_count", "freq4_en",
    "Mean number of selected purposes by AI usage frequency"
)


In [None]:
# USEFULNESS X AI USAGE
# Stacked bar chart of usefulness ratings per AI-usage group. The counts are
# reconstructed from a hard-coded percentage table and hard-coded group totals.

# ---- Total respondents per usage group (counts), entered by hand ----
totals = {
    "Daily": 56,                   # <-- PUT TOTAL HERE
    "Several times per week": 68,  # <-- PUT TOTAL HERE
    "About once per week": 38,     # <-- PUT TOTAL HERE
    "Rarely": 31                   # <-- PUT TOTAL HERE
}

# ---- Percentage table with English labels ----
# Columns = usage group, rows = usefulness rating (see `index`), values in percent.
percent = pd.DataFrame(
    {
        "About once per week": [7.9, 55.3, 31.6, 5.3, 0.0],
        "Several times per week": [29.4, 42.6, 27.9, 0.0, 0.0],
        "Rarely": [0.0, 32.3, 61.3, 6.5, 0.0],
        "Daily": [75.0, 23.2, 1.8, 0.0, 0.0],
    },
    index=["Very useful", "Quite useful", "Neutral", "Slightly useful", "Not useful at all"]
)

# Reorder the columns into the x-axis order
order_x = ["Daily", "Several times per week", "About once per week", "Rarely"]
percent = percent[order_x]

# ---- Convert percentages to counts using 'totals' ----
# `.values` makes the multiplication positional: each column is scaled by the
# total at the same position, which is why both sides are ordered by `order_x`.
totals_vec = pd.Series(totals)[order_x].astype(float)
counts = (percent * totals_vec.values).div(100)
counts = counts.round(0).astype(int)  # round to whole respondents

# ---- Stack order (dark → light) ----
order_stack = ["Very useful", "Quite useful", "Neutral", "Slightly useful", "Not useful at all"]
counts = counts.loc[order_stack]

# ---- Colors: high = dark blue (bottom), low = light blue (top) ----
colors = {
    "Very useful": "#08306b",
    "Quite useful": "#2171b5",
    "Neutral": "#59a3ce",
    "Slightly useful": "#83b3e2",
    "Not useful at all": "#c4d9ee",
}

# ---- Plot counts (stacked) ----
# Transposed so that usage groups become the bars and ratings the segments.
ax = counts.T.plot(
    kind="bar",
    stacked=True,
    figsize=(10, 6),
    color=[colors[k] for k in order_stack],
    width=0.9
)

ax.set_title("Usefulness of AI by AI usage (school + freetime)", fontsize=16, fontweight="bold")
ax.set_ylabel("Number of respondents", fontsize=14, fontweight="bold")
ax.set_xlabel("AI usage (school + freetime)", fontsize=14, fontweight="bold")
plt.xticks(rotation=0, fontsize=12)
plt.yticks(fontsize=12)

# Legend inside the axes, listed in reverse stack order so that the dark
# "Very useful" entry ends up at the bottom of the legend.
handles, labels = ax.get_legend_handles_labels()
lookup = {lab: h for h, lab in zip(handles, labels)}
legend_display = order_stack[::-1]  # show top→bottom: light→dark
ax.legend([lookup[l] for l in legend_display], legend_display,
          title="Usefulness", loc="upper right",
          fontsize=12, title_fontsize=13, framealpha=0.9)

plt.tight_layout()
plt.show()



In [None]:
# CONCERNS X AI USAGE
# Stacked bar chart of the hard-coded concern answers per AI-usage group.

# ----- Data (counts) -----
# Keys = usage group; list entries follow `stack_order`.
data = {
    "Daily": [18, 31, 7],
    "Several times per week": [32, 23, 13],
    "About once per week": [12, 10, 16],
    "Rarely": [14, 6, 11],
    "Never": [3, 4, 3],
}
stack_order = ["Yes", "No", "Never thought about it"]  # original order of the answers
# Use a dedicated name for this small table: binding it to `df` would
# overwrite the survey DataFrame loaded in the setup cell, so every cell that
# runs afterwards (or is re-run) would operate on the wrong data.
concern_counts = pd.DataFrame(data, index=stack_order)

# ----- Plot (stacked) -----
colors = {
    "Yes": "green",
    "No": "red",
    "Never thought about it": "#4A90E2"
}

# Transposed so that usage groups become the bars and answers the segments.
ax = concern_counts.T.plot(
    kind="bar", stacked=True, figsize=(10, 6),
    color=[colors[k] for k in stack_order], width=0.9
)

# Title & axes
ax.set_title("Concern about AI by AI usage (school + free time)", fontsize=16, fontweight="bold")
ax.set_ylabel("Number of respondents", fontsize=14, fontweight="bold")
ax.set_xlabel("AI usage (school + free time)", fontsize=14, fontweight="bold")

# Slightly slanted x tick labels
plt.xticks(rotation=20, ha="right", fontsize=12)
plt.yticks(fontsize=12)

# --- Legend listed in reverse stack order ---
handles, labels = ax.get_legend_handles_labels()
legend_display = stack_order[::-1]
lookup = {lab: h for lab, h in zip(labels, handles)}
ax.legend([lookup[l] for l in legend_display], legend_display,
          title="Concerns", loc="upper right",
          fontsize=12, title_fontsize=13, framealpha=0.9)

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Input table (counts) ---
# Keys = attitude answers (German survey labels); list entries follow `rows`
# (reliability ratings).
data = {
    "Stört mich sehr":               [2, 6, 8, 3, 3, 0],
    "Stört mich ein wenig":          [0, 17, 26, 5, 0, 0],
    "Neutral / Mir egal":            [6, 50, 47, 7, 0, 5],
    "Finde ich gut":                 [2, 8, 4, 0, 0, 0],
    "Finde ich sehr gut":            [0, 3, 1, 0, 0, 0],
}
rows = [
    "Sehr verlässlich",
    "Eher verlässlich",
    "Teils/teils",
    "Wenig verlässlich",
    "Gar nicht verlässlich",
    "Unsicher / Ich habe keine Meinung",
]
ct = pd.DataFrame(data, index=rows)

# --- Mapping reliability -> score (1..5); the "unsure / no opinion" row is excluded ---
score_map = {
    "Gar nicht verlässlich": 1,
    "Wenig verlässlich":     2,
    "Teils/teils":           3,
    "Eher verlässlich":      4,
    "Sehr verlässlich":      5,
}
reliability_rows = list(score_map.keys())

# Keep only the rows that have a score
ct_scored = ct.loc[reliability_rows]

# n per attitude (without the "unsure" row)
n_per_att = ct_scored.sum(axis=0)

# Count-weighted mean score per attitude: sum(count * score) / n
weights = pd.Series(score_map)
mean_per_att = (ct_scored.mul(weights, axis=0).sum(axis=0) / n_per_att).astype(float)

# English x-axis labels, in the desired display order
x_map = {
    "Finde ich sehr gut":   "I really like it",
    "Finde ich gut":        "I like it",
    "Neutral / Mir egal":   "Neutral / I don't care",
    "Stört mich ein wenig": "Bothers me a bit",
    "Stört mich sehr":      "Bothers me a lot",
}
order_x = ["I really like it", "I like it", "Neutral / I don't care", "Bothers me a bit", "Bothers me a lot"]

mean_en = mean_per_att.rename(index=x_map).reindex(order_x)
n_en = n_per_att.rename(index=x_map).reindex(order_x)

# --- Plot (styled after a reference screenshot) ---
fig, ax = plt.subplots(figsize=(12, 4.5))

bars = ax.bar(mean_en.index, mean_en.values, color="#1f77b4", edgecolor="black")

# Write n above each bar
for bar, n in zip(bars, n_en.values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.05,
            f"n={int(n)}", ha="center", va="bottom", fontsize=11, fontweight="bold")

# Title
ax.set_title("Mean Score (1–5): Reliability by approval of teachers using AI tools to prepare lessons",
             fontsize=16, fontweight="bold", pad=10)

ax.set_ylabel("Mean score (1–5)", fontsize=13, fontweight="bold")
ax.set_xlabel("Attitude", fontsize=13, fontweight="bold")

# Pin the y-axis to the 1..5 score range (the visible axis therefore starts
# at 0.9, not at 0)
ax.set_ylim(0.9, 5.1)
ax.set_yticks([1,2,3,4,5])
ax.tick_params(axis="y", labelsize=12)

# Slanted x tick labels
plt.xticks(rotation=35, ha="right", fontsize=12)

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# --- Input table (counts) ---
# Keys = attitude answers (German survey labels); list entries follow `rows`
# (reliability ratings).
data = {
    "Stört mich sehr":               [3, 34, 56, 11, 3, 1],
    "Stört mich ein wenig":          [2, 26, 19, 3, 0, 1],
    "Neutral / Mir egal":            [2, 19, 9, 1, 0, 3],
    "Finde ich gut":                 [3, 3, 1, 0, 0, 0],
    "Finde ich sehr gut":            [0, 2, 1, 0, 0, 0],
}
rows = [
    "Sehr verlässlich",
    "Eher verlässlich",
    "Teils/teils",
    "Wenig verlässlich",
    "Gar nicht verlässlich",
    "Unsicher / Ich habe keine Meinung",
]
ct = pd.DataFrame(data, index=rows)

# --- Mapping reliability -> score (1..5); the "unsure / no opinion" row is excluded ---
score_map = {
    "Gar nicht verlässlich": 1,
    "Wenig verlässlich":     2,
    "Teils/teils":           3,
    "Eher verlässlich":      4,
    "Sehr verlässlich":      5,
}
reliability_rows = list(score_map.keys())
ct_scored = ct.loc[reliability_rows]

# n per attitude (without the "unsure" row)
n_per_att = ct_scored.sum(axis=0)

# Count-weighted mean score per attitude: sum(count * score) / n
weights = pd.Series(score_map)
mean_per_att = (ct_scored.mul(weights, axis=0).sum(axis=0) / n_per_att).astype(float)

# English x-axis labels + display order
x_map = {
    "Finde ich sehr gut":   "I really like it",
    "Finde ich gut":        "I like it",
    "Neutral / Mir egal":   "Neutral / I don't care",
    "Stört mich ein wenig": "Bothers me a bit",
    "Stört mich sehr":      "Bothers me a lot",
}
order_x = ["I really like it", "I like it", "Neutral / I don't care", "Bothers me a bit", "Bothers me a lot"]

mean_en = mean_per_att.rename(index=x_map).reindex(order_x)
n_en = n_per_att.rename(index=x_map).reindex(order_x)

# --- Plot ---
fig, ax = plt.subplots(figsize=(12, 4.5))

bars = ax.bar(mean_en.index, mean_en.values, color="#1f77b4", edgecolor="black")

# Write n above each bar
for bar, n in zip(bars, n_en.values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.05,
            f"n={int(n)}", ha="center", va="bottom", fontsize=11, fontweight="bold")

ax.set_title("Mean Score (1–5): Reliability by approval of teachers using AI tools to give grades",
             fontsize=16, fontweight="bold", pad=10)
ax.set_ylabel("Mean score (1–5)", fontsize=13, fontweight="bold")
ax.set_xlabel("Attitude", fontsize=13, fontweight="bold")

# Pin the y-axis to the 1..5 score range (the visible axis therefore starts
# at 0.9, not at 0)
ax.set_ylim(0.9, 5.1)
ax.set_yticks([1, 2, 3, 4, 5])
ax.tick_params(axis="y", labelsize=12)

# Slanted x tick labels
plt.xticks(rotation=35, ha="right", fontsize=12)

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# --- Input tables (hard-coded) ---
# Rows = AI usage frequency. The integer column labels are used as weights by
# compute_mean_scores(); presumably they are the number of options a
# respondent selected and the cells are respondent counts - confirm.
reasons_df = pd.DataFrame(
    {
        1: [20, 18, 3, 1],
        2: [12, 15, 15, 4],
        3: [3, 14, 33, 4],
        4: [1, 7, 13, 2],
        5: [0, 4, 6, 2],
    },
    index=["Rarely", "Sometimes", "Often", "Always"]
)

purposes_df = pd.DataFrame(
    {
        1: [11, 8, 1, 2],
        2: [11, 14, 5, 1],
        3: [7, 20, 22, 1],
        4: [5, 14, 16, 6],
        5: [2, 3, 15, 2],
        6: [1, 2, 6, 1],
        7: [0, 1, 5, 0],
    },
    index=["Rarely", "Sometimes", "Often", "Always"]
)

# --- Reorder index so Always is first ---
order_x = ["Always", "Often", "Sometimes", "Rarely"]
reasons_df = reasons_df.reindex(order_x)
purposes_df = purposes_df.reindex(order_x)

# --- Funktion zur Berechnung von Mean Scores ---
def compute_mean_scores(df):
    """Return the per-row weighted mean of the column labels and the row totals.

    Each cell is weighted by its (numeric) column label; the weighted row sum
    is divided by the plain row sum and rounded to two decimals.

    Returns
    -------
    (pandas.Series, pandas.Series)
        Mean score per row, and the row totals used as denominators.
    """
    row_totals = df.sum(axis=1)
    weighted_cells = df.mul(df.columns, axis=1)
    row_means = weighted_cells.sum(axis=1) / row_totals
    return row_means.round(2), row_totals

# --- Plotfunktion ---
def plot_mean(mean_score, total, title):
    """Draw one bar per entry of `mean_score` and label each bar with its n.

    `total` supplies the n values, matched to the bars by position. The y-axis
    runs from 0 to one unit above the highest mean.
    """
    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.bar(mean_score.index, mean_score.values, color="#1f77b4", edgecolor="black")

    # Group size slightly above the top centre of every bar
    for rect, count in zip(bars, total.values):
        x_mid = rect.get_x() + rect.get_width()/2
        y_top = rect.get_height() + 0.05
        ax.text(x_mid, y_top, f"n={int(count)}",
                ha="center", va="bottom", fontsize=11, fontweight="bold")

    ax.set_title(title, fontsize=15, fontweight="bold", pad=12)
    ax.set_ylabel("Mean number of selections", fontsize=13, fontweight="bold")
    ax.set_xlabel("Frequency of AI usage", fontsize=13, fontweight="bold")
    for axis_name in ("x", "y"):
        ax.tick_params(axis=axis_name, labelsize=12)

    upper_limit = max(mean_score.values) + 1
    plt.ylim(0, upper_limit)
    plt.tight_layout()
    plt.show()

# --- Reasons table, plotted under a "purposes" title ---
# NOTE(review): the reasons table gets the "purposes" title and the purposes
# table (below) the "motivations" title. The original section comments
# ("Reasons → Purposes", "Purposes → Motivations") suggest this relabelling is
# deliberate - confirm that the titles match the intended questions.
mean_reasons, n_reasons = compute_mean_scores(reasons_df)
plot_mean(mean_reasons, n_reasons, "Mean selection of purposes to use AI by AI usage frequency")

# --- Purposes table, plotted under a "motivations" title ---
mean_purposes, n_purposes = compute_mean_scores(purposes_df)
plot_mean(mean_purposes, n_purposes, "Mean selection of motivations to use AI by AI usage frequency")

