In [None]:
# ==========================================
# SETUP BLOCK
# ==========================================

import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append(os.path.abspath(".."))

from Helper_functions import (
    clean_up_subjects,
    calculate_true_false_score,
    calculate_internet_terms_understanding_score,
    group_internet_understanding,
    analyze_distribution,
    analyze_subject_distribution
)

from lists import (
    demographic_columns,
    multiple_choice_questions,
    single_choice_questions
)

from answer_categories import COLUMN_ALIASES

# Plot defaults shared by the figures created in this notebook.
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# Read the survey workbook; the path is relative to the current working directory.
DATA_FILE = os.path.join("..", "Data", "Fertige Tabelle.xlsx")
df = pd.read_excel(DATA_FILE)

# Normalise the header labels (cast to str, strip surrounding whitespace)
# before COLUMN_ALIASES is applied as a column rename.
df.columns = df.columns.astype(str).str.strip()
df = df.rename(columns=COLUMN_ALIASES)

# Run the subject clean-up for each subject column that is present.
# Membership is re-checked on every pass because `df` is replaced by the
# helper's return value.
for col in ("Most used subjects", "Preferred Subjects", "Least preferred Subjects"):
    if col not in df.columns:
        continue
    df = clean_up_subjects(df, col)

# The true/false score is only calculated when all six item columns exist.
true_false_cols = [f"True/False_{i}" for i in range(1, 7)]
if set(true_false_cols).issubset(df.columns):
    df = calculate_true_false_score(df)

# The internet-terms helpers run as soon as one matching column is found.
has_internet_terms = any(c.startswith("Internet terms_") for c in df.columns)
if has_internet_terms:
    df = calculate_internet_terms_understanding_score(df)
    df = group_internet_understanding(df)

n_rows, n_cols = len(df), len(df.columns)
print("Setup complete – DataFrame loaded and preprocessed")
print(f"Rows: {n_rows}, Columns: {n_cols}")


In [None]:
# --------- Distribution tables for all non-demographic questions -----------

def _non_demographic(questions):
    """Return the entries of `questions` that are not listed in
    `demographic_columns` and that exist as columns of `df`."""
    return [q for q in questions if q not in demographic_columns and q in df.columns]


non_demo_single = _non_demographic(single_choice_questions)
non_demo_multi = _non_demographic(multiple_choice_questions)

# Single-choice questions are processed first, then multiple-choice ones,
# each with its own analysis helper.
for analyzer, question_columns in (
    (analyze_distribution, non_demo_single),
    (analyze_subject_distribution, non_demo_multi),
):
    for column in question_columns:
        analyzer(df, column)


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Horizontal bar chart of the AI platforms named by respondents, exported as PDF.

# Make sure the output folder for the exported figure exists.
os.makedirs("figures", exist_ok=True)

# Hard-coded number of mentions per platform.
data = {
    "ChatGPT": 184,
    "Gemini (Google)": 36,
    "Microsoft Copilot": 28,
    "Deep Seek": 21,
    "Meta AI (WhatsApp)": 18,
    "My AI (Snapchat)": 12,
    "Perplexity": 4,
    "Claude AI": 4,
    "GitHub Copilot": 2,
    "Gamma AI": 1,
    "Character.ai": 1,
    "Chai AI": 1,
    "Hausaufgaben Hans": 1,
    "Aria (Opera Browser AI)": 1,
    "Duck.ai": 1,
    "Grok (Twitter)": 1,
}

# Number of respondents who answered this question
# (184 corresponds to 95.3% -> N = 193); used as the percentage base.
N = 193

df_plot = pd.DataFrame.from_dict(data, orient="index", columns=["Count"])

# Collapse every platform with exactly one mention into a single summary row.
mentioned_once = df_plot["Count"].eq(1)
count_one_sum = int(df_plot.loc[mentioned_once, "Count"].sum())
df_plot = df_plot[~mentioned_once]
if count_one_sum > 0:
    df_plot.loc["Other platforms (single mentions)"] = count_one_sum

# Share of respondents per row, in percent with one decimal place.
df_plot["Percent"] = df_plot["Count"].div(N).mul(100).round(1)

# Ascending order so that the largest bar ends up at the top of the barh plot.
df_plot = df_plot.sort_values(by="Count")

# One shade of blue per bar, getting darker with increasing count.
n_bars = len(df_plot)
colors = plt.cm.Blues([0.35 + i * 0.6 / n_bars for i in range(n_bars)])

fig, ax = plt.subplots(figsize=(10, 7))
bars = ax.barh(df_plot.index, df_plot["Count"], color=colors)

# Write the percentage just past the end of each bar, vertically centred.
for rect, share in zip(bars, df_plot["Percent"]):
    label_x = rect.get_width() + 1
    label_y = rect.get_y() + rect.get_height() / 2
    ax.text(label_x, label_y, f"{share}%", va="center", fontsize=10)

ax.set_title("AI platforms used by students", fontsize=16, fontweight="bold")
ax.set(xlabel="Number of respondents", ylabel="AI platform")

fig.tight_layout()
fig.savefig("figures/ai_platforms_used_grouped_once.pdf", bbox_inches="tight")
plt.show()
plt.close(fig)
