In [None]:
# ==========================================
# SETUP BLOCK 
# ==========================================

import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ---- Imports from project files ----
sys.path.append(os.path.abspath(".."))
from Helper_funtions import (
    clean_up_subjects,
    calculate_true_false_score,
    calculate_Internet_terms_understanding_score,
    group_internet_understanding,
    analyze_distribution, 
    analyze_subject_distribution
)
from lists import (
    demographic_columns,
    multiple_choice_questions,
    single_choice_questions,
    likert_questions,
    likert_mapping
)
from answer_categories import question_orders

# ---- Global plotting defaults ----
# Order kept as in the original: seaborn theme first, figure size second.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 5)})

# ---- Read the survey workbook ----
DATA_FILE = os.path.join("..", "Data", "Fertige Tabelle.xlsx")
df = pd.read_excel(DATA_FILE)
# Remove leading/trailing whitespace from the header names
df.columns = df.columns.str.strip()

# ---- Normalize the multi-subject columns (only those actually present) ----
# Presence is re-checked on every pass because `df` is replaced by the helper's result.
for subject_col in ("Most used subjects", "Preferred Subjects", "Least preferred Subjects"):
    if subject_col not in df.columns:
        continue
    df = clean_up_subjects(df, subject_col)

# ---- Derived scores ----
# True/False score: computed only when both True/False items exist
has_true_false_items = "True/False_1" in df.columns and "True/False_2" in df.columns
if has_true_false_items:
    df = calculate_true_false_score(df)

# Internet-terms score and its grouping: computed when at least one
# "Internet terms_*" column exists
has_internet_term_items = any(
    name.startswith("Internet terms_") for name in df.columns
)
if has_internet_term_items:
    df = group_internet_understanding(
        calculate_Internet_terms_understanding_score(df)
    )

# ---- Status output ----
print("✅ Setup complete – DataFrame loaded and preprocessed")
print("Rows: {}, Columns: {}".format(len(df), len(df.columns)))


In [None]:
# --------- Demographic Analysis, table form ---------

# (column, heading) pairs in display order: gender, age, education level,
# weekly hours for school, both parents' education levels, CRT score
for column_name, heading in (
    ("Gender", "Gender Distribution"),
    ("Age", "Age Distribution"),
    ("Education Level", "Students’ Education Level"),
    ("Hours per week for school", "Hours per week distribution"),
    ("Educational Level parent_1", "Parental Education – Educational Level parent_1"),
    ("Educational Level parent_2", "Parental Education – Educational Level parent_2"),
    ("CRT_points", "Distribution of CRT Score"),
):
    analyze_distribution(df, column_name, heading)

# Favorite subjects, then least favorite subjects.
# Left as bare calls so the cell's final expression stays the same.
analyze_subject_distribution(df, "Preferred Subjects")
analyze_subject_distribution(df, "Least preferred Subjects")


In [None]:
# ------- Distribution for demographics with graphs and tables -------

# Add helper function path
sys.path.append(os.path.abspath(".."))
from Helper_funtions import analyze_subject_distribution_changed, analyze_distribution_changed

# Function to show both plot and table
def show_distribution_with_plot(df, column, title=None):
    """Show the distribution of one column as a bar chart followed by a text table.

    The table comes from ``analyze_distribution_changed(df, column,
    return_df=True)``. Its ``"Percentage"`` column is drawn as bars and the
    complete table is then printed with ``to_string()``.

    Parameters
    ----------
    df
        Survey data, passed through unchanged to ``analyze_distribution_changed``.
    column : str
        Column to analyze; also used as the x-axis label.
    title : str, optional
        Heading for the chart and the printed table. Falls back to ``column``
        when omitted or empty.

    Returns
    -------
    None
        All output is produced as side effects (figure and stdout).
    """
    result = analyze_distribution_changed(df, column, return_df=True)

    # The helper may hand back None (presumably for a missing column --
    # TODO confirm in Helper_funtions); then there is nothing to show.
    if result is None:
        return

    heading = title or column

    # pandas typically fails when asked to draw bars for an empty table, so
    # report the situation instead of aborting the whole cell.
    if result.empty:
        print(f"\n🔹 No data available for: {heading}")
        return

    # Use the Axes returned by pandas rather than pyplot's implicit
    # "current figure" state, so the labels always land on this chart.
    ax = result.plot(kind="bar", y="Percentage", legend=False, title=heading)
    ax.set_ylabel("Percentage")
    ax.set_xlabel(column)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.figure.tight_layout()
    plt.show()

    # print table
    print(f"\n🔹 Tabelle für: {heading}")
    print(result.to_string())

# ---- Demographic Analysis ----

# One chart + table per (column, heading) pair, in the order listed:
# gender, age, education level, weekly hours, both parents, CRT score
for column_name, plot_title in (
    ("Gender", "Gender Distribution"),
    ("Age", "Age Distribution"),
    ("Education Level", "Students’ Education Level"),
    ("Hours per week for school", "Hours per Week Distribution"),
    ("Educational Level parent_1", "Parental Education – Educational Level parent_1"),
    ("Educational Level parent_2", "Parental Education – Educational Level parent_2"),
    ("CRT_points", "CRT Score Distribution"),
):
    show_distribution_with_plot(df, column_name, plot_title)

# ---- Subject Preferences ----

def show_subject_distribution_with_plot(df, column, title=None):
    """Show how often each subject was named, as a bar chart plus a text table.

    The table comes from ``analyze_subject_distribution_changed(df, column,
    return_df=True)``. Its ``"Percentage of respondents"`` column is drawn as
    bars and the complete table is then printed with ``to_string()``.

    Parameters
    ----------
    df
        Survey data, passed through unchanged to
        ``analyze_subject_distribution_changed``.
    column : str
        Subject column to analyze; also used as the x-axis label.
    title : str, optional
        Heading for the chart and the printed table. Falls back to ``column``
        when omitted or empty.

    Returns
    -------
    None
        All output is produced as side effects (figure and stdout).
    """
    result = analyze_subject_distribution_changed(df, column, return_df=True)

    # The helper may hand back None (presumably for a missing column --
    # TODO confirm in Helper_funtions); then there is nothing to show.
    if result is None:
        return

    heading = title or column

    # pandas typically fails when asked to draw bars for an empty table, so
    # report the situation instead of aborting the whole cell.
    if result.empty:
        print(f"\n🔹 No data available for: {heading}")
        return

    # Use the Axes returned by pandas rather than pyplot's implicit
    # "current figure" state, so the labels always land on this chart.
    ax = result.plot(
        kind="bar", y="Percentage of respondents", legend=False, title=heading
    )
    ax.set_ylabel("Percentage of respondents")
    ax.set_xlabel(column)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.figure.tight_layout()
    plt.show()

    # Text version of the same table
    print(f"\n🔹 Tabelle für: {heading}")
    print(result.to_string())

# Favorite subjects first, least favorite subjects second
for subject_column, plot_title in (
    ("Preferred Subjects", "Favorite Subjects"),
    ("Least preferred Subjects", "Least Favorite Subjects"),
):
    show_subject_distribution_with_plot(df, subject_column, plot_title)
