# In [None]:
import streamlit as st
import pandas as pd

# --- Page configuration and heading ---
# The page config call is the first Streamlit command issued by the script.
st.set_page_config(layout="wide", page_title="Consistency Checker")
st.title("📊 Questionnaire Logic Checker")

# --- Inputs: response file plus an optional benchmark file ---
st.markdown(
    "Upload your questionnaire responses and optionally a benchmark dataset from last year."
)
uploaded_file = st.file_uploader(
    label="📁 Upload response CSV",
    type=["csv"],
    key="responses",
)
benchmark_file = st.file_uploader(
    label="📁 Upload benchmark CSV (optional)",
    type=["csv"],
    key="benchmark",
)

# --- Tolerance margin: integer slider from 0 to 2, initially 1 ---
margin = st.slider(
    label="⚙️ Select tolerance margin (±)",
    min_value=0,
    max_value=2,
    value=1,
)

# --- Logic consistency rules ---
# Each (q1, q2) pair maps every q1 answer (listed 5 down to 1) to the q2
# answers regarded as consistent with it: the same score and its direct
# neighbours, restricted to the 1-5 scale.
rules = {
    question_pair: {
        score: [
            candidate
            for candidate in (score - 1, score, score + 1)
            if 1 <= candidate <= 5
        ]
        for score in range(5, 0, -1)
    }
    for question_pair in (
        ("Q1_Satisfaction", "Q2_Preference"),
        ("Q1_Satisfaction", "Q3_Recommend"),
        ("Q2_Preference", "Q4_Repurchase"),
    )
}

# --- Optional benchmark thresholds ---
# With no benchmark upload the mapping is empty; otherwise it holds the mean
# of every numeric column of the benchmark CSV, keyed by column name.
if not benchmark_file:
    benchmarks = {}
else:
    benchmark_df = pd.read_csv(benchmark_file)
    benchmarks = benchmark_df.mean(numeric_only=True).to_dict()

# --- Consistency & Benchmark Checker ---
def check_row(row, rules, margin, benchmarks, benchmark_tolerance=0.5):
    """Check one response row against the logic rules and the benchmarks.

    Args:
        row: Mapping-like response (a ``pd.Series`` row or a dict) keyed by
            column name.
        rules: Dict mapping ``(q1, q2)`` column pairs to
            ``{q1_answer: [acceptable q2 answers]}``.
        margin: Number of scale points by which the acceptable q2 range is
            widened on each side; the widened range is clamped to 1-5.
        benchmarks: Dict mapping column name to its benchmark value.
        benchmark_tolerance: Largest absolute deviation from a benchmark
            value that is not flagged. Defaults to 0.5.

    Returns:
        A ``pd.Series`` with the keys ``Consistency_Check``,
        ``Recommendations``, ``Benchmark_Flags`` and ``Risk_Score``. The
        risk score gains one point per inconsistency and per benchmark flag.
    """

    def is_usable_number(value):
        # Blank cells arrive as NaN and free-text cells as str; neither can
        # take part in the distance arithmetic below.
        return pd.api.types.is_number(value) and not pd.isna(value)

    inconsistencies = []
    recommendations = []
    benchmark_flags = []
    risk_score = 0

    # --- Logic checks ---
    for (q1, q2), mapping in rules.items():
        if q1 not in row or q2 not in row:
            continue

        q1_val = row[q1]
        q2_val = row[q2]
        valid = mapping.get(q1_val, [])

        if not valid:
            # No rule exists for this q1 answer (blank, off-scale or text).
            # Report it instead of letting min()/max() fail on an empty list.
            inconsistencies.append(f"{q1}={q1_val} is not covered by the rules")
            recommendations.append(f"Review {q1}")
            risk_score += 1
            continue

        # Widen the acceptable answers by the margin, clamped to the scale.
        low = max(1, min(valid) - margin)
        high = min(5, max(valid) + margin)
        if q2_val in range(low, high + 1):
            recommendations.append(f"{q2} OK for {q1}={q1_val}")
            continue

        inconsistencies.append(f"{q1}={q1_val} vs {q2}={q2_val}")
        risk_score += 1
        if is_usable_number(q2_val):
            closest = min(valid, key=lambda x: abs(x - q2_val))
            recommendations.append(f"Adjust {q2} to {closest}")
        else:
            # A blank or text answer has no meaningful "closest" value.
            recommendations.append(f"Provide a numeric answer for {q2}")

    # --- Benchmark checks ---
    for col, target in benchmarks.items():
        if col not in row:
            continue
        actual = row[col]
        if not is_usable_number(actual):
            # Blank or text cells cannot be compared with a numeric benchmark.
            continue
        if abs(actual - target) > benchmark_tolerance:
            benchmark_flags.append(f"{col}: {actual} vs {target:.2f}")
            risk_score += 1

    return pd.Series({
        "Consistency_Check": " | ".join(inconsistencies) if inconsistencies else "Consistent",
        "Recommendations": " | ".join(recommendations),
        "Benchmark_Flags": " | ".join(benchmark_flags) if benchmark_flags else "OK",
        "Risk_Score": risk_score,
    })

# --- Run the analysis ---
# Runs only once a response CSV has been uploaded; otherwise a hint is shown.
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte or malformed upload gets a readable message instead of
        # a raw traceback.
        st.error(f"❌ Could not read the uploaded CSV: {exc}")
    else:
        if df.empty:
            # A row-wise apply on a frame without rows hands back a copy of
            # that frame, so concatenating it would only duplicate the input
            # columns and add none of the result columns.
            st.warning("⚠️ The uploaded CSV contains no response rows.")
        else:
            with st.spinner("🔍 Checking consistency and benchmarks..."):
                # check_row returns one Series per row; apply assembles them
                # into a frame that is placed next to the original columns.
                results = df.apply(check_row, axis=1, args=(rules, margin, benchmarks))
                df_result = pd.concat([df, results], axis=1)

            st.success("✅ Check completed!")
            st.subheader("📋 Results Preview")
            st.dataframe(df_result, use_container_width=True)

            # --- Download Button ---
            csv = df_result.to_csv(index=False).encode('utf-8')
            st.download_button("💾 Download Checked CSV", csv, "checked_responses.csv", "text/csv")
else:
    st.info("Please upload a CSV file to begin.")