In [None]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# =============================
# Streamlit Page Config
# =============================
# Set the browser-tab title and let the app use the full page width.
st.set_page_config(page_title="User Segmentation Dashboard", layout="wide")
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.title("\N{BAR CHART} App User Segmentation Dashboard")
st.write("Customer Segmentation using K-Means Clustering")

# =============================
# Load Processed Clustered Data
# =============================
@st.cache_data
def load_data():
    """Read the clustered-user CSV and return it as a pandas DataFrame.

    The file location is fixed and relative to the working directory the
    app is launched from. The function is wrapped in ``st.cache_data``.

    Returns:
        pandas.DataFrame: contents of ``data/processed/clustered_users.csv``.
    """
    # Final dataset, already written out by an earlier processing step.
    csv_location = "data/processed/clustered_users.csv"
    return pd.read_csv(csv_location)

# Load the dataset once at module level; every page below reads from it.
df_final = load_data()

# =============================
# Sidebar Navigation
# =============================
# The selected option string is compared verbatim by the page branches below.
page = st.sidebar.radio(
    label="Select Page",
    options=[
        "Cluster Profile & Customer Identification",
        "Cluster Distribution",
        "PCA Cluster Visualization",
    ],
)

# =============================
# Page 1: Cluster Profile & Customer Identification
# =============================
# Render exactly one page, chosen by the sidebar radio value in `page`.
# Emoji in headers are written as \N{...} escapes so the source stays
# ASCII-only and cannot be turned into mojibake by an encoding round-trip.
if page == "Cluster Profile & Customer Identification":
    st.header("\N{BAR CHART} Cluster Profile (Mean Behavior)")
    # "number" matches every numeric dtype, not only int64/float64, so
    # columns stored as e.g. int32 or float32 are still profiled.
    numeric_cols = df_final.select_dtypes(include="number").columns
    numeric_cols = [c for c in numeric_cols if c not in ["cluster", "PCA1", "PCA2"]]  # Exclude meta cols
    # Mean of each remaining numeric column, one row per cluster label.
    cluster_profile = df_final.groupby("cluster_label")[numeric_cols].mean()
    st.dataframe(cluster_profile)

    st.header("\N{BUST IN SILHOUETTE} Customer-Level Identification")
    selected_cluster = st.selectbox(
        "Select Cluster",
        # Drop missing labels before sorting: a NaN mixed with string labels
        # makes sorted() raise TypeError.
        sorted(df_final["cluster_label"].dropna().unique())
    )
    cluster_users = df_final[df_final["cluster_label"] == selected_cluster]
    st.write(f"Users in Cluster '{selected_cluster}': {cluster_users.shape[0]}")
    # Only the first 20 rows of the selected cluster are shown.
    st.dataframe(cluster_users.head(20))

# =============================
# Page 2: Cluster Distribution
# =============================
elif page == "Cluster Distribution":
    st.header("\N{PUSHPIN} Cluster Distribution")
    # Row count per value of the "cluster" column, ordered by cluster value.
    cluster_counts = df_final["cluster"].value_counts().sort_index()

    fig, ax = plt.subplots()
    cluster_counts.plot(kind="bar", ax=ax)
    ax.set_xlabel("Cluster")
    ax.set_ylabel("Number of Users")
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; close the figure
    # so pyplot's global registry does not accumulate one figure per rerun.
    plt.close(fig)

# =============================
# Page 3: PCA Cluster Visualization
# =============================
elif page == "PCA Cluster Visualization":
    st.header("\N{CHART WITH UPWARDS TREND} PCA Cluster Visualization")
    fig, ax = plt.subplots(figsize=(8,6))
    # Scatter of the two PCA columns, coloured by cluster label.
    sns.scatterplot(
        data=df_final,
        x="PCA1",
        y="PCA2",
        hue="cluster_label",
        palette="Set1",
        ax=ax
    )
    ax.set_title("PCA of User Segments")
    st.pyplot(fig)
    # Release the figure after rendering (see the note on the previous page).
    plt.close(fig)

    # NOTE(review): this legend text is static; confirm it matches the
    # cluster_label values actually present in the data.
    st.markdown("""
    **Legend for Clusters:**  
    - High Engagement  
    - Moderate Engagement  
    - Low Engagement / At-Risk  
    - Occasional Users
    """)

In [None]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# =============================
# Streamlit Page Config
# =============================
# Set the browser-tab title and let the app use the full page width.
st.set_page_config(page_title="User Segmentation Dashboard", layout="wide")
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.title("\N{BAR CHART} App User Segmentation Dashboard")
st.write("Customer Segmentation using K-Means Clustering")

# =============================
# Load Processed Clustered Data
# =============================
@st.cache_data
def load_data():
    """Load the processed, already-clustered user table.

    Reads ``data/processed/clustered_users.csv`` (relative to the current
    working directory) with ``pandas.read_csv``. The function is wrapped in
    ``st.cache_data``.

    Returns:
        pandas.DataFrame: the parsed CSV.
    """
    # Final dataset, already written out by an earlier processing step.
    clustered_csv = "data/processed/clustered_users.csv"
    return pd.read_csv(clustered_csv)

# Load the dataset once at module level; every section below reads from it.
df_final = load_data()

# =============================
# Sidebar Info
# =============================
# Elements created inside this block are placed in the sidebar.
with st.sidebar:
    st.header("Project Info")
    # shape is (row count, column count) of the loaded table.
    st.write("Total Users:", df_final.shape[0])
    st.write("Total Features:", df_final.shape[1])

# =============================
# Cluster Distribution
# =============================
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.subheader("\N{PUSHPIN} Cluster Distribution")

# Row count per value of the "cluster" column, ordered by cluster value.
cluster_counts = df_final["cluster"].value_counts().sort_index()

fig1, ax1 = plt.subplots()
cluster_counts.plot(kind="bar", ax=ax1)
ax1.set_xlabel("Cluster")
ax1.set_ylabel("Number of Users")
st.pyplot(fig1)
# Streamlit re-executes the script on every interaction; close the figure so
# pyplot's global registry does not accumulate one figure per rerun.
plt.close(fig1)

# =============================
# PCA Visualization
# =============================
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.subheader("\N{CHART WITH UPWARDS TREND} PCA Cluster Visualization")

fig2, ax2 = plt.subplots(figsize=(8,6))
# Scatter of the two PCA columns, coloured by cluster label.
sns.scatterplot(
    data=df_final,
    x="PCA1",
    y="PCA2",
    hue="cluster_label",  # Use labels for friendly names
    palette="Set1",
    ax=ax2
)
ax2.set_title("PCA of User Segments")
st.pyplot(fig2)
# Streamlit re-executes the script on every interaction; close the figure so
# pyplot's global registry does not accumulate one figure per rerun.
plt.close(fig2)

# =============================
# Cluster Profiling
# =============================
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.subheader("\N{BAR CHART} Cluster Profile (Mean Behavior)")

# "number" matches every numeric dtype, not only int64/float64, so columns
# stored as e.g. int32 or float32 are still profiled.
numeric_cols = df_final.select_dtypes(include="number").columns
numeric_cols = [c for c in numeric_cols if c not in ["cluster", "PCA1", "PCA2"]]  # Exclude meta cols

# Mean of each remaining numeric column, one row per cluster label.
cluster_profile = df_final.groupby("cluster_label")[numeric_cols].mean()
st.dataframe(cluster_profile)

# =============================
# Customer-Level Identification
# =============================
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.subheader("\N{BUST IN SILHOUETTE} Customer-Level Identification")

selected_cluster = st.selectbox(
    "Select Cluster",
    # Drop missing labels before sorting: a NaN mixed with string labels
    # makes sorted() raise TypeError.
    sorted(df_final["cluster_label"].dropna().unique())
)

# Rows whose label equals the current selection.
cluster_users = df_final[df_final["cluster_label"] == selected_cluster]

st.write(f"Users in Cluster '{selected_cluster}': {cluster_users.shape[0]}")
# Only the first 20 rows of the selected cluster are shown.
st.dataframe(cluster_users.head(20))

# =============================
# Business Insights
# =============================
# The emoji is written as a \N{...} escape so the source stays ASCII-only and
# cannot be turned into mojibake by an encoding round-trip.
st.subheader("\N{BRIEFCASE} Business Insights")

# Static recommendation text, one entry per segment name.
# NOTE(review): confirm these names match the cluster_label values in the data.
st.markdown("""
**High Engagement Users**  
- Ideal for loyalty programs and premium offers  

**Moderate Engagement Users**  
- Target with feature-based engagement campaigns  

**Low Engagement / At-Risk Users**  
- Focus on retention and reactivation strategies  

**Occasional Users**  
- Improve onboarding and awareness campaigns  
""")