In [2]:
# INTERACTIVE DASHBOARD – CHECKBOX + DROPDOWN UI (Single cell)

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import widgets, HBox, VBox, Layout
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import plotly.express as px
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

# Load the transaction log. Any failure (missing file, unreadable CSV,
# unparseable dates) falls back to seeded synthetic data so the dashboard
# still runs as a demo.
try:
    df = pd.read_csv("transactions.csv")
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
    else:
        # No date column: spread the rows over the past year. A seeded
        # generator keeps the fabricated offsets reproducible across runs.
        day_offsets = np.random.default_rng(42).integers(0, 365, size=len(df))
        df['Date'] = pd.to_datetime('today') - pd.to_timedelta(day_offsets, unit='D')
except Exception as e:
    n = 1000
    rng = np.random.default_rng(42)
    # The draw order below determines the generated values; keep it stable.
    df = pd.DataFrame({
        "Transaction_Id": np.arange(1, n+1),
        "Customer_Id": rng.integers(1, 201, n),
        "Date": pd.to_datetime("2024-01-01") + pd.to_timedelta(rng.integers(0, 365, n), unit='D'),
        "Item": rng.choice(["Milk","Bread","Eggs","Butter","Juice","Yogurt","Cheese","Coffee","Tea","Sugar"], size=n),
        "Amount": np.round(rng.uniform(20, 700, n),2),
        "Quantity": rng.integers(1,6,n)
    })
    # Report the actual cause: this handler also catches parse errors, not
    # only a missing file.
    print(f"Warning: could not load 'transactions.csv' ({type(e).__name__}: {e}) — using synthetic demo data.")

# Reference point for recency: the day after the latest transaction, so the
# customer with the most recent purchase gets Recency == 1 rather than 0.
current_date = df['Date'].max() + pd.Timedelta(days=1)


def _days_since_last_purchase(dates):
    """Return whole days between a customer's latest purchase and current_date."""
    return (current_date - dates.max()).days


# One row per customer: days since last purchase, number of distinct
# transactions, and total amount spent.
rfm = (
    df.groupby("Customer_Id")
    .agg({
        "Date": _days_since_last_purchase,
        "Transaction_Id": "nunique",
        "Amount": "sum",
    })
    .reset_index()
    .rename(columns={
        "Date": "Recency",
        "Transaction_Id": "Frequency",
        "Amount": "Monetary",
    })
)

# Standardise the three features so no single one dominates the distance
# metric, then assign each customer to one of three k-means clusters.
sc = StandardScaler()
rfm_scaled = sc.fit_transform(rfm.loc[:, ["Recency", "Frequency", "Monetary"]])
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(rfm_scaled)
rfm["Cluster"] = kmeans.labels_

# Chart names offered in the picker. generate_dashboard compares the selected
# entries against these exact strings, so they must not be reworded here alone.
graph_options = [
    "Histogram (Matplotlib)",
    "Bar (Matplotlib)",
    "Scatter (Matplotlib)",
    "Pair Plot (Seaborn)",
    "Top Items (Plotly)",
    "RFM Distribution (Seaborn)",
    "Cluster Plot (Plotly)",
    "Top Items (Bokeh)",
]

# Multi-select list sized to show every chart name without scrolling; the
# histogram (first entry) is pre-selected.
graph_selector = widgets.SelectMultiple(
    description="Graphs",
    options=graph_options,
    value=graph_options[:1],
    rows=len(graph_options),
    style=dict(description_width="initial"),
    layout=Layout(width="360px"),
)

# X-axis choices: every non-text column (numeric and datetime alike), plus
# "Amount"/"Quantity" when they exist but were read as text. Each column is
# listed once, and only columns that are really in df are offered.
# pandas' dtype helpers are used instead of comparing against 'O' so that
# extension dtypes (e.g. the dedicated string dtype) are classified correctly.
_x_options = [
    c for c in df.columns
    if c == "Quantity"
    or not (pd.api.types.is_object_dtype(df[c]) or pd.api.types.is_string_dtype(df[c]))
]
for _name in ("Amount", "Quantity"):
    if _name in df.columns and _name not in _x_options:
        _x_options.append(_name)

x_dropdown = widgets.Dropdown(
    options=_x_options,
    # The default must be one of the options, otherwise the widget raises.
    value="Amount" if "Amount" in _x_options else (_x_options[0] if _x_options else None),
    description="X (matplotlib):"
)

# Y-axis choices: numeric columns only (booleans excluded). np.issubdtype is
# avoided because it raises TypeError on pandas extension dtypes.
_y_options = [
    c for c in df.columns
    if pd.api.types.is_numeric_dtype(df[c]) and not pd.api.types.is_bool_dtype(df[c])
]

y_dropdown = widgets.Dropdown(
    options=_y_options,
    # Prefer "Quantity"; otherwise fall back to the first numeric column.
    # None (no numeric column at all) is handled by the scatter plot branch.
    value="Quantity" if "Quantity" in _y_options else (_y_options[0] if _y_options else None),
    description="Y (matplotlib):"
)

# Which RFM metric the "RFM Distribution (Seaborn)" histogram shows.
rfm_dropdown = widgets.Dropdown(
    description="RFM Feature:",
    options=["Recency", "Frequency", "Monetary"],
    value="Recency",
)

# How many of the most frequent items the item-count charts list. The value
# is only pushed when the handle is released, not while it is being dragged.
top_n_slider = widgets.IntSlider(
    description="Top N Items:",
    min=3,
    max=20,
    step=1,
    value=8,
    continuous_update=False,
)

# Feature columns available to the pair plot: everything in rfm except the
# customer identifier and the cluster label.
pairplot_vars = widgets.SelectMultiple(
    description="Pairplot vars:",
    options=[name for name in rfm.columns if name != 'Customer_Id' and name != 'Cluster'],
    value=["Recency", "Frequency", "Monetary"],
    rows=3,
)

# Pressing this button triggers chart generation.
button = widgets.Button(
    description="Generate Dashboard",
    layout=Layout(width='220px'),
    button_style="success",
)

# Bordered area into which all charts and messages are rendered.
output = widgets.Output(layout=Layout(border='1px solid gray'))

def _top_item_counts():
    """Return purchase counts of the most frequent items, most frequent first.

    The number of items kept is the current value of ``top_n_slider``.
    """
    return df["Item"].value_counts().head(top_n_slider.value)


def _plot_histogram():
    """Draw a 20-bin histogram with KDE of the column chosen in ``x_dropdown``."""
    _, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(df[x_dropdown.value], bins=20, kde=True, ax=ax)
    ax.set_title(f"Histogram of {x_dropdown.value}")
    plt.show()


def _plot_item_bar():
    """Draw a Matplotlib bar chart of the top-N item counts."""
    top = _top_item_counts()
    _, ax = plt.subplots(figsize=(8, 4))
    top.plot(kind='bar', ax=ax)
    ax.set_title(f"Top {top_n_slider.value} Items (Matplotlib)")
    ax.set_xlabel("Item")
    ax.set_ylabel("Count")
    plt.xticks(rotation=45)
    plt.show()


def _plot_scatter():
    """Scatter the ``y_dropdown`` column against the ``x_dropdown`` column."""
    if y_dropdown.value is None:
        print("No numeric Y column available for scatter.")
        return
    _, ax = plt.subplots(figsize=(7, 5))
    ax.scatter(df[x_dropdown.value], df[y_dropdown.value], alpha=0.6, s=20)
    ax.set_xlabel(x_dropdown.value)
    ax.set_ylabel(y_dropdown.value)
    ax.set_title(f"Scatter: {y_dropdown.value} vs {x_dropdown.value}")
    plt.show()


def _plot_pairplot():
    """Draw a Seaborn pair plot of the selected RFM features, coloured by cluster."""
    vars_sel = list(pairplot_vars.value)
    if len(vars_sel) < 2:
        print("Choose at least 2 variables for pairplot.")
        return
    sns.pairplot(rfm[vars_sel + ['Cluster']].dropna(), hue='Cluster', palette='bright', corner=True)
    plt.suptitle("Pairplot (RFM + Cluster)", y=1.02)
    plt.show()


def _plot_top_items_plotly():
    """Draw a horizontal Plotly bar chart of the top-N item counts."""
    item_counts = _top_item_counts()
    fig = px.bar(
        x=item_counts.values,
        y=item_counts.index,
        orientation="h",
        title=f"Top {top_n_slider.value} Purchased Items (Plotly)",
        labels={'x':'Count', 'y':'Item'},
        text=item_counts.values
    )
    fig.update_layout(height=400)
    fig.show()


def _plot_rfm_distribution():
    """Draw a 20-bin histogram with KDE of the RFM feature chosen in ``rfm_dropdown``."""
    _, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(rfm[rfm_dropdown.value], bins=20, kde=True, ax=ax)
    ax.set_title(f"{rfm_dropdown.value} Distribution (RFM)")
    plt.show()


def _plot_clusters_plotly():
    """Draw a Plotly scatter of Frequency vs Monetary, one colour per cluster."""
    # Cluster ids are labels, not quantities: cast them to strings so Plotly
    # uses a discrete legend instead of a continuous colour bar.
    clustered = rfm.assign(Cluster=rfm["Cluster"].astype(str))
    fig = px.scatter(
        clustered,
        x="Frequency",
        y="Monetary",
        color="Cluster",
        category_orders={"Cluster": sorted(clustered["Cluster"].unique())},
        title="Customer Cluster Scatter Plot (Plotly)",
        hover_data=["Customer_Id", "Recency"]
    )
    fig.update_traces(marker=dict(size=10))
    fig.show()


def _plot_top_items_bokeh():
    """Draw a horizontal Bokeh bar chart of the top-N item counts."""
    item_counts = _top_item_counts().reset_index()
    item_counts.columns = ["Item", "Count"]
    p = figure(
        # Reversed so the most frequent item ends up at the top of the axis.
        y_range=list(item_counts["Item"][::-1]),
        height=300,
        title=f"Top {top_n_slider.value} Items (Bokeh)"
    )
    p.hbar(y='Item', right='Count', height=0.6, source=item_counts)
    p.xaxis.axis_label = "Count"
    show(p)


# Maps each entry of the graph picker to the function that renders it.
_GRAPH_RENDERERS = {
    "Histogram (Matplotlib)": _plot_histogram,
    "Bar (Matplotlib)": _plot_item_bar,
    "Scatter (Matplotlib)": _plot_scatter,
    "Pair Plot (Seaborn)": _plot_pairplot,
    "Top Items (Plotly)": _plot_top_items_plotly,
    "RFM Distribution (Seaborn)": _plot_rfm_distribution,
    "Cluster Plot (Plotly)": _plot_clusters_plotly,
    "Top Items (Bokeh)": _plot_top_items_bokeh,
}


def generate_dashboard(_):
    """Render every chart currently selected in ``graph_selector`` into ``output``.

    Intended as the click handler of the "Generate Dashboard" button; the
    single positional argument passed by the button is ignored. The output
    area is cleared first, then each selected chart is drawn in selection
    order under a ``--- name ---`` header. If nothing is selected, a hint is
    printed instead.
    """
    with output:
        # wait=True defers the clear until new content arrives, avoiding flicker.
        output.clear_output(wait=True)
        selections = list(graph_selector.value)
        if not selections:
            print("Select at least one graph from the list.")
            return
        for sel in selections:
            print(f"--- {sel} ---")
            renderer = _GRAPH_RENDERERS.get(sel)
            if renderer is not None:
                renderer()

# Redraw the selected charts every time the button is pressed.
button.on_click(generate_dashboard)

# Two-column control panel: chart picker, top-N slider and button on the
# left; axis, RFM-feature and pair-plot selectors on the right.
controls_left = VBox([graph_selector, top_n_slider, button], layout=Layout(width="40%"))
controls_right = VBox([x_dropdown, y_dropdown, rfm_dropdown, pairplot_vars], layout=Layout(width="60%"))
ui = HBox([controls_left, controls_right], layout=Layout(margin='8px'))

# Heading, controls, then the chart output area stacked vertically.
# `display` is the notebook-provided builtin.
display(VBox([widgets.HTML("<h3 style='color:darkgreen'>🧭 Interactive Customer Analytics Dashboard</h3>"), ui, output]))


VBox(children=(HTML(value="<h3 style='color:darkgreen'>🧭 Interactive Customer Analytics Dashboard</h3>"), HBox…