In [None]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

def create_interactive_dashboard(customers):
    # Prepare data for interactivity
    segments = sorted(customers['cluster'].unique())
    segment_names = {i: f"Segment {i}" for i in segments}
    
    # For Jupyter Notebook environment, use JupyterDash
    try:
        from jupyter_dash import JupyterDash
        app = JupyterDash(__name__)
    except ImportError:
        app = dash.Dash(__name__)
    
    # Create app layout
    app.layout = html.Div([
        html.H1("Interactive Customer Segmentation Analysis", style={'textAlign': 'center'}),
        
        # Dropdown for segment selection
        html.Div([
            html.Label("Select Segments to Display:"),
            dcc.Dropdown(
                id='segment-dropdown',
                options=[{'label': segment_names[i], 'value': i} for i in segments],
                value=segments,  # Default to all segments
                multi=True
            )
        ], style={'width': '30%', 'margin': '10px'}),
        
        # Slider for CLV filtering
        html.Div([
            html.Label("Filter by CLV Range:"),
            dcc.RangeSlider(
                id='clv-slider',
                min=customers['clv_1yr'].min(),
                max=customers['clv_1yr'].max(),
                step=(customers['clv_1yr'].max() - customers['clv_1yr'].min()) / 100,
                marks={int(customers['clv_1yr'].min()): f'${int(customers["clv_1yr"].min())}',
                       int(customers['clv_1yr'].max()): f'${int(customers["clv_1yr"].max())}'},
                value=[customers['clv_1yr'].min(), customers['clv_1yr'].max()]
            )
        ], style={'width': '70%', 'margin': '20px'}),
        
        # Interactive charts container
        html.Div([
            dcc.Graph(id='segment-scatter-plot'),
            dcc.Graph(id='clv-histogram')
        ], style={'display': 'flex', 'flexWrap': 'wrap'}),
        
        # Detail metrics table
        html.Div(id='segment-metrics-table')
    ])
    
    # Define callbacks
    @app.callback(
        [Output('segment-scatter-plot', 'figure'),
         Output('clv-histogram', 'figure'),
         Output('segment-metrics-table', 'children')],
        [Input('segment-dropdown', 'value'),
         Input('clv-slider', 'value')]
    )
    def update_graphs(selected_segments, clv_range):
        # Filter data based on user selections
        filtered_df = customers[
            (customers['cluster'].isin(selected_segments)) & 
            (customers['clv_1yr'] >= clv_range[0]) & 
            (customers['clv_1yr'] <= clv_range[1])
        ]
        
        # Create scatter plot
        scatter_fig = px.scatter(
            filtered_df, 
            x='recency', 
            y='clv_1yr',
            color='cluster',
            color_discrete_sequence=px.colors.qualitative.Bold,
            hover_data=['frequency', 'monetary', 'tenure'],
            labels={
                'recency': 'Days Since Last Transaction',
                'clv_1yr': 'Customer Lifetime Value ($)',
                'cluster': 'Segment'
            },
            title='Customer Positioning: CLV vs Recency'
        )
        
        # Add hover template for more detailed information
        scatter_fig.update_traces(
            hovertemplate='<b>CLV:</b> $%{y:.2f}<br>' +
                          '<b>Recency:</b> %{x} days<br>' +
                          '<b>Purchase Frequency:</b> %{customdata[0]:.1f}<br>' +
                          '<b>Avg. Purchase:</b> $%{customdata[1]:.2f}<br>' +
                          '<b>Tenure:</b> %{customdata[2]} days<extra></extra>'
        )
        
        # Create histogram
        hist_fig = px.histogram(
            filtered_df,
            x='clv_1yr',
            color='cluster',
            nbins=50,
            color_discrete_sequence=px.colors.qualitative.Bold,
            labels={'clv_1yr': 'Customer Lifetime Value ($)'},
            title='CLV Distribution by Segment'
        )
        
        # Calculate segment metrics for the table
        segment_metrics = []
        for segment in selected_segments:
            segment_data = filtered_df[filtered_df['cluster'] == segment]
            segment_metrics.append(html.Tr([
                html.Td(f"Segment {segment}"),
                html.Td(f"{len(segment_data)}"),
                html.Td(f"${segment_data['clv_1yr'].mean():.2f}"),
                html.Td(f"${segment_data['clv_1yr'].sum():.2f}"),
                html.Td(f"{segment_data['clv_1yr'].sum() / filtered_df['clv_1yr'].sum():.1%}")
            ]))
        
        metrics_table = html.Table([
            html.Thead(html.Tr([
                html.Th("Segment"), 
                html.Th("Count"), 
                html.Th("Avg CLV"), 
                html.Th("Total Value"), 
                html.Th("% of Portfolio")
            ])),
            html.Tbody(segment_metrics)
        ], style={'width': '100%', 'border': '1px solid black', 'borderCollapse': 'collapse'})
        
        return scatter_fig, hist_fig, metrics_table
    
    return app


In [2]:
# Load your customer data
customers = pd.read_csv('C:/Users/carlo/Documents/4.DS/CAT3.CustomerLVS/data/processed/customer_segments.csv')

# Verify the customers DataFrame exists and check its columns
print("DataFrame loaded successfully!")
print(f"Number of rows: {len(customers)}")
print(f"Columns available: {customers.columns.tolist()}")


DataFrame loaded successfully!
Number of rows: 4500
Columns available: ['client_id', 'recency_days', 'frequency', 'monetary_value', 'predicted_transactions_1yr', 'expected_avg_profit', 'clv_1yr', 'cluster', 'segment_name']


In [None]:
# Usage 
# In a notebook, you would run:
app = create_interactive_dashboard(client_id)
app.run_server(mode='inline')


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



AttributeError: 'super' object has no attribute 'run_server'