# **Dashboard**

# 1. Imports and Loading

In [96]:
# Import libraries
import pandas as pd
import numpy as np
import itertools

from dash import Dash, html, dash_table, dcc, callback, Output, Input, MATCH, callback_context, State, dash
import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc
from plotly.subplots import make_subplots

from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.metrics import silhouette_score



In [97]:
# Make pandas print frames in full: no column, row or sequence truncation,
# and cell contents up to 1000 characters wide.
_display_settings = {
    'display.max_columns': None,
    'display.max_rows': None,
    'display.max_seq_items': None,
    'display.max_colwidth': 1000,
}
for _option, _setting in _display_settings.items():
    pd.set_option(_option, _setting)

In [98]:
# Load both input CSV files from the project folder.
# NOTE(review): the folder is an absolute, machine-specific path.
path = '/home/shadybea/OneDrive/General/Data Mining/Project/'

data = pd.read_csv(path + 'DM2425_ABCDEats_DATASET.csv')
# The first CSV column of this file is used as the row index.
regulars = pd.read_csv(path + 'rfm_regulars.csv', index_col=0)

# 2. Preprocessing

In [99]:
# Map of raw dataset column names to the shorter names used in this notebook.
# ('n_order' maps to itself, so that entry leaves the name unchanged.)
_rename_dict = {
    'customer_region': 'cust_region',
    'payment_method': 'pay_method',
    'customer_age': 'cust_age',
    'vendor_count': 'n_vendor',
    'product_count': 'n_product',
    'n_order': 'n_order',
    'is_chain': 'n_chain',
    'CUI_American': 'american',
    'CUI_Asian': 'asian',
    'CUI_Beverages': 'beverages',
    'CUI_Cafe': 'cafe',
    'CUI_Chicken Dishes': 'chicken_dishes',
    'CUI_Chinese': 'chinese',
    'CUI_Desserts': 'desserts',
    'CUI_Healthy': 'healthy',
    'CUI_Indian': 'indian',
    'CUI_Italian': 'italian',
    'CUI_Japanese': 'japanese',
    'CUI_Noodle Dishes': 'noodle_dishes',
    'CUI_OTHER': 'other',
    'CUI_Street Food / Snacks': 'street_food_snacks',
    'CUI_Thai': 'thai',
}

# Apply the new names; columns missing from the mapping keep their name
data = data.rename(columns=_rename_dict)

In [100]:
# Customer ids are parsed as base-16 strings and stored as integers.
data['customer_id'] = data['customer_id'].map(lambda hex_id: int(hex_id, 16))

# Keep only the first row of each id, then promote the id to the index
data = data.drop_duplicates(subset='customer_id').set_index('customer_id')

In [101]:
# Missing first_order values are replaced by 0
data['first_order'] = data['first_order'].fillna(0)

In [102]:
# Fill missing values for HR_0 with the gap between the weekday total and the
# hourly total (missing hour values are skipped when summing).
_dow_columns = [f"DOW_{n}" for n in range(7)]
_hr_columns = [f"HR_{n}" for n in range(24)]

sum_week = data[_dow_columns].sum(axis=1)
sum_day = data[_hr_columns].sum(axis=1)

# fillna aligns on the index, so each row receives its own difference
data['HR_0'] = data['HR_0'].fillna(sum_week - sum_day)

In [103]:
# Row-level sanity checks; a row is kept only when every check holds.

# has at least one vendor
has_vendor = data['n_vendor'].ne(0)

# has at least one product
has_product = data['n_product'].ne(0)

# purchase must have been made on a valid dow
some_day = data[[f"DOW_{n}" for n in range(7)]].ne(0).any(axis=1)

# purchase must have been made at a valid hour
some_hour = data[[f"HR_{n}" for n in range(24)]].ne(0).any(axis=1)

# some type of cuisine must have been ordered
# (the cuisine columns are addressed by position: columns 9 to 23)
some_food = data.iloc[:, 9:24].ne(0).any(axis=1)

# Rows failing any of the checks are dropped
data = data[has_vendor & has_product & some_day & some_hour & some_food]

In [104]:
# Customer Region: the '-' placeholder becomes '8670', and regions '2440' and
# '2490' are merged into '2400'.
data['cust_region'] = data['cust_region'].mask(data['cust_region'] == '-', '8670')
data['cust_region'] = data['cust_region'].mask(
    data['cust_region'].isin(['2440', '2490']), '2400'
)

# Add the feature Customer City: the first character of the region code
data['cust_city'] = data['cust_region'].map(lambda region_code: region_code[0])

In [105]:
# Last Promo: the '-' placeholder is relabelled as 'NO_PROMO'
data['last_promo'] = data['last_promo'].mask(data['last_promo'] == '-', 'NO_PROMO')

In [106]:
# Tidying up datatypes (columns are addressed by position)
_text_columns = ('last_promo', 'pay_method')

# Columns 0-8: the two text columns stay object, the rest become nullable ints
for col in data.columns[0:9]:
    data[col] = data[col].astype(object if col in _text_columns else 'Int64')

# Columns 9-23 are stored as floats
for col in data.columns[9:24]:
    data[col] = data[col].astype(float)

# Everything from column 24 onwards becomes a nullable integer
for col in data.columns[24:]:
    data[col] = data[col].astype('Int64')

In [107]:
# Categorical variables
non_metric_features = ['cust_region', 'last_promo', 'pay_method', 'cust_city']

# Positional slices of the column index: cuisines, weekdays, hours
cuisine_features = list(data.columns[9:24])
day_features = list(data.columns[24:31])
hour_features = list(data.columns[31:55])

# Metric variables: every remaining column, in its original order
_already_grouped = (
    set(non_metric_features)
    | set(hour_features)
    | set(day_features)
    | set(cuisine_features)
)
metric_features = [col for col in data.columns if col not in _already_grouped]

In [108]:
# Total amount spent by customer on all types of cuisine
data['total_amt'] = data[cuisine_features].sum(axis=1)

# Number of orders made by the customer (sum over the day-of-week columns)
data['n_order'] = data[day_features].sum(axis=1)

# Amount spent on average per product, per order and per vendor
for _ratio_name, _denominator in (
    ('avg_amt_per_product', 'n_product'),
    ('avg_amt_per_order', 'n_order'),
    ('avg_amt_per_vendor', 'n_vendor'),
):
    data[_ratio_name] = data['total_amt'] / data[_denominator]

# Total days as customer
data['days_cust'] = data['last_order'] - data['first_order']

# Average days between orders
data['avg_days_to_order'] = data['days_cust'] / data['n_order']

# Days the customer is due, according to their average days between orders
data['days_due'] = 90 - data['last_order'] + data['avg_days_to_order']

# Percentage of orders placed to restaurants that are part of a chain
data['per_chain_order'] = data['n_chain'] / data['n_order']

# And we add these features to the metric features list.
metric_features += [
    'n_order',
    'per_chain_order',
    'total_amt',
    'avg_amt_per_order',
    'avg_amt_per_product',
    'avg_amt_per_vendor',
    'days_cust',
    'avg_days_to_order',
    'days_due',
]

In [109]:
# True wherever a day-of-week column holds a positive count
mask = data[[f'DOW_{i}' for i in range(7)]].gt(0)

# Number of distinct weekdays with purchases, per customer
data['n_days_week'] = mask.sum(axis=1)

# Updating the list of metric features
metric_features += ['n_days_week']

In [110]:
# True wherever an hour column holds a positive count
mask = data[hour_features].gt(0)

# Number of distinct hours with purchases, per customer
data['n_times_day'] = mask.sum(axis=1)

# Updating the list of metric features
metric_features += ['n_times_day']

In [111]:
# Flag customers whose first and last orders are more than one day apart
data['regular'] = data['days_cust'].gt(1)

non_metric_features += ['regular']

In [112]:
# True wherever a cuisine column holds a positive amount (an order was placed)
mask = data[cuisine_features].gt(0)

# Number of distinct cuisines ordered, per customer
data['n_cuisines'] = mask.sum(axis=1)

# Updating the metric_features list
metric_features += ['n_cuisines']

In [113]:
# Columns that are NOT log-transformed; every other column is a target
_not_log_transformed = {
    'cust_age',
    'first_order',
    'last_order',
    'days_cust',
    'days_due',
    'avg_days_to_order',
    'per_chain_order',
    'cust_region',
    'cust_city',
    'last_promo',
    'pay_method',
    'n_cuisines',
    'regular',
} | set(hour_features) | set(day_features)

# Remaining columns, in their original order
targets = [col for col in data.columns if col not in _not_log_transformed]

# log1p of every target column, stored under the 'log_' prefix
log_transformed = pd.DataFrame(
    {f"log_{col}": np.log1p(data[col]) for col in targets}
)

# We keep a list of log_features to assist us in our exploration
log_features = list(log_transformed.columns)

# Append the log-transformed columns to the right of the original DataFrame
data = pd.concat([data, log_transformed], axis=1)

In [114]:
# Features inspected for each behavioural group; a customer is flagged for a
# group when any of its features is an upper outlier (see the loop below).
feature_groups = {
    'foodie': ['n_vendor', 'n_product', 'n_order', 'n_cuisines'],
    'gluttonous': ['avg_amt_per_order', 'total_amt', 'n_chain'],
    'loyal': ['avg_amt_per_vendor'] + cuisine_features,
}

# Every flag column starts at 0 (not flagged)
for _flag_column in ('foodie_flag', 'gluttonous_flag', 'loyal_flag'):
    data[_flag_column] = 0

# Function to calculate IQR bounds
def calculate_bounds(series):
    """Return the ``(lower, upper)`` outlier fences of *series*.

    The fences sit 1.5 interquartile ranges below the first quartile and
    above the third quartile, respectively.
    """
    first_quartile, third_quartile = (series.quantile(q) for q in (0.25, 0.75))
    whisker = 1.5 * (third_quartile - first_quartile)
    return first_quartile - whisker, third_quartile + whisker

# Assign flags for each feature group.
# A regular customer is flagged for a group as soon as one of the group's
# features exceeds the upper IQR fence; the fence is computed on regular
# customers with a positive value for that feature.
_flag_columns = {
    'foodie': 'foodie_flag',
    'gluttonous': 'gluttonous_flag',
    'loyal': 'loyal_flag',
}

for group, features in feature_groups.items():
    flag_column = _flag_columns.get(group)

    for feature in features:
        # n_cuisines is not log-transformed, so it is compared on its raw scale
        log_feature = feature if feature == 'n_cuisines' else f"log_{feature}"

        lower_bound, upper_bound = calculate_bounds(
            data.loc[(data['regular'] == 1) & (data[feature] > 0), log_feature]
        )

        if flag_column is None:
            continue

        # OR the new outliers into the flags already set by earlier features
        data.loc[data['regular'] == 1, flag_column] |= (
            data.loc[data['regular'] == 1, log_feature] > upper_bound
        ).astype(int)

# Display results
for group in ['foodie_flag', 'gluttonous_flag', 'loyal_flag']:
    print(f"Number of customers flagged as {group.split('_')[0]}:", data[group].sum())

non_metric_features += [
    'foodie_flag',
    'gluttonous_flag',
    'loyal_flag',
]

Number of customers flagged as foodie: 2857
Number of customers flagged as gluttonous: 648
Number of customers flagged as loyal: 455


In [115]:
def top_n(row, col_list, n):
    """Return the column holding the n-th largest distinct value of a row.

    Args:
        row: Series (one DataFrame row) indexed by column name.
        col_list: Names of the columns to rank.
        n: 1-based rank among the distinct values (1 = largest).

    Returns:
        The column name whose value is the n-th largest distinct value, or
        ``None`` when ``n`` is smaller than 1, when there are fewer than
        ``n`` distinct non-missing values, or when that value is 0.
        If several columns share the value, the one listed first in
        ``col_list`` is returned.
    """
    if n < 1:
        return None

    # Missing values cannot be ranked; without this an n-th value of NaN
    # matches no column and the final lookup raises IndexError.
    values = row[col_list].dropna()

    # Distinct values in descending order; entries are unique by construction,
    # so no further check against the previous rank is needed.
    ranked_values = values.sort_values(ascending=False).unique()
    if len(ranked_values) < n:
        return None

    nth_value = ranked_values[n - 1]

    # A value of 0 means nothing was spent, so there is no n-th column
    if nth_value == 0:
        return None

    # Look the value up in col_list order so ties resolve deterministically
    return values[values == nth_value].index[0]

In [116]:
# Cuisine column with the highest value per customer (see top_n for the rules)
data['top_cuisine'] = data.apply(
    lambda row: top_n(row, cuisine_features, 1), axis=1
)

non_metric_features += ['top_cuisine']

In [117]:
# Per-day averages over the customer's lifetime, rounded to 4 decimals:
# amount spent, products ordered and orders placed per day as customer.
# NOTE(review): days_cust is last_order - first_order, so it is 0 when both
# are equal; those rows divide by zero here.
for _per_day_name, _numerator in (
    ('avg_amt_per_day', 'total_amt'),
    ('avg_product_per_day', 'n_product'),
    ('avg_order_per_day', 'n_order'),
):
    data[_per_day_name] = np.round(data[_numerator] / data['days_cust'], 4)

metric_features += [
    'avg_amt_per_day',
    'avg_product_per_day',
    'avg_order_per_day',
]

In [118]:
# Missing ages are replaced by the mean age, truncated to an integer
data['cust_age'] = data['cust_age'].fillna(int(data['cust_age'].mean()))

In [119]:
# Creating age buckets.
# The label array is built from the oldest bucket down to the youngest, so a
# younger bucket overwrites an older one wherever the age is below its limit.
_age_bucket = '65+'
for _upper_age, _bucket_label in (
    (65, '55-64'),
    (55, '45-54'),
    (45, '35-44'),
    (35, '25-34'),
    (25, '15-24'),
):
    _age_bucket = np.where(data['cust_age'] < _upper_age, _bucket_label, _age_bucket)

data['age_bucket'] = _age_bucket

# Listed right after the first four categorical features
non_metric_features.insert(4, 'age_bucket')

# 3. Clustering

In [120]:
spending_diversity_features = ['total_amt', 'n_cuisines', 'n_vendor', 'n_product']
spending_diversity_df = regulars[spending_diversity_features].copy()

n_clusters = 4

# Ward-linkage hierarchical clustering provides the cluster labels
cluster = AgglomerativeClustering(n_clusters=n_clusters, metric="euclidean", linkage="ward")
hc_labels = cluster.fit_predict(spending_diversity_df)

# One centroid per hierarchical cluster: the feature-wise mean of its members
centroids = np.array([
    spending_diversity_df[hc_labels == label].mean(axis=0)
    for label in range(n_clusters)
])

# The hierarchical centroids seed KMeans; n_init=1 because the starting
# centroids are given explicitly.
kmeans = KMeans(n_clusters=n_clusters, init=centroids, n_init=1, random_state=20)
kmeans.fit(spending_diversity_df)

# Calculate the silhouette score to evaluate clustering
# silhouette_avg = silhouette_score(spending_diversity_df, kmeans.labels_)
# print(f"Silhouette Score: {silhouette_avg}")

# Clustering features + labels + every other column of `regulars`.
# NOTE(review): the stored labels are the hierarchical ones (hc_labels); the
# fitted KMeans model is not used here - confirm this is intended.
_other_regulars_columns = [
    col for col in regulars.columns if col not in spending_diversity_features
]
spending_diversity_df = pd.concat(
    [
        spending_diversity_df,
        pd.Series(hc_labels, name='labels', index=spending_diversity_df.index),
        regulars[_other_regulars_columns],
    ],
    axis=1,
)

# 4. Building the Dashboard

In [286]:
# Create the Dash application with the Bootstrap "Vapor" theme.
# Callback exceptions are suppressed because several callbacks target
# components that other callbacks create later (e.g. 'value-input').
external_stylesheet = [dbc.themes.VAPOR]
app = Dash(
    __name__,
    external_stylesheets=external_stylesheet,
    suppress_callback_exceptions=True,
)

In [287]:
# Columns the dashboard treats as numeric: every data column that is not
# listed among the non-metric (categorical) features.
_non_metric_lookup = set(non_metric_features)
no_categorical = [col for col in data.columns if col not in _non_metric_lookup]

In [288]:
# App layout: a navigation bar on top, a URL tracker, and a container for the
# page body (presumably filled by a routing callback keyed on the URL -
# that callback is not part of this section).
app.layout = html.Div([
    dbc.Navbar(
        dbc.Container([
            dbc.Nav([
                dbc.NavItem(dbc.NavLink("Home", href="/", id='home-button'), style={'margin-right': '15px'}),
                dbc.NavItem(dbc.NavLink("Basic Exploration", href="/single-feature", id='single-feature-button'), style={'margin-right': '15px'}),
                dbc.NavItem(dbc.NavLink("Pairplot Exploration", href="/pairplot", id='pairplot-button'), style={'margin-right': '15px'}),
                dbc.DropdownMenu([
                    dbc.DropdownMenuItem("Spending Diversity", href="/spending", id='spending-diversity-button'),
                    # The remaining entries have no target page yet (empty href)
                    dbc.DropdownMenuItem("Geography", href="", id='geography-button'),
                    dbc.DropdownMenuItem("Cuisines", href="", id='cuisines-button'),
                    dbc.DropdownMenuItem("Time", href="", id='time-button'),
                ], label='Clustering', nav=True, in_navbar=True)
            # 'me-auto' / 'ms-auto' are the Bootstrap 5 auto-margin utilities;
            # the previous 'mr-aulo' / 'ml-aulo' were typos matching no class.
            ], className='me-auto', pills=True),
            dbc.NavbarBrand("ABCDEats, Inc.", href="#", className='ms-auto')
        ]), color='primary', dark=True
    ),
    dcc.Location(id='url', refresh=False),
    html.Div(id='page-content')
])

In [289]:
# Landing page: a single full-width heading
_home_heading = html.H1(
    "This is a project developed by Martins & Fonseca Consulting on behalf of the Data Mining course"
)

home_layout = html.Div([
    dbc.Container([
        dbc.Row([dbc.Col(_home_heading, width=12)])
    ])
])

## Single Feature

In [290]:
# "Basic Exploration" page: pick a feature, optionally filter on a second
# feature, and view a histogram and box plot side by side.

# Every data column can be chosen as the displayed or the filter feature
_sf_column_options = [{'label': col, 'value': col} for col in data.columns]

# Feature to display (defaults to the first column)
_sf_feature_picker = dbc.Col([
    html.Label("Select a Feature to Display:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='column-dropdown',
        options=_sf_column_options,
        value=data.columns[0],
        style={'max-width': '250px'}
    )
], width=3)

# Feature to filter on (defaults to the first column)
_sf_filter_picker = dbc.Col([
    html.Label("Select a Filter Feature:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='condition-column-dropdown',
        options=_sf_column_options,
        value=data.columns[0],
        style={'max-width': '250px'}
    )
], width=3)

# Filter condition; its options and the value widget are filled by callbacks
_sf_condition_picker = dbc.Col([
    html.Label("Select a Filter Condition:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='condition-dropdown',
        options=[],
        value=None,
        style={'margin-bottom': '10px'}
    ),
    html.Div(id='filter-input-container')
], width=6)

# Apply / clear buttons, pushed to the right edge and vertically centred
_sf_filter_buttons = dbc.Col([
    dbc.Button('Apply Filter', id='apply-button', n_clicks=0, style={'margin-right': '10px'}),
    dbc.Button('Clear All Filters', id='clear-button', n_clicks=0, style={'margin-left': '10px'})
], width=3, style={
    'display': 'flex',
    'justify-content': 'flex-end',
    'align-items': 'center',
})

single_feature = html.Div([
    dbc.Container([
        dbc.Row([
            html.Div('Basic Exploration', className="text-primary text-center fs-3", style={'margin-bottom': '15px'})
        ]),
        # 'slider-container' receives a range slider or checklist from a callback
        dbc.Row([
            _sf_feature_picker,
            dbc.Col(id='slider-container')
        ], style={'margin-bottom': '15px'}),
        dbc.Row([
            _sf_filter_picker,
            _sf_condition_picker,
            _sf_filter_buttons
        ]),
        dbc.Row([
            dbc.Col([dcc.Graph(figure={}, id='col_histogram')], width=6),
            dbc.Col([dcc.Graph(figure={}, id='col_boxplot')], width=6)
        ])
    ], style={'padding': '20px'})
])

In [291]:
# Show slider/checklist
@callback(
    Output('slider-container', 'children'),
    Input('column-dropdown', 'value')
)
def update_slider(col_chosen):
    """Build the value selector for the displayed feature.

    Args:
        col_chosen: Name of the column picked in 'column-dropdown'.

    Returns:
        A two-element list: a label followed by a range slider (numeric
        column) or a checklist (categorical column), both with id
        'value-input'.
    """
    label = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen not in no_categorical:
        # Categorical column: one checkbox per distinct value, all ticked
        unique_values = data[col_chosen].unique().tolist()
        return [
            label,
            dcc.Checklist(
                id='value-input',
                options=[{'label': html.Label(val, style={'margin-right': '15px', 'margin-left': '5px'}), 'value': val} for val in unique_values],
                value=unique_values,
                inline=True
            )
        ]

    # Numeric column: take the slider bounds from the finite values only.
    # Some derived columns are ratios over days_cust, which can be 0; an
    # infinite or missing bound cannot be converted with int() for the marks.
    column = data[col_chosen]
    finite_values = column[np.isfinite(column.to_numpy(dtype=float, na_value=np.nan))]
    if finite_values.empty:
        lower, upper = 0, 0
    else:
        lower, upper = finite_values.min(), finite_values.max()

    return [
        label,
        dcc.RangeSlider(
            id='value-input',
            min=lower,
            max=upper,
            step=1,
            tooltip={"always_visible": False, "placement": "bottom"},
            # One mark every 10 units, starting at the (truncated) minimum
            marks={i: str(i) for i in range(int(lower), int(upper) + 1, 10)},
            value=[lower, upper]
        )
    ]

In [292]:
# Condition Interaction
@callback(
    Output('condition-dropdown', 'options'),
    Output('condition-dropdown', 'value'),
    Input('condition-column-dropdown', 'value')
)
def update_condition_dropdown(col_chosen):
    """Offer the comparison operators that suit the chosen filter column.

    Numeric columns get the five ordering/equality comparisons; every other
    column gets 'is' / 'is not'. The first operator is preselected.

    Returns:
        ``(options, value)`` for the 'condition-dropdown' select.
    """
    is_numeric = col_chosen in no_categorical
    conditions = (
        ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
        if is_numeric
        else ['is', 'is not']
    )

    return [{'label': cond, 'value': cond} for cond in conditions], conditions[0]

In [293]:
@callback(
    Output('filter-input-container', 'children'),
    Input('condition-column-dropdown', 'value')
)
def update_filter_input(col_chosen):
    """Build the widget used to enter the filter value.

    Numeric columns get a free-text input; every other column gets a select
    listing the column's distinct values. Both widgets use id 'filter-input'.

    Returns:
        A two-element list: the label followed by the widget.
    """
    caption = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen in no_categorical:
        # Numeric column: the threshold is typed in as text
        widget = dbc.Input(id='filter-input', type='text', value='', placeholder="Enter value", style={'margin-bottom': '15px'})
    else:
        # Categorical column: choose among the existing values
        unique_values = data[col_chosen].unique().tolist()
        widget = dbc.Select(
            id='filter-input',
            options=[{'label': val, 'value': val} for val in unique_values],
            # Preselect the first value when there is one
            value=unique_values[0] if unique_values else None,
            style={'margin-bottom': '15px'}
        )

    return [caption, widget]

In [294]:
# Graphs Interaction
@callback(
    [Output('col_histogram', 'figure'),
    Output('col_boxplot', 'figure'),
    Output('apply-button', 'n_clicks'),
    Output('clear-button', 'n_clicks')],
    [Input('column-dropdown', 'value'),
    Input('value-input', 'value'),
    Input('condition-column-dropdown', 'value'),
    Input('condition-dropdown', 'value'),
    Input('filter-input', 'value'),
    Input('apply-button', 'n_clicks'),
    Input('clear-button', 'n_clicks')]
)
def update_graph(col_chosen, value_input, col_condition, condition, value, n_clicks, n_clicks_clear):
    """Redraw the histogram and box plot of the displayed feature.

    Args:
        col_chosen: Column to plot.
        value_input: ``[low, high]`` from the range slider for a numeric
            column, or the list of ticked values for a categorical one.
        col_condition: Column the extra filter applies to.
        condition: Comparison chosen in 'condition-dropdown'.
        value: Value entered or chosen for the extra filter.
        n_clicks: Click count of 'Apply Filter'.
        n_clicks_clear: Click count of 'Clear All Filters'.

    Returns:
        ``(histogram, box plot, 0, 0)``; the two zeros reset both button
        click counters, so a click only affects the render it triggered.
    """
    # Restrict the rows with the displayed feature's own slider / checklist
    if col_chosen in no_categorical:
        low, high = value_input[0], value_input[1]
        filtered_df = data[(data[col_chosen] >= low) & (data[col_chosen] <= high)]
    elif col_chosen in non_metric_features:
        filtered_df = data[data[col_chosen].isin(value_input)]
    else:
        filtered_df = data

    # The extra filter needs a value and an 'Apply' click, and is skipped
    # entirely when 'Clear All Filters' was clicked.
    if not (n_clicks_clear > 0) and value and n_clicks > 0:
        column = filtered_df[col_condition]
        numeric_comparisons = {
            'greater than': column.gt,
            'less than': column.lt,
            'equal to': column.eq,
            'greater than or equal to': column.ge,
            'less than or equal to': column.le,
        }

        if condition == 'is':
            filtered_df = filtered_df[column.astype('str') == str(value)]
        elif condition == 'is not':
            # Previously spelled `.stype`, which raised AttributeError
            filtered_df = filtered_df[column.astype('str') != str(value)]
        elif condition in numeric_comparisons:
            filtered_df = filtered_df[numeric_comparisons[condition](float(value))]

    fig_hist = px.histogram(filtered_df, x=col_chosen)
    if col_chosen in no_categorical:
        fig_box = px.box(filtered_df, y=col_chosen)
        fig_box.update_traces(marker=dict(color="#E145B4"))
    else:
        # No box plot for categorical data: show a blank white panel instead
        fig_box = go.Figure()

        fig_box.update_layout(
            shapes=[
                go.layout.Shape(
                    type="rect",
                    x0=0, x1=1, y0=0, y1=1,
                    xref="paper", yref="paper",
                    line=dict(color="white"),  # White outline
                    fillcolor="white"  # White fill covering the plot area
                )
            ], xaxis=dict(showline=False, showgrid=False, zeroline=False, showticklabels=False),  # Hide x-axis
            yaxis=dict(showline=False, showgrid=False, zeroline=False, showticklabels=False)   # Hide y-axis
        )

    return fig_hist, fig_box, 0, 0

## Pairplot

In [295]:
# "Pairplot Exploration" page: pick two features, optionally filter on a
# third, and view their scatter matrix.

# Every data column can be chosen as a plotted or as the filter feature
_pp_column_options = [{'label': col, 'value': col} for col in data.columns]

# First plotted feature (defaults to the first column)
_pp_first_feature = dbc.Col([
    html.Label("Select a Feature:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='pairplot-feature-1',
        options=_pp_column_options,
        value=data.columns[0],
        style={'max-width': '250px'}
    )
], width=3)

# Second plotted feature (defaults to the first column)
_pp_second_feature = dbc.Col([
    html.Label("Select another Feature:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='pairplot-feature-2',
        options=_pp_column_options,
        value=data.columns[0],
        style={'max-width': '250px'}
    )
], width=3)

# Feature to filter on (defaults to the first column)
_pp_filter_feature = dbc.Col([
    html.Label("Select a Filter Feature:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='filter-column-dropdown',
        options=_pp_column_options,
        value=data.columns[0],
        style={'max-width': '250px'}
    )
], width=3)

# Filter condition; its options and the value widget are filled by callbacks
_pp_filter_condition = dbc.Col([
    html.Label("Select a Filter Condition:", style={'margin-bottom': '10px'}),
    dbc.Select(
        id='filter-condition-dropdown',
        options=[],
        value=None,
        style={'margin-bottom': '10px'}
    ),
    html.Div(id='pairplot-input-container')
], width=6)

# Apply / clear buttons, pushed to the right edge and vertically centred
_pp_filter_buttons = dbc.Col([
    dbc.Button('Apply Filter', id='apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
    dbc.Button('Clear Filter', id='clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
], width=3, style={
    'display': 'flex',
    'justify-content': 'flex-end',
    'align-items': 'center',
})

# The graph is centred both horizontally and vertically in its column
_pp_graph = dbc.Col([
    dcc.Graph(figure={}, id='graph-pairplot')
], style={
    'display': 'flex',
    'justify-content': 'center',
    'align-items': 'center',
})

pairplot = html.Div([
    dbc.Container([
        dbc.Row([
            html.Div('Pairplot Exploration', className="text-primary text-center fs-3", style={'margin-bottom': '15px'})
        ]),
        dbc.Row([_pp_first_feature, _pp_second_feature], style={'margin-bottom': '15px'}),
        # Filter Section
        dbc.Row([
            _pp_filter_feature,
            _pp_filter_condition,
            _pp_filter_buttons
        ], style={'margin-bottom': '15px'}),
        dbc.Row([_pp_graph])
    ], style={'padding': '20px'})
])

In [296]:
# Condition Interaction
@callback(
    Output('filter-condition-dropdown', 'options'),
    Output('filter-condition-dropdown', 'value'),
    Input('filter-column-dropdown', 'value')
)
def update_condition_dropdown(col_chosen):
    """Offer the comparison operators that suit the pairplot filter column.

    Numeric columns get the five ordering/equality comparisons; every other
    column gets 'is' / 'is not'. The first operator is preselected.

    Returns:
        ``(options, value)`` for the 'filter-condition-dropdown' select.
    """
    is_numeric = col_chosen in no_categorical
    conditions = (
        ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
        if is_numeric
        else ['is', 'is not']
    )

    return [{'label': cond, 'value': cond} for cond in conditions], conditions[0]

In [297]:
@callback(
    Output('pairplot-input-container', 'children'),
    Input('filter-column-dropdown', 'value')
)
def update_filter_input(col_chosen):
    """Build the widget used to enter the pairplot filter value.

    Numeric columns get a free-text input; every other column gets a select
    listing the column's distinct values. Both widgets use id
    'pairplot-filter-input'.

    Returns:
        A two-element list: the label followed by the widget.
    """
    caption = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen in no_categorical:
        # Numeric column: the threshold is typed in as text
        widget = dbc.Input(id='pairplot-filter-input', type='text', value='', placeholder="Enter value", style={'margin-bottom': '15px'})
    else:
        # Categorical column: choose among the existing values
        unique_values = data[col_chosen].unique().tolist()
        widget = dbc.Select(
            id='pairplot-filter-input',
            options=[{'label': val, 'value': val} for val in unique_values],
            # Preselect the first value when there is one
            value=unique_values[0] if unique_values else None,
            style={'margin-bottom': '15px'}
        )

    return [caption, widget]

In [298]:
# Callback to handle filtering and updating the pairplot
@callback(
    Output('graph-pairplot', 'figure'),
    Output('apply-filter-button', 'n_clicks'),
    Output('clear-filter-button', 'n_clicks'),
    Output('pairplot-filter-input', 'value'),
    Input('pairplot-feature-1', 'value'),
    Input('pairplot-feature-2', 'value'),
    Input('filter-column-dropdown', 'value'),
    Input('filter-condition-dropdown', 'value'),
    Input('pairplot-filter-input', 'value'),
    Input('apply-filter-button', 'n_clicks'),
    Input('clear-filter-button', 'n_clicks')
)
def update_pairplot(feature_1, feature_2, filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Redraw the scatter matrix of the two chosen features.

    The optional filter is applied only when a value and a column are set and
    'Apply Filter' was clicked; clicking 'Clear Filter' instead empties the
    filter value and plots the unfiltered data.

    Returns:
        ``(figure, 0, 0, filter_value)``; the zeros reset both button click
        counters and ``filter_value`` is written back to the input widget.
    """
    # Work on a copy so the shared DataFrame is never touched
    filtered_df = data.copy()

    if clear_clicks > 0:
        # 'Clear Filter' wins: blank the value and skip filtering
        filter_value = ''
    elif filter_value and filter_column and apply_clicks > 0:
        column = filtered_df[filter_column]
        numeric_comparisons = {
            'greater than': column.gt,
            'less than': column.lt,
            'equal to': column.eq,
            'greater than or equal to': column.ge,
            'less than or equal to': column.le,
        }

        if filter_condition == 'is':
            # Categorical comparisons are done on the string representation
            filtered_df = filtered_df[column.astype('str') == str(filter_value)]
        elif filter_condition == 'is not':
            filtered_df = filtered_df[column.astype('str') != str(filter_value)]
        elif filter_condition in numeric_comparisons:
            compare = numeric_comparisons[filter_condition]
            filtered_df = filtered_df[compare(float(filter_value))]

    # Create the pairplot based on the (possibly filtered) data
    pairplot_figure = px.scatter_matrix(filtered_df, dimensions=[feature_1, feature_2])

    return pairplot_figure, 0, 0, filter_value

## Clustering

### Spending Diversity

In [299]:
# "Spending Diversity" page: optionally filter the clustered customers and
# view the cluster visualization.

# Columns offered as filter features: everything except the cluster labels
_spending_filter_columns = [col for col in spending_diversity_df.columns if col != 'labels']

# The preselected column must be one of the offered options. The options come
# from spending_diversity_df, so the first column of `data` is kept as the
# default only when it is also offered; otherwise the first option is used.
if data.columns[0] in _spending_filter_columns:
    _spending_default_column = data.columns[0]
elif _spending_filter_columns:
    _spending_default_column = _spending_filter_columns[0]
else:
    _spending_default_column = None

spending = html.Div([
    dbc.Container([
        dbc.Row([
            html.Div('Spending Diversity Cluster Exploration', className="text-primary text-center fs-3", style={'margin-bottom': '15px'})
        ]),
        # Filter Section
        dbc.Row([
            dbc.Col([
                html.Label("Select a Filter Feature:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='spending-filter-column',
                    options=[{'label': col, 'value': col} for col in _spending_filter_columns],
                    value=_spending_default_column,
                    style={'max-width': '250px'}
                )
            ], width = 3),
            dbc.Col([
                html.Label("Select a Filter Condition:", style={'margin-bottom': '10px'}),
                # Options are filled by a callback once a filter feature is set
                dbc.Select(
                    id='spending-filter-condition',
                    options=[],
                    value=None,
                    style={'margin-bottom': '10px'}
                ),
                dbc.Input(id='spending-filter-input', type='text', value='', placeholder="Enter value", style={'margin-bottom': '15px'})
            ], width=6),
            # Apply / clear buttons, pushed to the right edge and vertically centred
            dbc.Col([
                dbc.Button('Apply Filter', id='spending-apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                dbc.Button('Clear Filter', id='spending-clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
            ], width=3,  style={
                'display': 'flex',
                'justify-content': 'flex-end',
                'align-items': 'center',
            })
        ], style={'margin-bottom': '15px'}),
        dbc.Row([
            html.Label("Cluster Visualization"),
            # The graph is centred in its column
            dbc.Col([
                dcc.Graph(figure={}, id='graph-spending-diversity-pairplot')
            ], style={
                'display': 'flex',
                'justify-content': 'center',
                'align-items': 'center',
            })
        ])
    ], style={'padding': '20px'})
])

In [300]:
# Condition Interaction
@callback(
    Output('spending-filter-condition', 'options'),
    Output('spending-filter-condition', 'value'),
    Input('spending-filter-column', 'value')
)
def update_condition_dropdown(col_chosen):
    """Refresh the filter-condition select whenever the filter column changes.

    Columns listed in ``no_categorical`` get the numeric comparison operators;
    every other column gets the two equality-style conditions.

    Args:
        col_chosen: Current value of the 'spending-filter-column' select.

    Returns:
        A ``(options, value)`` pair: the option dicts for the condition select
        and the first condition, which becomes the pre-selected value.
    """
    numeric_conditions = [
        'greater than',
        'less than',
        'equal to',
        'greater than or equal to',
        'less than or equal to',
    ]
    categorical_conditions = ['is', 'is not']

    # NOTE(review): `no_categorical` is defined elsewhere in the notebook;
    # presumably it lists the non-categorical (numeric) columns - confirm.
    available = numeric_conditions if col_chosen in no_categorical else categorical_conditions

    # Both candidate lists are non-empty, so a default is always available.
    default_condition = available[0]
    choices = [dict(label=name, value=name) for name in available]

    return choices, default_condition

In [301]:
# Maps each numeric filter condition offered in the UI to the name of the
# pandas Series comparison method that implements it.
_SPENDING_NUMERIC_COMPARATORS = {
    'greater than': 'gt',
    'less than': 'lt',
    'equal to': 'eq',
    'greater than or equal to': 'ge',
    'less than or equal to': 'le',
}


def _filter_spending_rows(df, column, condition, raw_value):
    """Return the rows of ``df`` matching the filter, or ``df`` itself if the filter is unusable.

    Args:
        df: DataFrame to filter.
        column: Name of the column the condition is evaluated on.
        condition: One of 'is', 'is not', or a key of
            ``_SPENDING_NUMERIC_COMPARATORS``.
        raw_value: The text typed by the user.

    Returns:
        The filtered DataFrame. ``df`` is returned unchanged when ``column`` is
        not one of its columns, when ``condition`` is not recognised, or when a
        numeric condition is paired with text that cannot be parsed as a float.
    """
    # The selected value is not guaranteed to be a column of this frame (the
    # select's default value is taken from a different DataFrame than its
    # options), so guard against a KeyError.
    if column not in df.columns:
        return df

    series = df[column]

    # Equality-style conditions compare on the string representation.
    if condition == 'is':
        return df[series.astype('str') == str(raw_value)]
    if condition == 'is not':
        return df[series.astype('str') != str(raw_value)]

    comparator = _SPENDING_NUMERIC_COMPARATORS.get(condition)
    if comparator is None:
        return df

    # Free-text input: anything that is not a number must not crash the callback.
    try:
        threshold = float(raw_value)
    except (TypeError, ValueError):
        return df

    return df[getattr(series, comparator)(threshold)]


@callback(
    Output('graph-spending-diversity-pairplot', 'figure'),
    Output('spending-apply-filter-button', 'n_clicks'),
    Output('spending-clear-filter-button', 'n_clicks'),
    Output('spending-filter-input', 'value'),
    Input('spending-filter-column', 'value'),
    Input('spending-filter-condition', 'value'),
    Input('spending-filter-input', 'value'),
    Input('spending-apply-filter-button', 'n_clicks'),
    Input('spending-clear-filter-button', 'n_clicks')
)
def update_spending_graph(filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Rebuild the spending/diversity pair-plot grid, optionally filtered.

    One scatter subplot is drawn for every pair of features in
    ``spending_diversity_features``, with markers coloured by the 'labels'
    column of ``spending_diversity_df``.

    Args:
        filter_column: Column selected in 'spending-filter-column'.
        filter_condition: Condition selected in 'spending-filter-condition'.
        filter_value: Text currently in 'spending-filter-input'.
        apply_clicks: Click count of the 'Apply Filter' button.
        clear_clicks: Click count of the 'Clear Filter' button.

    Returns:
        A 4-tuple ``(figure, 0, 0, filter_value)``: the figure, both button
        click counters reset to 0, and the text for the filter input (emptied
        when 'Clear Filter' was clicked).
    """
    filtered_df = spending_diversity_df.copy()

    # 'Clear Filter' takes precedence over 'Apply Filter'. Truthiness is used
    # instead of `> 0` so a missing (None) click count cannot raise.
    if clear_clicks:
        filter_value = ''
    elif filter_value and filter_column and apply_clicks:
        filtered_df = _filter_spending_rows(
            filtered_df, filter_column, filter_condition, filter_value
        )

    combinations = list(itertools.combinations(spending_diversity_features, 2))

    # Lay the pair plots out on a fixed-width grid; ceil-divide for the rows.
    n_cols = 3
    n_rows = (len(combinations) + n_cols - 1) // n_cols
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f"{x} vs {y}" for x, y in combinations]
    )

    for position, (feature_x, feature_y) in enumerate(combinations):
        # Subplot coordinates are 1-based and filled row by row.
        grid_row, grid_col = divmod(position, n_cols)
        fig.add_trace(
            go.Scatter(
                x=filtered_df[feature_x],
                y=filtered_df[feature_y],
                mode='markers',
                marker=dict(
                    size=10,
                    color=filtered_df['labels'],
                    colorscale='Viridis',
                    showscale=False,
                    opacity=0.5
                ),
                text=filtered_df['labels'],
                name=f"{feature_x} vs {feature_y}",
                showlegend=True
            ),
            row=grid_row + 1,
            col=grid_col + 1
        )

    fig.update_layout(
        height=n_rows * 400,
        width=1000,
        showlegend=False  # figure-level switch: no legend is drawn
    )

    # Both click counters are written back as 0 on every run.
    # NOTE(review): presumably so that an earlier click is not re-applied on
    # later input changes - confirm this is the intended UX.
    return fig, 0, 0, filter_value

## Page Navigation

In [302]:
@callback(
    [Output('page-content', 'children'),
    Output('home-button', 'active'),
    Output('single-feature-button', 'active'),
    Output('pairplot-button', 'active'),
    Output('spending-diversity-button', 'active')],
    [Input('url', 'pathname')]
)
def display_page(pathname):
    """Pick the page layout for the current URL and highlight its nav button.

    Args:
        pathname: The 'pathname' property of the 'url' component.

    Returns:
        A 5-tuple: the layout to place in 'page-content', followed by the
        ``active`` flag for the home, single-feature, pairplot and
        spending-diversity buttons (in that order). Exactly one flag is True;
        any unrecognised path falls back to the home page.
    """
    # Position in this tuple + 1 is the index of the matching nav button;
    # index 0 is reserved for the home button (the fallback).
    known_paths = ('/single-feature', '/pairplot', '/spending')
    active_index = known_paths.index(pathname) + 1 if pathname in known_paths else 0

    if active_index == 1:
        content = single_feature
    elif active_index == 2:
        content = pairplot
    elif active_index == 3:
        content = spending
    else:
        content = home_layout

    button_flags = [slot == active_index for slot in range(len(known_paths) + 1)]
    return (content, *button_flags)

# Dashboard

In [303]:
# Run the app: start the Dash server only when this file is executed as the
# main module (not when imported), with Dash's debug mode switched on.
if __name__ == '__main__':
    app.run(debug=True)