# **ABCDEats, Inc. Dashboard**

# 1. Imports and Loading

In [978]:
# Import libraries
import pandas as pd
import numpy as np
import random
import itertools
import joblib

from dash import Dash, html, dash_table, dcc, callback, Output, Input, MATCH, callback_context, State, dash
import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc
from plotly.subplots import make_subplots

from gower import gower_matrix
from sklearn.cluster import KMeans
from scipy.linalg import svd
from sklearn.preprocessing import normalize

In [979]:
# Lift pandas' display limits so printed frames show every column and row.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.max_seq_items', None),
    ('display.max_colwidth', 1000),
):
    pd.set_option(option_name, option_value)

In [980]:
# Directory prefix (including the trailing slash) prepended to the CSV file names
# when the data is loaded.
PATH = 'data/'

In [981]:
# Read both CSV inputs, using the first column of each file as the row index.
data, regulars = (
    pd.read_csv(f'{PATH}{file_name}', index_col=0)
    for file_name in ('raw_data.csv', 'regulars.csv')
)

# 2. Mapping

In [982]:
# Display labels keyed by raw column name. Insertion order is kept identical to
# a fully spelled-out literal: base columns, then the 'log_' variants, then the
# flag / derived columns.
mapping_dict = {
    'cust_region': 'Region',
    'cust_age': 'Age',
    'n_vendor': 'Vendor Count',
    'n_product': 'Product Count',
    'n_chain': 'Chain Restaurant Order Count',
    'first_order': 'First Order Date',
    'last_order': 'Last Order Date',
    'last_promo': 'Promotion',
    'pay_method': 'Payment Method',
    # Cuisine columns
    'american': 'American',
    'asian': 'Asian',
    'beverages': 'Beverages',
    'cafe': 'Cafe',
    'chicken_dishes': 'Chicken Dishes',
    'chinese': 'Chinese',
    'desserts': 'Desserts',
    'healthy': 'Healthy',
    'indian': 'Indian',
    'italian': 'Italian',
    'japanese': 'Japanese',
    'noodle_dishes': 'Noodle Dishes',
    'other': 'Other Cuisines',
    'street_food_snacks': 'Street Food & Snacks',
    'thai': 'Thai',
    # Day-of-week columns: DOW_0 is labelled Sunday, DOW_6 Saturday
    **{
        f'DOW_{day_number}': day_name
        for day_number, day_name in enumerate(
            ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
        )
    },
    # Hour columns labelled on a 12-hour clock: HR_0 -> 12AM, HR_12 -> 12PM
    **{
        f'HR_{hour}': f"{hour % 12 or 12}{'AM' if hour < 12 else 'PM'}"
        for hour in range(24)
    },
    'cust_city': 'City',
    'total_amt': 'Total Amount',
    'n_order': 'Order Count',
    'avg_amt_per_product': 'Avg Amount per Product',
    'avg_amt_per_order': 'Avg Amount per Order',
    'avg_amt_per_vendor': 'Avg Amount per Vendor',
    'days_cust': 'Days as Customer',
    'avg_days_to_order': 'Avg Days to Order',
    'days_due': 'Order Days Due',
    'per_chain_order': '% Orders in Chain Restaurant',
    'n_days_week': 'Days of Week Ordered Count',
    'n_times_day': 'Hours Ordered Count',
    'regular': 'Is Regular',
    'n_cuisines': 'Cuisines Count',
}

# Every 'log_<column>' label is 'Log ' followed by the label of '<column>'.
mapping_dict.update({
    f'log_{base_name}': f'Log {mapping_dict[base_name]}'
    for base_name in (
        'n_vendor', 'n_product', 'n_chain',
        'american', 'asian', 'beverages', 'cafe', 'chicken_dishes', 'chinese',
        'desserts', 'healthy', 'indian', 'italian', 'japanese', 'noodle_dishes',
        'other', 'street_food_snacks', 'thai',
        'total_amt', 'n_order', 'avg_amt_per_product', 'avg_amt_per_order',
        'avg_amt_per_vendor', 'n_days_week', 'n_times_day',
    )
})

mapping_dict.update({
    'foodie_flag': 'Is Foodie',
    'gluttonous_flag': 'Is Gluttonous',
    'loyal_flag': 'Is Loyal',
    'top_cuisine': 'Top Cuisine',
    'avg_amt_per_day': 'Avg Amount Spent per Day',
    'avg_product_per_day': 'Avg Products Ordered per Day',
    'avg_order_per_day': 'Avg Orders Placed per Day',
    'age_bucket': 'Age Bucket',
    'transaction_volume': 'Transaction Volume',
    'interaction_rate': 'Interaction Rate',
})

In [983]:
# Replace the raw column names of `data` with their display labels, in place.
data.rename(mapper=mapping_dict, axis='columns', inplace=True)

# 3. Clustering

In [984]:
# Columns of `regulars` whose name starts with one of the categorical prefixes,
# grouped prefix by prefix in the order listed here.
encoded_features = [
    col
    for prefix in ('cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket')
    for col in regulars.columns
    if col.startswith(prefix)
]

In [985]:
# Encoder object persisted at models/hot_encoder.pkl
encoder = joblib.load('models/hot_encoder.pkl')

# Keep the non-encoded columns and append the five categorical columns that the
# encoder's inverse_transform recovers from the encoded ones.
regulars = pd.concat(
    [
        regulars.drop(columns=encoded_features),
        pd.DataFrame(
            data=encoder.inverse_transform(regulars[encoded_features]),
            index=regulars.index,
            columns=['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket'],
        ),
    ],
    axis='columns',
)

## 3.1. Spending Diversity

In [986]:
# Columns fed to the persisted spending-diversity clustering model
spending_diversity_features = ['total_amt', 'n_cuisines', 'n_vendor', 'n_product']
spending_diversity_algorithm = joblib.load('models/spending_clustering.pkl')

spending_diversity_df = regulars[spending_diversity_features].copy()
spending_labels = spending_diversity_algorithm.predict(spending_diversity_df)

# Clustering inputs first, then the predicted labels, then every other `regulars` column.
spending_diversity_df = pd.concat(
    [
        spending_diversity_df,
        pd.Series(spending_labels, index=spending_diversity_df.index, name='labels'),
        regulars.drop(columns=spending_diversity_features),
    ],
    axis='columns',
)

In [987]:
# Cluster label side by side with the encoder's output for the categorical columns.
spending_diversity_profiling = pd.concat(
    objs=[
        spending_diversity_df['labels'],
        encoder.transform(
            regulars[['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket']]
        ),
    ],
    axis='columns',
)

In [988]:
# Scaler object persisted at models/std_scaler.pkl
std_scaler = joblib.load('models/std_scaler.pkl')

# Columns handed to std_scaler.inverse_transform wherever scaled values are undone.
# NOTE(review): presumably listed in the order the scaler was fitted with — confirm
# before reordering or editing this list.
std_scaler_features = [
    'cust_age', 'n_vendor', 'n_product', 'n_chain', 'first_order', 'last_order', 'american', 'asian', 'beverages', 'cafe',
    'chicken_dishes', 'chinese', 'desserts', 'healthy', 'indian', 'italian', 'japanese', 'noodle_dishes', 'other', 'street_food_snacks', 'thai',
    'DOW_0', 'DOW_1', 'DOW_2', 'DOW_3', 'DOW_4', 'DOW_5', 'DOW_6', 'HR_0', 'HR_1', 'HR_2', 'HR_3', 'HR_4', 'HR_5', 'HR_6', 'HR_7', 'HR_8', 'HR_9',
    'HR_10', 'HR_11', 'HR_12', 'HR_13', 'HR_14', 'HR_15', 'HR_16', 'HR_17', 'HR_18', 'HR_19', 'HR_20', 'HR_21', 'HR_22', 'HR_23', 'total_amt',
    'n_order', 'avg_amt_per_product', 'avg_amt_per_order', 'avg_amt_per_vendor', 'days_cust', 'avg_days_to_order', 'days_due',
    'per_chain_order', 'n_days_week', 'n_times_day', 'n_cuisines', 'log_n_vendor', 'log_n_product', 'log_n_chain', 'log_american',
    'log_asian', 'log_beverages', 'log_cafe', 'log_chicken_dishes', 'log_chinese', 'log_desserts', 'log_healthy', 'log_indian',
    'log_italian', 'log_japanese', 'log_noodle_dishes', 'log_other', 'log_street_food_snacks', 'log_thai', 'log_total_amt', 'log_n_order',
    'log_avg_amt_per_product', 'log_avg_amt_per_order', 'log_avg_amt_per_vendor', 'log_n_days_week', 'log_n_times_day',
    'avg_amt_per_day', 'avg_product_per_day', 'avg_order_per_day'
]

# Replace the listed columns with their inverse-transformed values (same index);
# all remaining columns are carried over unchanged and placed after them.
spending_diversity_df = pd.concat([
    pd.DataFrame(std_scaler.inverse_transform(spending_diversity_df[std_scaler_features]), columns=std_scaler_features, index=spending_diversity_df.index),
    spending_diversity_df.drop(columns=std_scaler_features)
], axis=1)

In [989]:
# Discard every column whose name starts with 'log_'.
spending_diversity_df.drop(
    columns=spending_diversity_df.columns[spending_diversity_df.columns.str.startswith('log_')],
    inplace=True,
)

In [990]:
# Switch both the frame's columns and the feature list over to display labels;
# names without an entry in mapping_dict are kept unchanged.
spending_diversity_df.rename(mapper=mapping_dict, axis='columns', inplace=True)
spending_diversity_features = [
    mapping_dict[name] if name in mapping_dict else name
    for name in spending_diversity_features
]

## 3.2. Geography

In [991]:
# Columns used as input for the geography clustering
geography_features = ['per_chain_order', 'log_total_amt', 'avg_amt_per_product', 'n_cuisines', 'cust_city_2.0', 'cust_city_4.0', 'cust_city_8.0']

# `regulars` without the decoded cust_city column, next to the encoder's output
# for the five categorical columns.
geography_regulars = pd.concat([
    regulars.drop(columns='cust_city'),
    encoder.transform(regulars[['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket']])
], axis=1)

geography_df = geography_regulars[geography_features].copy()

n_clusters = 3
# Fixed-seed sample of 3000 rows; everything below works on this sample only.
spectral_df = geography_df.copy().sample(n=3000, random_state=1) 
arr_spectral_df = spectral_df.values

rbf_param = 3.141542

# Pairwise Gower distances between the sampled rows
gower_dist = gower_matrix(arr_spectral_df)

# Affinity matrix: exp(-rbf_param * distance)
K = np.exp(-rbf_param * gower_dist)
# Row sums of K, then 1 / sqrt(row sum)
D = K.sum(axis=1)
D = np.sqrt(1/D)
# NOTE(review): both factors broadcast D along the columns (D[np.newaxis, :]), so
# M[i, j] = K[i, j] * D[j] ** 2. A symmetric D^-1/2 K D^-1/2 normalization would use
# D[:, np.newaxis] for one of them. Confirm against the code that fitted
# models/spectral_clustering.pkl before changing this.
M = np.multiply(D[np.newaxis, :], np.multiply(K, D[np.newaxis, :]))

# Keep the first n_clusters left singular vectors as the embedding
U, Sigma, _ = svd(M, full_matrices=False, lapack_driver='gesvd')
Usubset = U[:, :n_clusters]

In [992]:
# Persisted clustering model, applied to the embedding after sklearn's `normalize`.
geography_algorithm = joblib.load('models/spectral_clustering.pkl')

normalized_embedding = normalize(Usubset)
geography_labels = geography_algorithm.predict(normalized_embedding)

In [993]:
# `regulars` columns (cust_city excluded) that are not clustering inputs
geography_extra_columns = [
    col for col in regulars.columns
    if col != 'cust_city' and col not in geography_features
]

# Sampled clustering inputs, their labels, and the extra columns for the same rows.
geography_df = pd.concat(
    [
        spectral_df,
        pd.Series(geography_labels, index=spectral_df.index, name='labels'),
        geography_regulars.loc[spectral_df.index, geography_extra_columns],
    ],
    axis='columns',
)

In [994]:
# Cluster label side by side with the encoder's output for the categorical columns.
# geography_df only holds the sampled rows while the encoder output covers all of
# `regulars`; pd.concat aligns on the index, so any row outside the sample gets a
# NaN label.
geography_profiling = pd.concat(
    objs=[
        geography_df['labels'],
        encoder.transform(
            regulars[['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket']]
        ),
    ],
    axis='columns',
)

In [995]:
# Undo the scaler's transform on the scaled columns; every other column is carried
# over unchanged and placed after them.
geography_df = pd.concat(
    [
        pd.DataFrame(
            data=std_scaler.inverse_transform(geography_df[std_scaler_features]),
            index=geography_df.index,
            columns=std_scaler_features,
        ),
        geography_df.drop(columns=std_scaler_features),
    ],
    axis='columns',
)

In [996]:
# Discard the 'log_' columns, keeping only log_total_amt.
geography_df.drop(
    columns=geography_df.columns[
        geography_df.columns.str.startswith('log_') & (geography_df.columns != 'log_total_amt')
    ],
    inplace=True,
)

In [997]:
# Display labels for the frame: the shared mapping plus the three encoded city columns.
geography_df.rename(
    columns={
        **mapping_dict,
        'cust_city_2.0': 'City 2',
        'cust_city_4.0': 'City 4',
        'cust_city_8.0': 'City 8',
    },
    inplace=True,
)

# First four features take their display label; the city entries are replaced by
# fixed labels.
# NOTE(review): the cities are listed as 4, 8, 2 here although the raw feature list
# has them as 2, 4, 8 — confirm the order is intentional.
geography_features = [
    *(mapping_dict.get(name, name) for name in geography_features[:4]),
    'City 4', 'City 8', 'City 2',
]

## 3.3. Cuisines

In [998]:
# 'log_' cuisine columns handed to the persisted `spca` transformer, in this order
cuisines_features = [
    f'log_{cuisine}'
    for cuisine in (
        'american', 'asian', 'beverages', 'cafe', 'chinese', 'desserts', 'healthy', 'indian',
        'italian', 'japanese', 'noodle_dishes', 'other', 'street_food_snacks', 'thai', 'chicken_dishes',
    )
]
cuisines_factors = regulars[cuisines_features].copy()

spca = joblib.load('models/spca.pkl')
spca_array = spca.transform(cuisines_factors)

In [999]:
# Wrap the transformed array in a frame with two named component columns,
# indexed like the input rows.
spca_df = pd.DataFrame(
    data=spca_array,
    index=cuisines_factors.index,
    columns=['Component_1', 'Component_2'],
)

In [1000]:
# Two `regulars` columns that join the `spca` components as clustering inputs
columns_to_add = ['log_total_amt', 'log_avg_amt_per_product']

cuisines_df = pd.concat(objs=[spca_df, regulars.loc[:, columns_to_add]], axis='columns')

In [1001]:
# Persisted cuisine clustering model, applied to the components plus the added columns
cuisines_algorithm = joblib.load('models/cuisine_clustering.pkl')
cuisines_labels = cuisines_algorithm.predict(cuisines_df)

# Clustering inputs first, then the labels, then every other `regulars` column.
cuisines_df = pd.concat(
    [
        cuisines_df,
        pd.Series(cuisines_labels, index=cuisines_df.index, name='labels'),
        regulars.drop(columns=columns_to_add),
    ],
    axis='columns',
)

In [1002]:
# Cluster label side by side with the encoder's output for the categorical columns.
cuisines_profiling = pd.concat(
    objs=[
        cuisines_df['labels'],
        encoder.transform(
            regulars[['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket']]
        ),
    ],
    axis='columns',
)

In [1003]:
# Undo the scaler's transform on the scaled columns; every other column is carried
# over unchanged and placed after them.
cuisines_df = pd.concat(
    [
        pd.DataFrame(
            data=std_scaler.inverse_transform(cuisines_df[std_scaler_features]),
            index=cuisines_df.index,
            columns=std_scaler_features,
        ),
        cuisines_df.drop(columns=std_scaler_features),
    ],
    axis='columns',
)

In [1004]:
# Discard the 'log_' columns, except the ones listed in columns_to_add.
cuisines_df.drop(
    columns=cuisines_df.columns[
        cuisines_df.columns.str.startswith('log_') & ~cuisines_df.columns.isin(columns_to_add)
    ],
    inplace=True,
)

In [1005]:
# Display labels for the frame; the feature list is the component columns followed
# by the labels of the added columns.
cuisines_df.rename(mapper=mapping_dict, axis='columns', inplace=True)
cuisines_features = [
    *spca_df.columns,
    *(mapping_dict.get(item, item) for item in columns_to_add),
]

## 3.4. Time

In [1006]:
# Copy of `regulars` with the scaler's transform undone on the scaled columns;
# every other column is carried over unchanged and placed after them.
regulars_time_df = pd.concat(
    [
        pd.DataFrame(
            data=std_scaler.inverse_transform(regulars[std_scaler_features]),
            index=regulars.index,
            columns=std_scaler_features,
        ),
        regulars.drop(columns=std_scaler_features),
    ],
    axis='columns',
)

In [1007]:
# Positional slices of `regulars.columns`: positions 28-51 first, then 21-27.
# NOTE(review): presumably the HR_* columns followed by the DOW_* columns — confirm
# against the actual column order of `regulars`.
time_features = regulars.columns[28:52].tolist() + regulars.columns[21:28].tolist()

# Apply the persisted min-max scaler to those columns.
minmax_scaler = joblib.load('models/minmax_scaler.pkl')
time_df = minmax_scaler.transform(regulars_time_df[time_features].copy())

In [1008]:
# NMF model persisted at models/nmf.pkl
nmf = joblib.load('models/nmf.pkl')

# Component matrix rounded to 3 decimals, transposed so that each row is a feature
# and each column a factor; the feature name is moved into a 'Feature' column.
# NOTE(review): `time_df.columns` requires time_df to be a DataFrame at this point,
# i.e. minmax_scaler.transform must return pandas output — confirm the scaler was
# configured that way.
H = nmf.components_.round(decimals=3)
H_df = pd.DataFrame(H, columns=time_df.columns, index=[f"Factor_{i+1}" for i in range(H.shape[0])]).T.reset_index().rename(columns={'index': 'Feature'})

# Per-row factor weights, rounded to 3 decimals
W = nmf.transform(time_df).round(decimals=3)
W_df = pd.DataFrame(W, columns=[f"Factor_{i+1}" for i in range(W.shape[1])], index=time_df.index)

columns_to_add = ['total_amt', 'avg_amt_per_product', 'n_chain', 'n_cuisines']
# NOTE: fit_transform refits the loaded minmax_scaler on these four columns,
# replacing the parameters it was loaded with.
additional_data = minmax_scaler.fit_transform(regulars_time_df[columns_to_add])

# Concatenate the transformed data with the additional columns
time_df = pd.concat([W_df, additional_data], axis=1)

In [1010]:
# Persisted self-organising map; its weights are flattened to 25 rows (5 * 5) with
# one column per column of time_df.
sm = joblib.load('models/minisom.pkl')

weights_flat = sm.get_weights().reshape(25, time_df.shape[1])

In [1011]:
time_algorithm = joblib.load('models/hour_clustering.pkl')

# Cluster the 25 flattened weight rows and lay the labels back out on the 5x5 grid.
time_labels = time_algorithm.predict(weights_flat)
kmeans_matrix = time_labels.reshape(5, 5)

# Winning grid position for every row, then the cluster assigned to that position.
bmu_index = np.array([sm.winner(row) for row in time_df.values])
som_final_labels = [kmeans_matrix[grid_row][grid_col] for grid_row, grid_col in bmu_index]

# Factor columns only, then the labels, then the unscaled `regulars` data.
time_df = pd.concat(
    [
        time_df.drop(columns=columns_to_add),
        pd.Series(som_final_labels, index=time_df.index, name='labels'),
        regulars_time_df,
    ],
    axis='columns',
)

In [1012]:
# Cluster label side by side with the encoder's output for the categorical columns.
time_profiling = pd.concat(
    objs=[
        time_df['labels'],
        encoder.transform(
            regulars[['cust_region', 'last_promo', 'pay_method', 'cust_city', 'age_bucket']]
        ),
    ],
    axis='columns',
)

In [1013]:
# Discard every column whose name starts with 'log_'.
time_df.drop(
    columns=time_df.columns[time_df.columns.str.startswith('log_')],
    inplace=True,
)

In [1014]:
# Display labels for the frame; the feature list is the frame's first four columns
# followed by the labels of the added columns.
time_df.rename(mapper=mapping_dict, axis='columns', inplace=True)
time_features = [
    *time_df.columns[:4],
    *(mapping_dict.get(item, item) for item in columns_to_add),
]

# 4. Building the Dashboard

In [1015]:
# Display-label columns that the callbacks handle with value pickers and
# 'is' / 'is not' conditions rather than numeric comparisons.
non_metric_features = [
    'Region',
    'Promotion',
    'Payment Method',
    'City',
    'Age Bucket',
    'Is Regular',
    'Is Foodie',
    'Is Gluttonous',
    'Is Loyal',
    'Top Cuisine',
]

# Every other column of `data`, in column order.
no_categorical = data.columns[~data.columns.isin(non_metric_features)].tolist()

In [1673]:
# Initialize the app with the VAPOR theme from dash-bootstrap-components
external_stylesheet = [dbc.themes.VAPOR]
# suppress_callback_exceptions=True: callbacks in this file reference component ids
# (e.g. 'value-input', 'filter-input') that are only created by other callbacks,
# so they are absent from the initial layout.
app = Dash(__name__, external_stylesheets=external_stylesheet, suppress_callback_exceptions=True)

In [1674]:
# App layout: navigation bar, URL tracker and an empty container for page content
# (presumably filled by a URL-routing callback defined elsewhere in the file).
app.layout = html.Div([
    dbc.Navbar(
        dbc.Container([
            dbc.Nav([
                dbc.NavItem(dbc.NavLink("Home", href="/", id='home-button'), style={'margin-right': '15px'}),
                dbc.DropdownMenu([
                    dbc.DropdownMenuItem("Basic Exploration", href="/single-feature", id='single-feature-button'),
                    dbc.DropdownMenuItem("Pairplot Exploration", href="/pairplot", id='pairplot-button'),
                ], label="Exploration", nav=True, in_navbar=True, style={'margin-right': '15px'}),
                dbc.DropdownMenu([
                    dbc.DropdownMenuItem("Spending Diversity", href="/spending", id='spending-diversity-button'),
                    dbc.DropdownMenuItem("Geography", href="/geography", id='geography-button'),
                    dbc.DropdownMenuItem("Cuisines", href="/cuisines", id='cuisines-button'),
                    dbc.DropdownMenuItem("Time", href="/time", id='time-button'),
                ], label='Clustering', nav=True, in_navbar=True)
            # 'me-auto' / 'ms-auto' are the Bootstrap 5 auto-margin utilities; the
            # previous 'mr-aulo' / 'ml-aulo' values matched no Bootstrap class.
            ], className='me-auto', pills=True),
            dbc.NavbarBrand("ABCDEats, Inc.", href="#", className='ms-auto')
        ]), color='primary', dark=True
    ),
    # Exposes the current URL to callbacks without reloading the page
    dcc.Location(id='url', refresh=False),
    html.Div(id='page-content')
])

## Home

In [1675]:
# Home page: title block, introduction text, and the NMF factor table.
home_layout = html.Div([
    dbc.Container([
        dbc.Row([
            html.H1('ABCDEats, Inc. Visual Interface', className="text-center"),
            html.H1('for Data Visualization and Clustering Exploration', className="text-center", style={'margin-bottom': '30px'}),
            html.H2('Project Developed By Martins & Fonseca Consulting', className="text-primary text-center"),
            html.H3('MDSAA-DS Data Mining 2024/2025', className="text-primary text-center")
        ]),
        # Vertical spacer
        dbc.Row(style={'height': '50px'}),
        dbc.Row([
            # Adjacent string literals are joined by Python, so the paragraph no
            # longer carries the source indentation that backslash continuations
            # inside a single literal would embed in the text.
            html.P(
                "After exploring and segmenting ABCDEats, Inc.'s customers, we have decided to develop this tool that takes the user "
                "experience of navigating across the data to the next level, allowing you, the decision maker, to get insights quicker "
                "and with greater precision than ever before. With this application you can:"
            , style={'textAlign': 'justify', 'color': 'white'}),
            html.Ul(
                children=[
                    html.Li("Explore each feature alone and against others;"),
                    html.Li("Explore each clustering perspective; and"),
                    html.Li("Profile any new customer based on any features you desire.")
                ]
            , style={'margin-bottom': '15px', 'margin-left': '50px', 'color': 'white'})
        ]),
        dbc.Row([
            dbc.Col([
                html.H4("Segmentation Overview", className='text-secondary', style={'margin-bottom': '15px'}),
            ],width=6),
            dbc.Col([
                # One row per feature, one column per NMF factor (contents of H_df)
                dash_table.DataTable(
                    data=H_df.to_dict('records'), columns=[{'name': col, 'id': col} for col in H_df.columns], id='home-nmf-table',
                    style_header={'backgroundColor': '#30115E', 'color': 'white', 'textAlign': 'left'},
                    style_cell={'backgroundColor': '#6F42C1', 'color': 'white', 'textAlign': 'left'},
                ),
                html.Label("Customer Hourly Segmentation")
            ], width=6)
        ])
    ], style={'padding': '20px'})
])

## Single Feature

In [1676]:
# "Basic Exploration" page: feature picker, value filter, conditional filter and
# the histogram / box plot pair.
single_feature = html.Div(children=[
    dbc.Container(style={'padding': '20px'}, children=[
        # Page title
        dbc.Row(children=[
            html.Div(
                children='Basic Exploration',
                className="text-primary text-center fs-3",
                style={'margin-bottom': '15px'},
            ),
        ]),
        # Feature to plot, plus the column that receives the value filter widget
        dbc.Row(style={'margin-bottom': '15px'}, children=[
            dbc.Col(width=3, children=[
                html.Label(children="Select a Feature to Display:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='column-dropdown',
                    options=[dict(label=col, value=col) for col in data.columns],
                    value=data.columns[0],  # the first column is the default
                    style={'max-width': '250px'},
                ),
            ]),
            dbc.Col(id='slider-container'),
        ]),
        # Conditional filter: column, condition, value widget and the two buttons
        dbc.Row(children=[
            dbc.Col(width=3, children=[
                html.Label(children="Select a Filter Feature:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='condition-column-dropdown',
                    options=[dict(label=col, value=col) for col in data.columns],
                    value=data.columns[0],  # the first column is the default
                    style={'max-width': '250px'},
                ),
            ]),
            dbc.Col(width=6, children=[
                html.Label(children="Select a Filter Condition:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='condition-dropdown',
                    options=[],
                    value=None,
                    style={'margin-bottom': '10px'},
                ),
                html.Div(id='filter-input-container'),
            ]),
            dbc.Col(
                width=3,
                style={
                    'display': 'flex',
                    'justify-content': 'flex-end',  # align the buttons to the end of the row
                    'align-items': 'center',  # centre the buttons vertically
                },
                children=[
                    dbc.Button('Apply Filter', id='apply-button', n_clicks=0, style={'margin-right': '10px'}),
                    dbc.Button('Clear All Filters', id='clear-button', n_clicks=0, style={'margin-left': '10px'}),
                ],
            ),
        ]),
        # Charts, side by side
        dbc.Row(children=[
            dbc.Col(width=6, children=[dcc.Graph(id='col_histogram', figure={})]),
            dbc.Col(width=6, children=[dcc.Graph(id='col_boxplot', figure={})]),
        ]),
    ]),
])

In [1677]:
# Render the widget that filters the displayed feature
@callback(
    Output('slider-container', 'children'),
    Input('column-dropdown', 'value')
)
def update_slider(col_chosen):
    """Build the value filter for the selected feature.

    Columns listed in ``no_categorical`` get a range slider spanning the column's
    minimum and maximum (step 1, a mark every 10 units). Any other column gets an
    inline checklist of its unique values, all ticked by default.

    Returns a two-item list: the caption label and the widget, whose id is
    'value-input' in both cases.
    """
    caption = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen not in no_categorical:
        choices = data[col_chosen].unique().tolist()
        checklist = dcc.Checklist(
            id='value-input',
            options=[
                {'label': html.Label(choice, style={'margin-right': '15px', 'margin-left': '5px'}), 'value': choice}
                for choice in choices
            ],
            value=choices,
            inline=True
        )
        return [caption, checklist]

    # Compute the bounds once and reuse them for the limits, marks and default range.
    lowest = data[col_chosen].min()
    highest = data[col_chosen].max()
    slider = dcc.RangeSlider(
        id='value-input',
        min=lowest,
        max=highest,
        step=1,
        tooltip={"always_visible": False, "placement": "bottom"},
        marks={tick: str(tick) for tick in range(int(lowest), int(highest) + 1, 10)},
        value=[lowest, highest]
    )
    return [caption, slider]

In [1678]:
# Offer the comparison operators that suit the chosen filter column
@callback(
    Output('condition-dropdown', 'options'),
    Output('condition-dropdown', 'value'),
    Input('condition-column-dropdown', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the condition options for the filter column and the default selection.

    Columns in ``no_categorical`` get the five numeric comparisons; every other
    column gets 'is' / 'is not'. The first condition is selected by default.
    """
    conditions = (
        ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
        if col_chosen in no_categorical
        else ['is', 'is not']
    )
    return [dict(label=cond, value=cond) for cond in conditions], conditions[0]

In [1679]:
@callback(
    Output('filter-input-container', 'children'),
    Input('condition-column-dropdown', 'value')
)
def update_filter_input(col_chosen):
    """Build the widget that captures the value of the conditional filter.

    Columns in ``no_categorical`` get a free-text input; every other column gets a
    select listing the column's unique values, with the first one preselected.
    The widget id is 'filter-input' in both cases.
    """
    caption = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen in no_categorical:
        # Numeric column: the comparison value is typed in
        widget = dbc.Input(id='filter-input', type='text', value='', placeholder="Enter value", style={'margin-bottom': '15px'})
    else:
        # Categorical column: pick one of the existing values
        choices = data[col_chosen].unique().tolist()
        widget = dbc.Select(
            id='filter-input',
            options=[dict(label=choice, value=choice) for choice in choices],
            value=choices[0] if choices else None,  # first value when there is one
            style={'margin-bottom': '15px'}
        )

    return [caption, widget]

In [1680]:
# Graphs Interaction
@callback(
    [Output('col_histogram', 'figure'),
    Output('col_boxplot', 'figure'),
    Output('apply-button', 'n_clicks'),
    Output('clear-button', 'n_clicks')],
    [Input('column-dropdown', 'value'),
    Input('value-input', 'value'),
    Input('condition-column-dropdown', 'value'),
    Input('condition-dropdown', 'value'),
    Input('filter-input', 'value'),
    Input('apply-button', 'n_clicks'),
    Input('clear-button', 'n_clicks')]
)
def update_graph(col_chosen, value_input, col_condition, condition, value, n_clicks, n_clicks_clear):
    """Redraw the histogram and box plot of the selected feature.

    Args:
        col_chosen: Column of ``data`` to plot.
        value_input: ``[low, high]`` range for columns in ``no_categorical``, or
            the list of ticked values for columns in ``non_metric_features``.
        col_condition: Column the conditional filter applies to.
        condition: Selected condition ('is', 'is not', 'greater than', ...).
        value: Value the conditional filter compares against.
        n_clicks: Click count of the 'Apply Filter' button.
        n_clicks_clear: Click count of the 'Clear All Filters' button.

    Returns:
        ``(histogram, box plot, 0, 0)``; the two zeros reset both click counters.

    Raises:
        ValueError: If a numeric condition is applied and ``value`` cannot be
            converted to ``float``.
    """
    # Filter on the displayed feature itself; this applies whether or not the
    # conditional filter is cleared.
    if col_chosen in no_categorical:
        filtered_df = data[(data[col_chosen] >= value_input[0]) & (data[col_chosen] <= value_input[1])]
    elif col_chosen in non_metric_features:
        filtered_df = data[data[col_chosen].isin(value_input)]
    else:
        filtered_df = data

    # The conditional filter is only applied on an 'Apply Filter' click with a
    # value present, and is skipped entirely when 'Clear All Filters' was clicked.
    if not n_clicks_clear > 0 and value and n_clicks > 0:
        if condition == 'is':
            filtered_df = filtered_df[filtered_df[col_condition].astype('str') == str(value)]

        elif condition == 'is not':
            # Fixed: this branch called the non-existent `.stype`, which raised
            # AttributeError on every 'is not' filter.
            filtered_df = filtered_df[filtered_df[col_condition].astype('str') != str(value)]

        elif condition == 'greater than':
            filtered_df = filtered_df[filtered_df[col_condition] > float(value)]

        elif condition == 'less than':
            filtered_df = filtered_df[filtered_df[col_condition] < float(value)]

        elif condition == 'equal to':
            filtered_df = filtered_df[filtered_df[col_condition] == float(value)]

        elif condition == 'greater than or equal to':
            filtered_df = filtered_df[filtered_df[col_condition] >= float(value)]

        elif condition == 'less than or equal to':
            filtered_df = filtered_df[filtered_df[col_condition] <= float(value)]

    fig_hist = px.histogram(filtered_df, x=col_chosen)
    if col_chosen in no_categorical:
        fig_box = px.box(filtered_df, y=col_chosen)
        fig_box.update_traces(marker=dict(color="#E145B4"))
    else:
        # No box plot for categorical columns: show a blank white panel instead
        fig_box = go.Figure()

        fig_box.update_layout(
            shapes=[
                go.layout.Shape(
                    type="rect",
                    x0=0, x1=1, y0=0, y1=1,
                    xref="paper", yref="paper",
                    line=dict(color="white"),  # white outline
                    fillcolor="white"  # white fill covering the whole plot area
                )
            ], xaxis=dict(showline=False, showgrid=False, zeroline=False, showticklabels=False),  # Hide x-axis
            yaxis=dict(showline=False, showgrid=False, zeroline=False, showticklabels=False)   # Hide y-axis
        )

    return fig_hist, fig_box, 0, 0

## Pairplot

In [1681]:
# "Pairplot Exploration" page: two feature pickers, a conditional filter and the
# scatter-matrix graph.
pairplot = html.Div(children=[
    dbc.Container(style={'padding': '20px'}, children=[
        # Page title
        dbc.Row(children=[
            html.Div(
                children='Pairplot Exploration',
                className="text-primary text-center fs-3",
                style={'margin-bottom': '15px'},
            ),
        ]),
        # The two features to plot against each other
        dbc.Row(style={'margin-bottom': '15px'}, children=[
            dbc.Col(width=3, children=[
                html.Label(children="Select a Feature:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='pairplot-feature-1',
                    options=[dict(label=col, value=col) for col in data.columns],
                    value=data.columns[0],  # the first column is the default
                    style={'max-width': '250px'},
                ),
            ]),
            dbc.Col(width=3, children=[
                html.Label(children="Select another Feature:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='pairplot-feature-2',
                    options=[dict(label=col, value=col) for col in data.columns],
                    value=data.columns[0],  # the first column is the default
                    style={'max-width': '250px'},
                ),
            ]),
        ]),
        # Conditional filter: column, condition, value widget and the two buttons
        dbc.Row(style={'margin-bottom': '15px'}, children=[
            dbc.Col(width=3, children=[
                html.Label(children="Select a Filter Feature:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='filter-column-dropdown',
                    options=[dict(label=col, value=col) for col in data.columns],
                    value=data.columns[0],  # the first column is the default
                    style={'max-width': '250px'},
                ),
            ]),
            dbc.Col(width=6, children=[
                html.Label(children="Select a Filter Condition:", style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='filter-condition-dropdown',
                    options=[],
                    value=None,  # no condition is selected initially
                    style={'margin-bottom': '10px'},
                ),
                html.Div(id='pairplot-input-container'),
            ]),
            dbc.Col(
                width=3,
                style={
                    'display': 'flex',
                    'justify-content': 'flex-end',  # align the buttons to the end of the row
                    'align-items': 'center',  # centre the buttons vertically
                },
                children=[
                    dbc.Button('Apply Filter', id='apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                    dbc.Button('Clear Filter', id='clear-filter-button', n_clicks=0, style={'margin-left': '10px'}),
                ],
            ),
        ]),
        # Graph, centred in its row
        dbc.Row(children=[
            dbc.Col(
                style={
                    'display': 'flex',
                    'justify-content': 'center',  # centre the graph horizontally
                    'align-items': 'center',  # centre the graph vertically
                },
                children=[dcc.Graph(id='graph-pairplot', figure={})],
            ),
        ]),
    ]),
])

In [1682]:
# Offer the comparison operators that suit the chosen pairplot filter column
@callback(
    Output('filter-condition-dropdown', 'options'),
    Output('filter-condition-dropdown', 'value'),
    Input('filter-column-dropdown', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the condition options for the filter column and the default selection.

    Columns in ``no_categorical`` get the five numeric comparisons; every other
    column gets 'is' / 'is not'. The first condition is selected by default.
    """
    conditions = (
        ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
        if col_chosen in no_categorical
        else ['is', 'is not']
    )
    return [dict(label=cond, value=cond) for cond in conditions], conditions[0]

In [1683]:
@callback(
    Output('pairplot-input-container', 'children'),
    Input('filter-column-dropdown', 'value')
)
def update_filter_input(col_chosen):
    """Build the widget that captures the value of the pairplot filter.

    Columns in ``no_categorical`` get a free-text input; every other column gets a
    select listing the column's unique values, with the first one preselected.
    The widget id is 'pairplot-filter-input' in both cases.
    """
    caption = html.Label("Filter the Displayed Feature:", style={'margin-bottom': '10px'})

    if col_chosen in no_categorical:
        # Numeric column: the comparison value is typed in
        widget = dbc.Input(id='pairplot-filter-input', type='text', value='', placeholder="Enter value", style={'margin-bottom': '15px'})
    else:
        # Categorical column: pick one of the existing values
        choices = data[col_chosen].unique().tolist()
        widget = dbc.Select(
            id='pairplot-filter-input',
            options=[dict(label=choice, value=choice) for choice in choices],
            value=choices[0] if choices else None,  # first value when there is one
            style={'margin-bottom': '15px'}
        )

    return [caption, widget]

In [1684]:
# Redraw the pairplot whenever a feature, the filter or one of the buttons changes
@callback(
    Output('graph-pairplot', 'figure'),
    Output('apply-filter-button', 'n_clicks'),
    Output('clear-filter-button', 'n_clicks'),
    Output('pairplot-filter-input', 'value'),
    Input('pairplot-feature-1', 'value'),
    Input('pairplot-feature-2', 'value'),
    Input('filter-column-dropdown', 'value'),
    Input('filter-condition-dropdown', 'value'),
    Input('pairplot-filter-input', 'value'),
    Input('apply-filter-button', 'n_clicks'),
    Input('clear-filter-button', 'n_clicks')
)
def update_pairplot(feature_1, feature_2, filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Build the scatter matrix of the two selected features, optionally filtered.

    A 'Clear Filter' click blanks the filter value and plots the unfiltered data.
    Otherwise the filter is applied only on an 'Apply Filter' click with both a
    filter column and a filter value present.

    Returns ``(figure, 0, 0, filter_value)``; the zeros reset both click counters.
    """
    # Text conditions compare string representations; numeric ones compare floats.
    text_tests = {
        'is': lambda column, target: column == target,
        'is not': lambda column, target: column != target,
    }
    numeric_tests = {
        'greater than': lambda column, bound: column > bound,
        'less than': lambda column, bound: column < bound,
        'equal to': lambda column, bound: column == bound,
        'greater than or equal to': lambda column, bound: column >= bound,
        'less than or equal to': lambda column, bound: column <= bound,
    }

    filtered_df = data.copy()

    if clear_clicks > 0:
        # 'Clear Filter' clicked: blank the filter input
        filter_value = ''
    elif filter_value and filter_column and apply_clicks > 0:
        if filter_condition in text_tests:
            keep = text_tests[filter_condition](filtered_df[filter_column].astype('str'), str(filter_value))
            filtered_df = filtered_df[keep]
        elif filter_condition in numeric_tests:
            keep = numeric_tests[filter_condition](filtered_df[filter_column], float(filter_value))
            filtered_df = filtered_df[keep]

    pairplot_figure = px.scatter_matrix(filtered_df, dimensions=[feature_1, feature_2])

    return pairplot_figure, 0, 0, filter_value

## Clustering

### Spending Diversity

In [1685]:
# Page layout for the "Spending Diversity" clustering view.
# Sections, top to bottom: title, filter controls, pairwise cluster scatter
# plots, profiling heatmap with the written cluster descriptions, and a
# per-category profiling table. The empty figures/tables declared here are
# populated by the callbacks registered on the matching component ids.
spending = html.Div([
    dbc.Container([
        dbc.Row([
            html.H1('Spending Diversity Cluster Exploration', className="text-center", style={'margin-bottom': '15px'})
        ]),
        # Filter Section
        dbc.Row([
            dbc.Col([
                html.Label("Select a Filter Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='spending-filter-column',
                    # Every column except the cluster label can be filtered on
                    options=[{'label': col, 'value': col} for col in spending_diversity_df.columns if col != 'labels'],
                    value=spending_diversity_df.columns[0],  # Set default value to the first column
                    style={'max-width': '250px'}
                )
            ], width = 3),
            dbc.Col([
                html.Label("Select a Filter Condition:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='spending-filter-condition',
                    options=[],
                    value=None,  # Options and value are supplied by the condition callback
                    style={'margin-bottom': '10px'}
                ),
                # Placeholder that receives the value widget (text box or dropdown)
                html.Div(id='spending-filter-input-container')
            ], width=6),
            dbc.Col([
                dbc.Button('Apply Filter', id='spending-apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                dbc.Button('Clear Filter', id='spending-clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
            ], width=3,  style={
                'display': 'flex',
                'justify-content': 'flex-end',  # Pushes the buttons to the right edge of the column
                'align-items': 'center',  # Centers the buttons vertically
            })
        ], style={'margin-bottom': '15px'}),
        # Pairwise scatter plots of the clustering features
        dbc.Row([
            html.H2("Cluster Visualization", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='graph-spending-diversity-pairplot')
            ], style={
                'display': 'flex',
                'justify-content': 'center',
                'align-items': 'center',
            })
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Heatmap on the left, cluster descriptions on the right
        dbc.Row([
            html.H2("Cluster Profiling Heatmap", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='profiling-spending-heatmap')
            ], style={
                'display': 'flex',
                'justify-content': 'left',
                'align-items': 'center',
            }, width=7),
            dbc.Col([
                dcc.Markdown('''
                    **Cluster 0 – Adventurous High-Spenders:** These customers are explorers, frequently trying new cuisines
                    and vendors. They also buy many products and spend a significant amount of money. They value
                    variety and have high purchasing power, likely enjoying discovering new options and experiences.
                             
                    **Cluster 1 – Loyal High-Spenders:** These customers spend a lot of money and buy many products but
                    prefer sticking to familiar cuisines and vendors. They exhibit loyalty to a select range of offerings while
                    demonstrating significant spending capacity.
                             
                    **Cluster 2 – Low-Spending Minimalists:** These customers have the lowest spending, try the fewest
                    cuisines and vendors, and purchase the least products. They are cost-conscious and not very
                    exploratory, possibly focusing on essentials or sticking to a routine.
                             
                    **Cluster 3 – Exploratory Budget-Conscious:** These customers enjoy trying different cuisines and
                    vendors but do not purchase many products or spend much money. They prioritize variety and
                    experiences but are budget-conscious or limit their purchases.
                ''')
            ], width=5)
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Per-category profiling table, driven by the select above it
        dbc.Row([
            html.H2("Cluster Profiling by Category", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                html.Label("Select a Profiling Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='spending-profiling-select',
                    # Only the first five entries of non_metric_features are offered
                    options=[{'label': col, 'value': col} for col in non_metric_features[:5]],
                    style={'margin-bottom': '15px'}
            )], width=3),
            # Starts empty; data, columns and styles are set by the profiling callback
            dash_table.DataTable(data=pd.DataFrame().to_dict('records'), columns=[], id='spending-profiling-table', style_table={},
                style_header={'backgroundColor': '#30115E', 'color': 'white', 'textAlign': 'left'}, style_cell={'backgroundColor': '#6F42C1', 'color': 'white'},
                style_data_conditional=[]
            )
        ])
    ], style={'padding': '20px'})
])

In [1686]:
@callback(
    Output('spending-profiling-table', 'data'),
    Output('spending-profiling-table', 'columns'),
    Output('spending-profiling-table', 'style_data_conditional'),
    Output('spending-profiling-table', 'style_table'),
    Input('spending-profiling-select', 'value')
)
def update_profiling_table(feature):
    """Fill the spending profiling table for the selected feature.

    The selected display name is translated back to its raw column prefix
    through ``mapping_dict``. Every column of ``spending_diversity_profiling``
    starting with that prefix is averaged per cluster label and rounded to
    two decimals.

    Args:
        feature: Value of the profiling select; ``None`` or an empty string
            means nothing is selected.

    Returns:
        Tuple ``(records, columns, style_data_conditional, style_table)``.
        All four are empty when nothing is selected.
    """
    if not feature:
        return [], [], [], {}

    # Reverse lookup display name -> raw name. Fall back to the selected
    # value itself (as the forward lookup below does) so an unmapped value
    # does not reach str.startswith() as None.
    raw_name = next((key for key, value in mapping_dict.items() if value == feature), feature)
    display_name = mapping_dict.get(raw_name, raw_name)

    profile_columns = [col for col in spending_diversity_profiling.columns if col.startswith(raw_name)]
    df = spending_diversity_profiling.groupby('labels')[profile_columns].mean().round(2).reset_index()

    # Strip the feature prefix and any '.x' suffix, then turn underscores into spaces
    headers = [value.replace(f'{raw_name}_', '').split('.')[0].replace('_', ' ') for value in df.columns[1:]]
    headers.insert(0, f'Labels (% {display_name})')
    df.columns = headers

    columns = [{'name': col, 'id': col} for col in df.columns]

    cell_style = [{'if': {'column_id': col}, 'width': 'auto', 'whiteSpace': 'normal', 'textAlign': 'left'} for col in df.columns]

    table_style = {
        'border': '1px solid white',
        'width': '50%',  # Table takes half of its container's width
        'overflowX': 'auto',  # Enable horizontal scrolling if content overflows
        'padding': '0px',  # Remove padding to reduce extra space
        'margin': '0px',  # Remove margin to reduce extra space
    }

    return df.to_dict('records'), columns, cell_style, table_style

In [1687]:
# Keep the condition dropdown in sync with the chosen filter column
@callback(
    Output('spending-filter-condition', 'options'),
    Output('spending-filter-condition', 'value'),
    Input('spending-filter-column', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the conditions available for the chosen spending filter column.

    Columns in ``no_categorical`` get the five numeric comparisons; every
    other column gets 'is' / 'is not'. The first condition is preselected.

    Args:
        col_chosen: Name of the column selected in the filter dropdown.

    Returns:
        Tuple ``(options, default_value)`` for the condition select.
    """
    numeric_conditions = ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
    categorical_conditions = ['is', 'is not']

    conditions = numeric_conditions if col_chosen in no_categorical else categorical_conditions

    options = []
    for cond in conditions:
        options.append({'label': cond, 'value': cond})

    default_condition = conditions[0] if conditions else None
    return options, default_condition

In [1688]:
@callback(
    Output('spending-filter-input-container', 'children'),
    Input('spending-filter-column', 'value')
)
def update_filter_input(col_chosen):
    """Build the value widget matching the selected spending filter column.

    Columns listed in ``no_categorical`` get a free-text box; any other
    column gets a dropdown holding the distinct values found in
    ``spending_diversity_df``. Both widgets carry the id
    ``spending-filter-input``.

    Args:
        col_chosen: Name of the column selected in the filter dropdown.

    Returns:
        A two-element list: the caption label followed by the input widget.
    """
    caption = html.Label("Filter the Displayed Feature:", className='text-primary', style={'margin-bottom': '10px'})
    widget_style = {'margin-bottom': '15px'}

    if col_chosen not in no_categorical:
        # Categorical column: offer every distinct value, first one preselected
        choices = spending_diversity_df[col_chosen].unique().tolist()
        initial_choice = choices[0] if choices else None
        picker = dbc.Select(
            id='spending-filter-input',
            options=[{'label': choice, 'value': choice} for choice in choices],
            value=initial_choice,
            style=widget_style
        )
        return [caption, picker]

    # Non-categorical column: the user types the comparison value
    text_box = dbc.Input(
        id='spending-filter-input',
        type='text',
        value='',
        placeholder="Enter value",
        style=widget_style
    )
    return [caption, text_box]

In [1689]:
@callback(
    Output('graph-spending-diversity-pairplot', 'figure'),
    Output('profiling-spending-heatmap', 'figure'),
    Output('spending-apply-filter-button', 'n_clicks'),
    Output('spending-clear-filter-button', 'n_clicks'),
    Output('spending-filter-input', 'value'),
    Input('spending-filter-column', 'value'),
    Input('spending-filter-condition', 'value'),
    Input('spending-filter-input', 'value'),
    Input('spending-apply-filter-button', 'n_clicks'),
    Input('spending-clear-filter-button', 'n_clicks')
)
def update_spending_graph(filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Redraw the spending-diversity scatter grid and profiling heatmap.

    The filter is applied only when the Apply button has a positive click
    count and both a column and a value are set. A positive Clear click
    count takes precedence and empties the filter value instead.

    Args:
        filter_column: Column of ``spending_diversity_df`` to filter on.
        filter_condition: One of 'is', 'is not', 'greater than', 'less than',
            'equal to', 'greater than or equal to', 'less than or equal to'.
        filter_value: Raw value taken from the filter input widget.
        apply_clicks: Click count of the Apply button (``None`` counts as 0).
        clear_clicks: Click count of the Clear button (``None`` counts as 0).

    Returns:
        Tuple ``(scatter_grid, heatmap, 0, 0, filter_value)``; both click
        counters are reset to 0 on every call.
    """
    # Numeric conditions mapped to the comparison they perform
    numeric_comparisons = {
        'greater than': lambda series, bound: series > bound,
        'less than': lambda series, bound: series < bound,
        'equal to': lambda series, bound: series == bound,
        'greater than or equal to': lambda series, bound: series >= bound,
        'less than or equal to': lambda series, bound: series <= bound,
    }

    filtered_df = spending_diversity_df

    # If the 'Clear Filter' button was clicked, reset filter
    if (clear_clicks or 0) > 0:
        filter_value = ''

    elif filter_value and filter_column and (apply_clicks or 0) > 0:
        column = filtered_df[filter_column]
        mask = None

        if filter_condition == 'is':
            mask = column.astype('str') == str(filter_value)
        elif filter_condition == 'is not':
            mask = column.astype('str') != str(filter_value)
        elif filter_condition in numeric_comparisons:
            try:
                mask = numeric_comparisons[filter_condition](column, float(filter_value))
            except (TypeError, ValueError):
                # Text that is not a number, or a column that cannot be
                # compared with one: show the unfiltered data instead of
                # failing the whole callback
                mask = None

        if mask is not None:
            filtered_df = filtered_df[mask]

    # assign() returns a new frame, so the module-level frame is never
    # mutated and no write happens on a filtered slice. String labels make
    # Plotly Express colour the clusters as discrete categories.
    filtered_df = filtered_df.assign(labels=filtered_df['labels'].astype(str))

    features = list(spending_diversity_features)
    combinations = list(itertools.combinations(features, 2))

    # Define grid layout: three plots per row, as many rows as needed
    n_cols = 3
    n_rows = (len(combinations) + n_cols - 1) // n_cols

    # Create subplots
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f"{x} vs {y}" for x, y in combinations]
    )

    # Cluster labels that already own a legend entry
    used_labels = set()

    for position, (feature_x, feature_y) in enumerate(combinations):
        grid_row, grid_col = divmod(position, n_cols)

        scatter_fig = px.scatter(
            filtered_df,
            x=feature_x,
            y=feature_y,
            color='labels',
            opacity=0.5,
            color_discrete_sequence=px.colors.qualitative.Vivid
        )

        # Move the Plotly Express traces into the grid, keeping one legend
        # entry per cluster label across all subplots
        for trace in scatter_fig.data:
            if trace.name in used_labels:
                trace.showlegend = False
            else:
                used_labels.add(trace.name)

            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    # Update layout
    fig.update_layout(
        height=n_rows * 400,
        width=1000,
        showlegend=True,
        legend_title_text='Clusters'
    )

    # Mean of every clustering feature per cluster, features on the y axis
    hm = px.imshow(
        filtered_df[features + ['labels']].groupby('labels').mean().T,
        text_auto=".2f",
        color_continuous_scale="rdylgn",
        labels={"x": "Cluster Labels", "y": "Features", "color": "Mean Value"}
    )

    hm.update_layout(
        xaxis_title="Cluster Labels",
        yaxis_title="Features",
    )

    return fig, hm, 0, 0, filter_value

### Geography

In [1690]:
# Page layout for the "Geography" clustering view.
# Sections, top to bottom: title, filter controls, pairwise cluster scatter
# plots, profiling heatmap with the written cluster descriptions, and a
# per-category profiling table. The empty figures/tables declared here are
# populated by the callbacks registered on the matching component ids.
geography = html.Div([
    dbc.Container([
        dbc.Row([
            html.H1('Geography Cluster Exploration', className="text-center", style={'margin-bottom': '15px'})
        ]),
        # Filter Section
        dbc.Row([
            dbc.Col([
                html.Label("Select a Filter Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='geography-filter-column',
                    # Every column except the cluster label can be filtered on
                    options=[{'label': col, 'value': col} for col in geography_df.columns if col != 'labels'],
                    value=geography_df.columns[0],  # Set default value to the first column
                    style={'max-width': '250px'}
                )
            ], width = 3),
            dbc.Col([
                html.Label("Select a Filter Condition:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='geography-filter-condition',
                    options=[],
                    value=None,  # Options and value are supplied by the condition callback
                    style={'margin-bottom': '10px'}
                ),
                # Placeholder that receives the value widget (text box or dropdown)
                html.Div(id='geography-filter-input-container')
            ], width=6),
            dbc.Col([
                dbc.Button('Apply Filter', id='geography-apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                dbc.Button('Clear Filter', id='geography-clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
            ], width=3,  style={
                'display': 'flex',
                'justify-content': 'flex-end',  # Pushes the buttons to the right edge of the column
                'align-items': 'center',  # Centers the buttons vertically
            })
        ], style={'margin-bottom': '15px'}),
        # Pairwise scatter plots of the clustering features
        dbc.Row([
            html.H2("Cluster Visualization", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='graph-geography-diversity-pairplot')
            ], style={
                'display': 'flex',
                'justify-content': 'center',
                'align-items': 'center',
            })
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Heatmap on the left, cluster descriptions on the right
        dbc.Row([
            html.H2("Cluster Profiling Heatmap", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='profiling-geography-heatmap')
            ], style={
                'display': 'flex',
                'justify-content': 'left',
                'align-items': 'center',
            }, width=7),
            dbc.Col([
                dcc.Markdown('''
                    **Cluster 0 - City 8:** Customers have a below average propensity to consume from chained restaurants, while having
                    a high propensity to spend more both in aggregate as well as per product.
                             
                    **Cluster 1 - City4:** Customers display a moderate propensity towards spending, as well as a moderate interest in
                    experimenting with new cuisines.
                             
                    **Cluster 2 - City2:** Customers show a preference for chained restaurant food, having a preference for spending
                    less in aggregate than their peers, and less on each product, while being the ones that tend to purchase
                    from the highest number of cuisines.
                ''')
            ], width=5)
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Per-category profiling table, driven by the select above it
        dbc.Row([
            html.H2("Cluster Profiling by Category", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                html.Label("Select a Profiling Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='geography-profiling-select',
                    # Only the first five entries of non_metric_features are offered
                    options=[{'label': col, 'value': col} for col in non_metric_features[:5]],
                    style={'margin-bottom': '15px'}
            )], width=3),
            # Starts empty; data, columns and styles are set by the profiling callback
            dash_table.DataTable(data=pd.DataFrame().to_dict('records'), columns=[], id='geography-profiling-table', style_table={},
                style_header={'backgroundColor': '#30115E', 'color': 'white', 'textAlign': 'left'}, style_cell={'backgroundColor': '#6F42C1', 'color': 'white'},
                style_data_conditional=[]
            )
        ])
    ], style={'padding': '20px'})
])

In [1691]:
@callback(
    Output('geography-profiling-table', 'data'),
    Output('geography-profiling-table', 'columns'),
    Output('geography-profiling-table', 'style_data_conditional'),
    Output('geography-profiling-table', 'style_table'),
    Input('geography-profiling-select', 'value')
)
def update_profiling_table(feature):
    """Fill the geography profiling table for the selected feature.

    The selected display name is translated back to its raw column prefix
    through ``mapping_dict``. Every column of ``geography_profiling``
    starting with that prefix is averaged per cluster label and rounded to
    two decimals.

    Args:
        feature: Value of the profiling select; ``None`` or an empty string
            means nothing is selected.

    Returns:
        Tuple ``(records, columns, style_data_conditional, style_table)``.
        All four are empty when nothing is selected.
    """
    if not feature:
        return [], [], [], {}

    # Reverse lookup display name -> raw name. Fall back to the selected
    # value itself (as the forward lookup below does) so an unmapped value
    # does not reach str.startswith() as None.
    raw_name = next((key for key, value in mapping_dict.items() if value == feature), feature)
    display_name = mapping_dict.get(raw_name, raw_name)

    profile_columns = [col for col in geography_profiling.columns if col.startswith(raw_name)]
    df = geography_profiling.groupby('labels')[profile_columns].mean().round(2).reset_index()

    # Strip the feature prefix and any '.x' suffix, then turn underscores into spaces
    headers = [value.replace(f'{raw_name}_', '').split('.')[0].replace('_', ' ') for value in df.columns[1:]]
    headers.insert(0, f'Labels (% {display_name})')
    df.columns = headers

    columns = [{'name': col, 'id': col} for col in df.columns]

    cell_style = [{'if': {'column_id': col}, 'width': 'auto', 'whiteSpace': 'normal', 'textAlign': 'left'} for col in df.columns]

    table_style = {
        'border': '1px solid white',
        'width': '50%',  # Table takes half of its container's width
        'overflowX': 'auto',  # Enable horizontal scrolling if content overflows
        'padding': '0px',  # Remove padding to reduce extra space
        'margin': '0px',  # Remove margin to reduce extra space
    }

    return df.to_dict('records'), columns, cell_style, table_style

In [1692]:
# Keep the condition dropdown in sync with the chosen filter column
@callback(
    Output('geography-filter-condition', 'options'),
    Output('geography-filter-condition', 'value'),
    Input('geography-filter-column', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the conditions available for the chosen geography filter column.

    Columns in ``no_categorical`` or ``geography_features`` get the five
    numeric comparisons; every other column gets 'is' / 'is not'. The first
    condition is preselected.

    Args:
        col_chosen: Name of the column selected in the filter dropdown.

    Returns:
        Tuple ``(options, default_value)`` for the condition select.
    """
    numeric_conditions = ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
    categorical_conditions = ['is', 'is not']

    treat_as_numeric = col_chosen in no_categorical or col_chosen in geography_features
    conditions = numeric_conditions if treat_as_numeric else categorical_conditions

    options = []
    for cond in conditions:
        options.append({'label': cond, 'value': cond})

    default_condition = conditions[0] if conditions else None
    return options, default_condition

In [1693]:
@callback(
    Output('geography-filter-input-container', 'children'),
    Input('geography-filter-column', 'value')
)
def update_filter_input(col_chosen):
    """Build the value widget matching the selected geography filter column.

    Columns listed in ``no_categorical`` or ``geography_features`` get a
    free-text box; any other column gets a dropdown holding the distinct
    values found in ``geography_df``. Both widgets carry the id
    ``geography-filter-input``.

    Args:
        col_chosen: Name of the column selected in the filter dropdown.

    Returns:
        A two-element list: the caption label followed by the input widget.
    """
    caption = html.Label("Filter the Displayed Feature:", className='text-primary', style={'margin-bottom': '10px'})
    widget_style = {'margin-bottom': '15px'}

    treat_as_numeric = col_chosen in no_categorical or col_chosen in geography_features

    if not treat_as_numeric:
        # Categorical column: offer every distinct value, first one preselected
        choices = geography_df[col_chosen].unique().tolist()
        initial_choice = choices[0] if choices else None
        picker = dbc.Select(
            id='geography-filter-input',
            options=[{'label': choice, 'value': choice} for choice in choices],
            value=initial_choice,
            style=widget_style
        )
        return [caption, picker]

    # Numeric column: the user types the comparison value
    text_box = dbc.Input(
        id='geography-filter-input',
        type='text',
        value='',
        placeholder="Enter value",
        style=widget_style
    )
    return [caption, text_box]

In [1694]:
@callback(
    Output('graph-geography-diversity-pairplot', 'figure'),
    Output('profiling-geography-heatmap', 'figure'),
    Output('geography-apply-filter-button', 'n_clicks'),
    Output('geography-clear-filter-button', 'n_clicks'),
    Output('geography-filter-input', 'value'),
    Input('geography-filter-column', 'value'),
    Input('geography-filter-condition', 'value'),
    Input('geography-filter-input', 'value'),
    Input('geography-apply-filter-button', 'n_clicks'),
    Input('geography-clear-filter-button', 'n_clicks')
)
def update_geography_graph(filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Redraw the geography scatter grid and profiling heatmap.

    The filter is applied only when the Apply button has a positive click
    count and both a column and a value are set. A positive Clear click
    count takes precedence and empties the filter value instead.

    Args:
        filter_column: Column of ``geography_df`` to filter on.
        filter_condition: One of 'is', 'is not', 'greater than', 'less than',
            'equal to', 'greater than or equal to', 'less than or equal to'.
        filter_value: Raw value taken from the filter input widget.
        apply_clicks: Click count of the Apply button (``None`` counts as 0).
        clear_clicks: Click count of the Clear button (``None`` counts as 0).

    Returns:
        Tuple ``(scatter_grid, heatmap, 0, 0, filter_value)``; both click
        counters are reset to 0 on every call.
    """
    # Numeric conditions mapped to the comparison they perform
    numeric_comparisons = {
        'greater than': lambda series, bound: series > bound,
        'less than': lambda series, bound: series < bound,
        'equal to': lambda series, bound: series == bound,
        'greater than or equal to': lambda series, bound: series >= bound,
        'less than or equal to': lambda series, bound: series <= bound,
    }

    filtered_df = geography_df

    # If the 'Clear Filter' button was clicked, reset filter
    if (clear_clicks or 0) > 0:
        filter_value = ''

    elif filter_value and filter_column and (apply_clicks or 0) > 0:
        column = filtered_df[filter_column]
        mask = None

        if filter_condition == 'is':
            mask = column.astype('str') == str(filter_value)
        elif filter_condition == 'is not':
            mask = column.astype('str') != str(filter_value)
        elif filter_condition in numeric_comparisons:
            try:
                mask = numeric_comparisons[filter_condition](column, float(filter_value))
            except (TypeError, ValueError):
                # Text that is not a number, or a column that cannot be
                # compared with one: show the unfiltered data instead of
                # failing the whole callback
                mask = None

        if mask is not None:
            filtered_df = filtered_df[mask]

    # assign() returns a new frame, so the module-level frame is never
    # mutated and no write happens on a filtered slice. String labels make
    # Plotly Express colour the clusters as discrete categories.
    filtered_df = filtered_df.assign(labels=filtered_df['labels'].astype(str))

    features = list(geography_features)
    combinations = list(itertools.combinations(features, 2))

    # Define grid layout: three plots per row, as many rows as needed
    n_cols = 3
    n_rows = (len(combinations) + n_cols - 1) // n_cols

    # Create subplots
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f"{x} vs {y}" for x, y in combinations]
    )

    # Cluster labels that already own a legend entry
    used_labels = set()

    for position, (feature_x, feature_y) in enumerate(combinations):
        grid_row, grid_col = divmod(position, n_cols)

        scatter_fig = px.scatter(
            filtered_df,
            x=feature_x,
            y=feature_y,
            color='labels',
            opacity=0.5,
            color_discrete_sequence=px.colors.qualitative.Vivid
        )

        # Move the Plotly Express traces into the grid, keeping one legend
        # entry per cluster label across all subplots
        for trace in scatter_fig.data:
            if trace.name in used_labels:
                trace.showlegend = False
            else:
                used_labels.add(trace.name)

            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    # Update layout
    fig.update_layout(
        height=n_rows * 400,
        width=1300,
        showlegend=True,
        legend_title_text='Clusters'
    )

    # Mean of every clustering feature per cluster, features on the y axis
    hm = px.imshow(
        filtered_df[features + ['labels']].groupby('labels').mean().T,
        text_auto=".2f",
        color_continuous_scale="rdylgn",
        labels={"x": "Cluster Labels", "y": "Features", "color": "Mean Value"}
    )

    hm.update_layout(
        xaxis_title="Cluster Labels",
        yaxis_title="Features",
    )

    return fig, hm, 0, 0, filter_value

### Cuisines

In [1695]:
# Page layout for the "Cuisines" clustering view.
# Sections, top to bottom: title, filter controls, pairwise cluster scatter
# plots, profiling heatmap with the written component/cluster descriptions,
# and a per-category profiling table. The empty figures/tables declared here
# are populated by the callbacks registered on the matching component ids.
cuisines = html.Div([
    dbc.Container([
        dbc.Row([
            html.H1('Cuisines Cluster Exploration', className="text-center", style={'margin-bottom': '15px'})
        ]),
        # Filter Section
        dbc.Row([
            dbc.Col([
                html.Label("Select a Filter Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='cuisines-filter-column',
                    # Every column except the cluster label can be filtered on
                    options=[{'label': col, 'value': col} for col in cuisines_df.columns if col != 'labels'],
                    value=cuisines_df.columns[0],  # Set default value to the first column
                    style={'max-width': '250px'}
                )
            ], width = 3),
            dbc.Col([
                html.Label("Select a Filter Condition:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='cuisines-filter-condition',
                    options=[],
                    value=None,  # Options and value are supplied by the condition callback
                    style={'margin-bottom': '10px'}
                ),
                # Placeholder that receives the value widget
                html.Div(id='cuisines-filter-input-container')
            ], width=6),
            dbc.Col([
                dbc.Button('Apply Filter', id='cuisines-apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                dbc.Button('Clear Filter', id='cuisines-clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
            ], width=3,  style={
                'display': 'flex',
                'justify-content': 'flex-end',  # Pushes the buttons to the right edge of the column
                'align-items': 'center',  # Centers the buttons vertically
            })
        ], style={'margin-bottom': '15px'}),
        # Pairwise scatter plots of the clustering features
        dbc.Row([
            html.H2("Cluster Visualization", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='graph-cuisines-diversity-pairplot')
            ], style={
                'display': 'flex',
                'justify-content': 'center',
                'align-items': 'center',
            })
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Heatmap on the left, component and cluster descriptions on the right
        dbc.Row([
            html.H2("Cluster Profiling Heatmap", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='profiling-cuisines-heatmap')
            ], style={
                'display': 'flex',
                'justify-content': 'left',
                'align-items': 'center',
            }, width=7),
            dbc.Col([
                dcc.Markdown('''
                    **Component 1 – Preference for Casual and Street-Style Dining:** High contribution from Asian,
                    Beverages, Desserts and Street Food and Snacks; and negative contribution from Cafe, Indian, Italian,
                    Other and Thai. This may indicate a preference for casual, street-style dining over more formal dining
                    options.
                             
                    **Component 2 – Preference for Comfort/Chinese-style Meals:** High contribution from Chinese, Noodle
                    Dishes, Chicken Dishes and Other; and negative contribution from Asian, Street Food and Snacks,
                    American, Cafe and Italian. This may indicate a preference for comfort/chinese-style meals.
                             
                    **Cluster 0 - Grab to Go:** was associated with negative values for components 1 and 2 of the sPCA and marginally
                    positive values for the log variables, implying a small propensity to cuisines that have negative
                    coefficients in both components 1 and 2.
                             
                    **Cluster 1 - Comfort:** was associated with a large propensity towards consumption, with high logged feature
                    values, and a high coefficient for component 1, and negative coefficient for component 2, meaning
                    these customers display a strong preferences for positive coefficient cuisines in component 1 and
                    negative coefficient cuisines in component 2.
                             
                    **Cluster 2 - Italian American:** shows a strong preference for component 2 positive coefficients and is only slightly positively
                    associated with component 1 negative coefficient cuisines, while exhibiting a trend to expend as little
                    as possible and opt for cheap products.
                ''')
            ], width=5)
        ], style={'margin-bottom': '15px'}),
        # Spacer row
        dbc.Row(style={'height': '15px'}),
        # Per-category profiling table, driven by the select above it
        dbc.Row([
            html.H2("Cluster Profiling by Category", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                html.Label("Select a Profiling Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='cuisines-profiling-select',
                    # Only the first five entries of non_metric_features are offered
                    options=[{'label': col, 'value': col} for col in non_metric_features[:5]],
                    style={'margin-bottom': '15px'}
            )], width=3),
            # Starts empty; data, columns and styles are set by the profiling callback
            dash_table.DataTable(data=pd.DataFrame().to_dict('records'), columns=[], id='cuisines-profiling-table', style_table={},
                style_header={'backgroundColor': '#30115E', 'color': 'white', 'textAlign': 'left'}, style_cell={'backgroundColor': '#6F42C1', 'color': 'white'},
                style_data_conditional=[]
            )
        ])
    ], style={'padding': '20px'})
])

In [1696]:
@callback(
    Output('cuisines-profiling-table', 'data'),
    Output('cuisines-profiling-table', 'columns'),
    Output('cuisines-profiling-table', 'style_data_conditional'),
    Output('cuisines-profiling-table', 'style_table'),
    Input('cuisines-profiling-select', 'value')
)
def update_profiling_table(feature):
    """Fill the cuisines profiling table for the selected feature.

    The selected display name is translated back to its raw column prefix
    through ``mapping_dict``. Every column of ``cuisines_profiling``
    starting with that prefix is averaged per cluster label and rounded to
    two decimals.

    Args:
        feature: Value of the profiling select; ``None`` or an empty string
            means nothing is selected.

    Returns:
        Tuple ``(records, columns, style_data_conditional, style_table)``.
        All four are empty when nothing is selected.
    """
    if not feature:
        return [], [], [], {}

    # Reverse lookup display name -> raw name. Fall back to the selected
    # value itself (as the forward lookup below does) so an unmapped value
    # does not reach str.startswith() as None.
    raw_name = next((key for key, value in mapping_dict.items() if value == feature), feature)
    display_name = mapping_dict.get(raw_name, raw_name)

    profile_columns = [col for col in cuisines_profiling.columns if col.startswith(raw_name)]
    df = cuisines_profiling.groupby('labels')[profile_columns].mean().round(2).reset_index()

    # Strip the feature prefix and any '.x' suffix, then turn underscores into spaces
    headers = [value.replace(f'{raw_name}_', '').split('.')[0].replace('_', ' ') for value in df.columns[1:]]
    headers.insert(0, f'Labels (% {display_name})')
    df.columns = headers

    columns = [{'name': col, 'id': col} for col in df.columns]

    cell_style = [{'if': {'column_id': col}, 'width': 'auto', 'whiteSpace': 'normal', 'textAlign': 'left'} for col in df.columns]

    table_style = {
        'border': '1px solid white',
        'width': '50%',  # Table takes half of its container's width
        'overflowX': 'auto',  # Enable horizontal scrolling if content overflows
        'padding': '0px',  # Remove padding to reduce extra space
        'margin': '0px',  # Remove margin to reduce extra space
    }

    return df.to_dict('records'), columns, cell_style, table_style

In [1697]:
# Keep the condition dropdown in sync with the kind of column being filtered
@callback(
    Output('cuisines-filter-condition', 'options'),
    Output('cuisines-filter-condition', 'value'),
    Input('cuisines-filter-column', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the condition options for the chosen column and preselect the first.

    Columns listed in ``no_categorical`` or ``cuisines_features`` get the
    numeric comparisons; every other column gets 'is' / 'is not'.
    """
    is_numeric = col_chosen in no_categorical or col_chosen in cuisines_features

    if is_numeric:
        choices = ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
    else:
        choices = ['is', 'is not']

    # Both lists are non-empty, so the first entry is always available
    return [dict(label=choice, value=choice) for choice in choices], choices[0]

In [1698]:
@callback(
    Output('cuisines-filter-input-container', 'children'),
    Input('cuisines-filter-column', 'value')
)
def update_filter_input(col_chosen):
    """Build the filter-value widget that matches the chosen column.

    Columns in ``no_categorical`` or ``cuisines_features`` get a free-text
    input; any other column gets a dropdown of its distinct values in
    ``cuisines_df``, preselecting the first one when there is any.
    """
    caption = html.Label("Filter the Displayed Feature:", className='text-primary', style={'margin-bottom': '10px'})

    takes_number = col_chosen in no_categorical or col_chosen in cuisines_features
    if not takes_number:
        # Categorical column: offer its distinct values in a dropdown
        choices = cuisines_df[col_chosen].unique().tolist()
        default_choice = choices[0] if choices else None
        picker = dbc.Select(
            id='cuisines-filter-input',
            options=[{'label': choice, 'value': choice} for choice in choices],
            value=default_choice,
            style={'margin-bottom': '15px'}
        )
        return [caption, picker]

    # Numeric column: the user types the threshold into a text box
    text_box = dbc.Input(
        id='cuisines-filter-input',
        type='text',
        value='',
        placeholder="Enter value",
        style={'margin-bottom': '15px'}
    )
    return [caption, text_box]

In [1699]:
@callback(
    Output('graph-cuisines-diversity-pairplot', 'figure'),
    Output('profiling-cuisines-heatmap', 'figure'),
    Output('cuisines-apply-filter-button', 'n_clicks'),
    Output('cuisines-clear-filter-button', 'n_clicks'),
    Output('cuisines-filter-input', 'value'),
    Input('cuisines-filter-column', 'value'),
    Input('cuisines-filter-condition', 'value'),
    Input('cuisines-filter-input', 'value'),
    Input('cuisines-apply-filter-button', 'n_clicks'),
    Input('cuisines-clear-filter-button', 'n_clicks')
)
def update_cuisines_graph(filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Rebuild the cuisines pair plot and profiling heatmap, filtering on demand.

    Parameters
    ----------
    filter_column : str or None
        Column of ``cuisines_df`` to filter on.
    filter_condition : str or None
        'is' / 'is not' (string comparison) or one of the numeric comparisons
        ('greater than', 'less than', 'equal to', 'greater than or equal to',
        'less than or equal to').
    filter_value : str or None
        Value to compare against. For numeric conditions it must parse as a
        float; otherwise the filter is ignored and all rows are shown.
    apply_clicks, clear_clicks : int or None
        Click counters of the 'Apply Filter' and 'Clear Filter' buttons.

    Returns
    -------
    tuple
        ``(pairplot_figure, heatmap_figure, 0, 0, filter_value)``. Both click
        counters are reset to 0, so a filter is only applied on a trigger that
        follows a fresh click. ``filter_value`` is emptied after 'Clear Filter'.
    """
    # No copy needed: filtering and `assign` below both return new frames,
    # so the module-level cuisines_df is never mutated
    filtered_df = cuisines_df

    # Name of the Series comparison method behind each numeric condition
    numeric_comparisons = {
        'greater than': 'gt',
        'less than': 'lt',
        'equal to': 'eq',
        'greater than or equal to': 'ge',
        'less than or equal to': 'le',
    }

    # Truthiness instead of `> 0` so a None click counter counts as "not clicked"
    if clear_clicks:
        # 'Clear Filter' wins over everything else: show all rows, empty the input
        filter_value = ''

    elif filter_value and filter_column and apply_clicks:
        if filter_condition == 'is':
            filtered_df = filtered_df[filtered_df[filter_column].astype('str') == str(filter_value)]
        elif filter_condition == 'is not':
            filtered_df = filtered_df[filtered_df[filter_column].astype('str') != str(filter_value)]
        elif filter_condition in numeric_comparisons:
            try:
                threshold = float(filter_value)
            except (TypeError, ValueError):
                # Free text that is not a number: leave the data unfiltered
                # rather than aborting the whole callback
                threshold = None

            if threshold is not None:
                compare = getattr(filtered_df[filter_column], numeric_comparisons[filter_condition])
                filtered_df = filtered_df[compare(threshold)]

    # String labels make Plotly Express colour the clusters as discrete categories;
    # `assign` avoids a SettingWithCopyWarning on the filtered slice
    filtered_df = filtered_df.assign(labels=filtered_df['labels'].astype(str))

    combinations = list(itertools.combinations(cuisines_features, 2))

    # Define grid layout (ceil division, with at least one row so that
    # make_subplots stays valid even without any feature pair)
    n_cols = 3
    n_rows = max(1, (len(combinations) + n_cols - 1) // n_cols)

    # Create subplots
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f"{x} vs {y}" for x, y in combinations]
    )

    # Cluster names that already own a legend entry
    used_labels = set()

    for index, (feature_x, feature_y) in enumerate(combinations):
        grid_row, grid_col = divmod(index, n_cols)

        scatter_fig = px.scatter(
            filtered_df,
            x=feature_x,
            y=feature_y,
            color='labels',
            opacity=0.5,
            color_discrete_sequence=px.colors.qualitative.Vivid
        )

        # Move the Plotly Express traces into the grid, keeping a single
        # legend entry per cluster across all subplots
        for trace in scatter_fig.data:
            if trace.name in used_labels:
                trace.showlegend = False
            else:
                used_labels.add(trace.name)

            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    # Update layout
    fig.update_layout(
        height=n_rows * 400,
        width=1300,
        showlegend=True,
        legend_title_text='Clusters'
    )

    # Heatmap of the mean of every feature per cluster (features as rows)
    hm = px.imshow(
        filtered_df[cuisines_features + ['labels']].groupby('labels').mean().T,
        text_auto=".2f",
        color_continuous_scale="rdylgn",
        labels={"x": "Cluster Labels", "y": "Features", "color": "Mean Value"}
    )

    hm.update_layout(
        xaxis_title="Cluster Labels",
        yaxis_title="Features",
    )

    return fig, hm, 0, 0, filter_value

### Time

In [1700]:
# Layout of the "Time" page: filter controls, cluster pair plot, profiling
# heatmap with the cluster descriptions, and a per-category profiling table.
# The component ids used here are the ones the 'time-*' callbacks below bind to.
# NOTE(review): the name `time` would shadow the standard-library module of the
# same name if that module were ever imported in this file.
time = html.Div([
    dbc.Container([
        dbc.Row([
            html.H1('Time Cluster Exploration', className="text-center", style={'margin-bottom': '15px'})
        ]),
        # Filter section: column selector, condition selector + value input, action buttons
        dbc.Row([
            dbc.Col([
                html.Label("Select a Filter Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='time-filter-column',
                    # Every column except the cluster labels can be filtered on
                    options=[{'label': col, 'value': col} for col in time_df.columns if col != 'labels'],
                    # NOTE(review): the default is not filtered like the options, so it assumes
                    # the first column of time_df is not 'labels' -- confirm
                    value=time_df.columns[0],  # Set default value to the first column
                    style={'max-width': '250px'}
                )
            ], width = 3),
            dbc.Col([
                html.Label("Select a Filter Condition:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='time-filter-condition',
                    options=[],
                    value=None,  # Options and value are filled in by the condition callback
                    style={'margin-bottom': '10px'}
                ),
                # Placeholder that the filter-input callback fills with a text box or a dropdown
                html.Div(id='time-filter-input-container')
            ], width=6),
            dbc.Col([
                dbc.Button('Apply Filter', id='time-apply-filter-button', n_clicks=0, style={'margin-right': '10px'}),
                dbc.Button('Clear Filter', id='time-clear-filter-button', n_clicks=0, style={'margin-left': '10px'})
            ], width=3,  style={
                'display': 'flex',
                'justify-content': 'flex-end',  # Pushes the buttons to the end (right side) of the column
                'align-items': 'center',  # Centers the buttons vertically
            })
        ], style={'margin-bottom': '15px'}),
        # Pair plot of the clusters; the figure is supplied by update_time_graph
        dbc.Row([
            html.H2("Cluster Visualization", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='graph-time-diversity-pairplot')
            ], style={
                'display': 'flex',
                'justify-content': 'center',
                'align-items': 'center',
            })
        ], style={'margin-bottom': '15px'}),
        # Empty row used purely as a 15px vertical spacer
        dbc.Row(style={'height': '15px'}),
        # Profiling heatmap (left) next to the written cluster descriptions (right)
        dbc.Row([
            html.H2("Cluster Profiling Heatmap", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                dcc.Graph(figure={}, id='profiling-time-heatmap')
            ], style={
                'display': 'flex',
                'justify-content': 'left',
                'align-items': 'center',
            }, width=7),
            dbc.Col([
                dcc.Markdown('''
                    **Cluster 0 – Premiums:** High loading on avg_amt_per_product. These customers buy expensive
                    products having no time preference. Almost no customers are from city 2.
                    
                    **Cluster 1 – Nighttime Gourmet:** High loadings on Factor 1 and avg_amt_per_product. These customers
                    buy expensive products during the night. These customers are almost exclusively from city 8.
                             
                    **Cluster 2 – Afternoon Moderate Spenders:** Moderate loading on avg_amt_per_product. These
                    customers tend to buy in the mid afternoon and average-priced products. Most customers are from
                    region 2360.
                             
                    **Cluster 3 – Adventurous Workforce:** High loadings on Factor 3 and n_cuisines. These customers like to
                    eat from different cuisines during lunch or dinner times and from chain restaurants. Most customers
                    are from region 2360.
                ''')
            ], width=5)
        ], style={'margin-bottom': '15px'}),
        # Empty row used purely as a 15px vertical spacer
        dbc.Row(style={'height': '15px'}),
        # Profiling table: starts empty and is filled once a profiling feature is selected
        dbc.Row([
            html.H2("Cluster Profiling by Category", className='text-secondary', style={'margin-bottom': '20px'}),
            dbc.Col([
                html.Label("Select a Profiling Feature:", className='text-primary', style={'margin-bottom': '10px'}),
                dbc.Select(
                    id='time-profiling-select',
                    # Only the first five entries of non_metric_features are offered
                    options=[{'label': col, 'value': col} for col in non_metric_features[:5]],
                    style={'margin-bottom': '15px'}
            )], width=3),
            dash_table.DataTable(data=pd.DataFrame().to_dict('records'), columns=[], id='time-profiling-table', style_table={},
                style_header={'backgroundColor': '#30115E', 'color': 'white', 'textAlign': 'left'}, style_cell={'backgroundColor': '#6F42C1', 'color': 'white'},
                style_data_conditional=[]
            )
        ])
    ], style={'padding': '20px'})
])

In [1701]:
@callback(
    Output('time-profiling-table', 'data'),
    Output('time-profiling-table', 'columns'),
    Output('time-profiling-table', 'style_data_conditional'),
    Output('time-profiling-table', 'style_table'),
    Input('time-profiling-select', 'value')
)
def update_profiling_table(feature):
    """Fill the time profiling table with per-cluster means of one feature.

    Parameters
    ----------
    feature : str or None
        Value chosen in the 'time-profiling-select' dropdown. It is looked up
        among the *values* of ``mapping_dict`` to recover the raw column
        prefix; when no entry matches, the selection itself is used as prefix.

    Returns
    -------
    tuple
        ``(records, columns, style_data_conditional, style_table)`` for the
        DataTable. All four are empty when nothing is selected.
    """
    # Nothing selected (None or empty string): render an empty table
    if not feature:
        return [], [], [], {}

    # Translate the display label back to the raw column prefix. Falling back
    # to the selection itself avoids `str.startswith(None)` raising a TypeError
    # when the label has no entry in mapping_dict.
    raw_feature = next(
        (key for key, value in mapping_dict.items() if value == feature),
        feature
    )

    # Mean of every column starting with the prefix, per cluster label
    # (presumably one-hot encoded category columns -- verify against the data)
    feature_cols = [col for col in time_profiling.columns if col.startswith(raw_feature)]
    df = time_profiling.groupby('labels')[feature_cols].mean().round(2).reset_index()

    # Strip the prefix and anything after a '.', and turn underscores into spaces
    headers = [
        col.replace(f'{raw_feature}_', '').split('.')[0].replace('_', ' ')
        for col in df.columns[1:]
    ]

    display_name = mapping_dict.get(raw_feature, raw_feature)
    headers.insert(0, f'Labels (% {display_name})')
    df.columns = headers

    columns = [{'name': col, 'id': col} for col in df.columns]

    # One rule per column: automatic width, wrapped and left-aligned text
    cell_style = [
        {'if': {'column_id': col}, 'width': 'auto', 'whiteSpace': 'normal', 'textAlign': 'left'}
        for col in df.columns
    ]

    table_style = {
        'border': '1px solid white',
        'width': '50%',  # Table spans half of its container
        'overflowX': 'auto',  # Enable horizontal scrolling if content overflows
        'padding': '0px',  # Remove padding to reduce extra space
        'margin': '0px',  # Remove margin to reduce extra space
    }

    return df.to_dict('records'), columns, cell_style, table_style

In [1702]:
# Keep the condition dropdown in sync with the kind of column being filtered
@callback(
    Output('time-filter-condition', 'options'),
    Output('time-filter-condition', 'value'),
    Input('time-filter-column', 'value')
)
def update_condition_dropdown(col_chosen):
    """Return the condition options for the chosen column and preselect the first.

    Columns listed in ``no_categorical`` or ``time_features`` get the numeric
    comparisons; every other column gets 'is' / 'is not'.
    """
    numeric_choices = ['greater than', 'less than', 'equal to', 'greater than or equal to', 'less than or equal to']
    categorical_choices = ['is', 'is not']

    if col_chosen in no_categorical or col_chosen in time_features:
        choices = numeric_choices
    else:
        choices = categorical_choices

    # Both lists are non-empty, so the first entry is always available
    dropdown_options = [dict(label=choice, value=choice) for choice in choices]
    return dropdown_options, choices[0]

In [1703]:
@callback(
    Output('time-filter-input-container', 'children'),
    Input('time-filter-column', 'value')
)
def update_filter_input(col_chosen):
    """Build the filter-value widget that matches the chosen column.

    Columns in ``no_categorical`` or ``time_features`` get a free-text input;
    any other column gets a dropdown of its distinct values in ``time_df``,
    preselecting the first one when there is any.
    """
    caption = html.Label("Filter the Displayed Feature:", className='text-primary', style={'margin-bottom': '10px'})

    takes_number = col_chosen in no_categorical or col_chosen in time_features
    if not takes_number:
        # Categorical column: offer its distinct values in a dropdown
        choices = time_df[col_chosen].unique().tolist()
        default_choice = choices[0] if choices else None
        picker = dbc.Select(
            id='time-filter-input',
            options=[{'label': choice, 'value': choice} for choice in choices],
            value=default_choice,
            style={'margin-bottom': '15px'}
        )
        return [caption, picker]

    # Numeric column: the user types the threshold into a text box
    text_box = dbc.Input(
        id='time-filter-input',
        type='text',
        value='',
        placeholder="Enter value",
        style={'margin-bottom': '15px'}
    )
    return [caption, text_box]

In [1704]:
@callback(
    Output('graph-time-diversity-pairplot', 'figure'),
    Output('profiling-time-heatmap', 'figure'),
    Output('time-apply-filter-button', 'n_clicks'),
    Output('time-clear-filter-button', 'n_clicks'),
    Output('time-filter-input', 'value'),
    Input('time-filter-column', 'value'),
    Input('time-filter-condition', 'value'),
    Input('time-filter-input', 'value'),
    Input('time-apply-filter-button', 'n_clicks'),
    Input('time-clear-filter-button', 'n_clicks')
)
def update_time_graph(filter_column, filter_condition, filter_value, apply_clicks, clear_clicks):
    """Rebuild the time pair plot and profiling heatmap, filtering on demand.

    Parameters
    ----------
    filter_column : str or None
        Column of ``time_df`` to filter on.
    filter_condition : str or None
        'is' / 'is not' (string comparison) or one of the numeric comparisons
        ('greater than', 'less than', 'equal to', 'greater than or equal to',
        'less than or equal to').
    filter_value : str or None
        Value to compare against. For numeric conditions it must parse as a
        float; otherwise the filter is ignored and all rows are shown.
    apply_clicks, clear_clicks : int or None
        Click counters of the 'Apply Filter' and 'Clear Filter' buttons.

    Returns
    -------
    tuple
        ``(pairplot_figure, heatmap_figure, 0, 0, filter_value)``. Both click
        counters are reset to 0, so a filter is only applied on a trigger that
        follows a fresh click. ``filter_value`` is emptied after 'Clear Filter'.
    """
    # No copy needed: filtering and `assign` below both return new frames,
    # so the module-level time_df is never mutated
    filtered_df = time_df

    # Name of the Series comparison method behind each numeric condition
    numeric_comparisons = {
        'greater than': 'gt',
        'less than': 'lt',
        'equal to': 'eq',
        'greater than or equal to': 'ge',
        'less than or equal to': 'le',
    }

    # Truthiness instead of `> 0` so a None click counter counts as "not clicked"
    if clear_clicks:
        # 'Clear Filter' wins over everything else: show all rows, empty the input
        filter_value = ''

    elif filter_value and filter_column and apply_clicks:
        if filter_condition == 'is':
            filtered_df = filtered_df[filtered_df[filter_column].astype('str') == str(filter_value)]
        elif filter_condition == 'is not':
            filtered_df = filtered_df[filtered_df[filter_column].astype('str') != str(filter_value)]
        elif filter_condition in numeric_comparisons:
            try:
                threshold = float(filter_value)
            except (TypeError, ValueError):
                # Free text that is not a number: leave the data unfiltered
                # rather than aborting the whole callback
                threshold = None

            if threshold is not None:
                compare = getattr(filtered_df[filter_column], numeric_comparisons[filter_condition])
                filtered_df = filtered_df[compare(threshold)]

    # String labels make Plotly Express colour the clusters as discrete categories;
    # `assign` avoids a SettingWithCopyWarning on the filtered slice
    filtered_df = filtered_df.assign(labels=filtered_df['labels'].astype(str))

    combinations = list(itertools.combinations(time_features, 2))

    # Define grid layout (ceil division, with at least one row so that
    # make_subplots stays valid even without any feature pair)
    n_cols = 3
    n_rows = max(1, (len(combinations) + n_cols - 1) // n_cols)

    # Create subplots
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=[f"{x} vs {y}" for x, y in combinations]
    )

    # Cluster names that already own a legend entry
    used_labels = set()

    for index, (feature_x, feature_y) in enumerate(combinations):
        grid_row, grid_col = divmod(index, n_cols)

        scatter_fig = px.scatter(
            filtered_df,
            x=feature_x,
            y=feature_y,
            color='labels',
            opacity=0.5,
            color_discrete_sequence=px.colors.qualitative.Vivid
        )

        # Move the Plotly Express traces into the grid, keeping a single
        # legend entry per cluster across all subplots
        for trace in scatter_fig.data:
            if trace.name in used_labels:
                trace.showlegend = False
            else:
                used_labels.add(trace.name)

            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    # Update layout
    fig.update_layout(
        height=n_rows * 400,
        width=1300,
        showlegend=True,
        legend_title_text='Clusters'
    )

    # Heatmap of the mean of every feature per cluster (features as rows)
    hm = px.imshow(
        filtered_df[time_features + ['labels']].groupby('labels').mean().T,
        text_auto=".2f",
        color_continuous_scale="rdylgn",
        labels={"x": "Cluster Labels", "y": "Features", "color": "Mean Value"}
    )

    hm.update_layout(
        xaxis_title="Cluster Labels",
        yaxis_title="Features",
    )

    return fig, hm, 0, 0, filter_value

## Page Navigation

In [1705]:
@callback(
    [Output('page-content', 'children'),
    Output('home-button', 'active'),
    Output('single-feature-button', 'active'),
    Output('pairplot-button', 'active'),
    Output('spending-diversity-button', 'active'),
    Output('geography-button', 'active'),
    Output('cuisines-button', 'active'),
    Output('time-button', 'active')],
    [Input('url', 'pathname')]
)
def display_page(pathname):
    """Pick the page layout for the current URL and highlight its nav button.

    Returns a tuple holding the layout to render followed by seven booleans,
    one per navigation button (home, single feature, pairplot, spending,
    geography, cuisines, time), of which exactly one is True. Any path that
    is not listed falls back to the home page.
    """
    # Routes listed in the same order as their buttons' `active` outputs
    # (the home button comes first and is handled by the fallback below)
    routes = (
        ('/single-feature', single_feature),
        ('/pairplot', pairplot),
        ('/spending', spending),
        ('/geography', geography),
        ('/cuisines', cuisines),
        ('/time', time),
    )

    for position, (path, layout) in enumerate(routes):
        if pathname == path:
            # Slot 0 belongs to the home button, hence the offset of one
            active_flags = [False] * (len(routes) + 1)
            active_flags[position + 1] = True
            return (layout, *active_flags)

    return (home_layout, True, *([False] * len(routes)))

# Dashboard

In [1706]:
# Start the Dash development server when this file is executed directly
if __name__ == '__main__':
    app.run(port=8052, debug=True)