In [4]:
import pandas as pd
import dash
from dash import html, dcc, Input, Output
import plotly.express as px

# Load the dataset from 'cleaned_data.csv' (path is relative to the working
# directory). Later cells read its Age, Annual_Income, Occupation and
# Credit_Score columns, plus whichever behaviour column the user selects.
data = pd.read_csv('cleaned_data.csv')

In [5]:

# Segment definitions offered by the dashboard.
# Age and income groups map a display label to a (lower, upper) bound pair; the
# last bucket of each is capped at the largest value present in the data.
age_groups = dict([
    ('0-17', (0, 17)),
    ('18-24', (18, 24)),
    ('25-34', (25, 34)),
    ('35-44', (35, 44)),
    ('45-54', (45, 54)),
    ('55+', (55, data['Age'].max())),
])

income_groups = dict([
    ('0-20k', (0, 20_000)),
    ('20k-40k', (20_000, 40_000)),
    ('40k-60k', (40_000, 60_000)),
    ('60k-80k', (60_000, 80_000)),
    ('80k-100k', (80_000, 100_000)),
    ('100k-120k', (100_000, 120_000)),
    ('120k-140k', (120_000, 140_000)),
    ('140k-160k', (140_000, 160_000)),
    ('160k+', (160_000, data['Annual_Income'].max())),
])

# Occupations map to themselves: the label shown is the value matched in the data.
occupation_groups = {
    job: job
    for job in (
        'Lawyer', 'Mechanic', 'Media_Manager', 'Doctor', 'Journalist',
        'Accountant', 'Manager', 'Entrepreneur', 'Scientist', 'Architect',
        'Teacher', 'Engineer', 'Writer', 'Developer', 'Musician',
    )
}

# Human-readable axis labels, keyed by behaviour column name.
x_labels = dict(
    Payment_of_Min_Amount='Payment of Minimum Amount',
    Behaviour_Spending_Level='Behaviour Spending Level',
    Payment_Behaviour='Payment Behaviour',
    Num_Credit_Card='Number of creditcards',
    Num_Bank_Accounts='Number of bank accounts',
)

## Plotly Dash graphs

In [7]:
# Create the Dash application; the layout is four dropdowns followed by one graph.
app = dash.Dash(__name__)

app.layout = html.Div(children=[
    # Attribute used to split the customers into segments.
    dcc.Dropdown(
        id='subgroup-select',
        options=[
            {'label': name, 'value': name}
            for name in ('Age', 'Income', 'Occupation')  # add other subgroups here as needed
        ],
        value='Age',  # initial selection
    ),
    # Segment inside the chosen subgroup; its options are supplied by a callback.
    dcc.Dropdown(id='segment-select'),
    # Behaviour column to plot.
    dcc.Dropdown(
        id='behavior-select',
        options=[
            {'label': text, 'value': column}
            for column, text in (
                ('Payment_of_Min_Amount', 'Payment of Minimum Amount'),
                ('Behaviour_Spending_Level', 'Behaviour Spending Level'),
                ('Payment_Behaviour', 'Payment Behaviour'),
                ('Num_Credit_Card', 'Number of creditcards'),
                ('Num_Bank_Accounts', 'Number of bank accounts'),
                # add other behaviours here as needed
            )
        ],
        value='Payment_of_Min_Amount',  # initial selection
    ),
    # Kind of figure to draw.
    dcc.Dropdown(
        id='graph-type-select',
        options=[
            {'label': text, 'value': kind}
            for kind, text in (
                ('bar', 'Bar Chart'),
                ('box', 'Box Plot'),
                ('heatmap', 'Heatmap'),
            )
        ],
        value='bar',  # initial graph type
    ),
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('segment-select', 'options'),
    Input('subgroup-select', 'value')
)
def set_segment_options(selected_subgroup):
    """Return the segment dropdown options for the selected subgroup.

    Each option uses the group label as both its label and its value. A
    subgroup other than 'Age', 'Income' or 'Occupation' yields an empty list.
    """
    # Look the subgroup up in a table instead of branching on each name.
    groups_by_subgroup = {
        'Age': age_groups,
        'Income': income_groups,
        'Occupation': occupation_groups,
    }
    segment_names = groups_by_subgroup.get(selected_subgroup, {})
    return [{'label': name, 'value': name} for name in segment_names]


# Display order of the credit-score categories and the colour tied to each one.
_CREDIT_SCORE_ORDER = ['Good', 'Standard', 'Poor']
_CREDIT_SCORE_COLORS = {
    'Good': '#2ca02c',      # green
    'Standard': '#fa9c1b',  # orange
    'Poor': '#d62728',      # red
}


def _label_by_group(values, groups):
    """Label every entry of ``values`` with the first group whose range holds it.

    ``groups`` maps a label to a ``(low, high)`` pair. Both bounds are treated
    as inclusive, which matches the segment filter used for the other charts.
    Entries outside every range keep a missing label.
    """
    labels = pd.Series(index=values.index, dtype=object)
    for label, (low, high) in groups.items():
        # Only fill rows that are still unlabelled so a value sitting on a
        # shared boundary (e.g. 20000) is counted once, in the lower group.
        unlabelled_in_range = values.between(low, high) & labels.isna()
        labels[unlabelled_in_range] = label
    return labels


def _credit_score_counts(subgroup):
    """Count rows per (subgroup segment, credit score).

    Returns a DataFrame with one row per segment and one column per credit
    score, or ``None`` when ``subgroup`` is not a known subgroup.
    """
    if subgroup == 'Age':
        rows = _label_by_group(data['Age'], age_groups)
        row_order = list(age_groups)
    elif subgroup == 'Income':
        rows = _label_by_group(data['Annual_Income'], income_groups)
        row_order = list(income_groups)
    elif subgroup == 'Occupation':
        rows = data['Occupation']
        row_order = None
    else:
        return None

    # Work on a throw-away frame: the shared ``data`` frame is never modified.
    counts = (
        pd.DataFrame({subgroup: rows, 'Credit_Score': data['Credit_Score']})
        .groupby([subgroup, 'Credit_Score'])
        .size()
        .unstack(fill_value=0)
    )
    # Ensure all expected credit score categories are present, in display order.
    counts = counts.reindex(columns=_CREDIT_SCORE_ORDER, fill_value=0)
    if row_order is not None:
        # Keep the segments in their defined order rather than alphabetical order.
        counts = counts.reindex(index=row_order, fill_value=0)
    return counts


def _rows_for_segment(subgroup, segment):
    """Return the rows of ``data`` that belong to ``segment`` of ``subgroup``.

    When the subgroup is unknown, or the segment is missing or does not belong
    to the subgroup (for example a stale value left over after the subgroup
    dropdown changed), the whole data set is returned.
    """
    if subgroup == 'Age':
        column, groups = 'Age', age_groups
    elif subgroup == 'Income':
        column, groups = 'Annual_Income', income_groups
    elif subgroup == 'Occupation':
        if segment not in occupation_groups:
            return data
        return data[data['Occupation'] == occupation_groups[segment]]
    else:
        return data

    if segment not in groups:
        return data
    low, high = groups[segment]
    return data[data[column].between(low, high)]  # inclusive on both ends


@app.callback(
    Output('graph', 'figure'),
    [Input('subgroup-select', 'value'),
     Input('segment-select', 'value'),
     Input('behavior-select', 'value'),
     Input('graph-type-select', 'value')]
)
def update_graph(subgroup, segment, behavior, graph_type):
    """Build the figure for the current dropdown selections.

    Args:
        subgroup: 'Age', 'Income' or 'Occupation'.
        segment: Label of a group inside ``subgroup``; ignored by the heatmap.
        behavior: Name of the column to plot; ignored by the heatmap.
        graph_type: 'bar', 'box' or 'heatmap'.

    Returns:
        A Plotly figure, or ``dash.no_update`` (leaving the current figure in
        place) when a required selection is cleared or not recognised.
    """
    if graph_type == 'heatmap':
        heatmap_data = _credit_score_counts(subgroup)
        if heatmap_data is None:
            return dash.no_update

        fig = px.imshow(heatmap_data, aspect='auto',
                        color_continuous_scale='Viridis')
        # Categorical axes with readable titles; credit scores along the bottom.
        fig.update_layout(
            xaxis=dict(title='Credit Score', type='category', side='bottom'),
            yaxis=dict(title=subgroup, type='category'),
        )
        return fig

    if behavior is None or graph_type not in ('bar', 'box'):
        return dash.no_update

    filtered_data = _rows_for_segment(subgroup, segment)
    category_order = {'Credit_Score': _CREDIT_SCORE_ORDER}

    if graph_type == 'bar':
        filtered_data = filtered_data[filtered_data[behavior] != "Not available"]
        # A colour map (not a positional sequence) keeps each score's colour
        # fixed even when a category is absent from the filtered rows.
        fig = px.histogram(filtered_data, x=behavior, color='Credit_Score',
                           color_discrete_map=_CREDIT_SCORE_COLORS,
                           category_orders=category_order)
    else:  # 'box'
        fig = px.box(filtered_data, x=behavior, y='Credit_Score', color='Credit_Score',
                     color_discrete_map=_CREDIT_SCORE_COLORS,
                     category_orders=category_order)
    return fig

# Launch the Dash server in debug mode on port 8051 when run as the main module.
if __name__ == '__main__':
    app.run_server(port=8051, debug=True)

OSError: Address 'http://127.0.0.1:8050' already in use.
    Try passing a different port to run_server.

In [None]:
# Create the Dash application for the extended dashboard, which also offers the
# ratio line chart. Layout: four dropdowns followed by one graph.
app = dash.Dash(__name__)

app.layout = html.Div(children=[
    # Attribute used to split the customers into segments.
    dcc.Dropdown(
        id='subgroup-select',
        options=[
            dict(label='Age', value='Age'),
            dict(label='Income', value='Income'),
            dict(label='Occupation', value='Occupation'),
            # add other subgroups here as needed
        ],
        value='Age',  # initial selection
    ),
    # Segment inside the chosen subgroup; its options are supplied by a callback.
    dcc.Dropdown(id='segment-select'),
    # Behaviour column to plot.
    dcc.Dropdown(
        id='behavior-select',
        options=[
            dict(label='Payment of Minimum Amount', value='Payment_of_Min_Amount'),
            dict(label='Behaviour Spending Level', value='Behaviour_Spending_Level'),
            dict(label='Payment Behaviour', value='Payment_Behaviour'),
            dict(label='Number of creditcards', value='Num_Credit_Card'),
            dict(label='Number of bank accounts', value='Num_Bank_Accounts'),
            # add other behaviours here as needed
        ],
        value='Payment_of_Min_Amount',  # initial selection
    ),
    # Kind of figure to draw.
    dcc.Dropdown(
        id='graph-type-select',
        options=[
            dict(label='Bar Chart', value='bar'),
            dict(label='Box Plot', value='box'),
            dict(label='Heatmap', value='heatmap'),
            dict(label='Ratio Line Chart', value='line'),
        ],
        value='bar',  # initial graph type
    ),
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('segment-select', 'options'),
    Input('subgroup-select', 'value')
)
def set_segment_options(selected_subgroup):
    """Populate the segment dropdown from the groups of the chosen subgroup.

    Returns one option per group label (label and value are the same string),
    or an empty list when the subgroup is not 'Age', 'Income' or 'Occupation'.
    """
    # Collect the labels first, then build the option dicts in one place.
    if selected_subgroup == 'Age':
        labels = list(age_groups)
    elif selected_subgroup == 'Income':
        labels = list(income_groups)
    elif selected_subgroup == 'Occupation':
        labels = list(occupation_groups)
    else:
        labels = []
    return [dict(label=label, value=label) for label in labels]


# Display order of the credit-score categories and the colour tied to each one.
_CREDIT_SCORE_ORDER = ['Good', 'Standard', 'Poor']
_CREDIT_SCORE_COLORS = {
    'Good': '#2ca02c',      # green
    'Standard': '#fa9c1b',  # orange
    'Poor': '#d62728',      # red
}


def _label_by_group(values, groups):
    """Label every entry of ``values`` with the first group whose range holds it.

    ``groups`` maps a label to a ``(low, high)`` pair. Both bounds are treated
    as inclusive, which matches the segment filter used for the other charts.
    Entries outside every range keep a missing label.
    """
    labels = pd.Series(index=values.index, dtype=object)
    for label, (low, high) in groups.items():
        # Only fill rows that are still unlabelled so a value sitting on a
        # shared boundary (e.g. 20000) is counted once, in the lower group.
        unlabelled_in_range = values.between(low, high) & labels.isna()
        labels[unlabelled_in_range] = label
    return labels


def _credit_score_counts(subgroup):
    """Count rows per (subgroup segment, credit score).

    Returns a DataFrame with one row per segment and one column per credit
    score, or ``None`` when ``subgroup`` is not a known subgroup.
    """
    if subgroup == 'Age':
        rows = _label_by_group(data['Age'], age_groups)
        row_order = list(age_groups)
    elif subgroup == 'Income':
        rows = _label_by_group(data['Annual_Income'], income_groups)
        row_order = list(income_groups)
    elif subgroup == 'Occupation':
        rows = data['Occupation']
        row_order = None
    else:
        return None

    # Work on a throw-away frame: the shared ``data`` frame is never modified.
    counts = (
        pd.DataFrame({subgroup: rows, 'Credit_Score': data['Credit_Score']})
        .groupby([subgroup, 'Credit_Score'])
        .size()
        .unstack(fill_value=0)
    )
    # Ensure all expected credit score categories are present, in display order.
    counts = counts.reindex(columns=_CREDIT_SCORE_ORDER, fill_value=0)
    if row_order is not None:
        # Keep the segments in their defined order rather than alphabetical order.
        counts = counts.reindex(index=row_order, fill_value=0)
    return counts


def _rows_for_segment(subgroup, segment):
    """Return the rows of ``data`` that belong to ``segment`` of ``subgroup``.

    When the subgroup is unknown, or the segment is missing or does not belong
    to the subgroup (for example a stale value left over after the subgroup
    dropdown changed), the whole data set is returned.
    """
    if subgroup == 'Age':
        column, groups = 'Age', age_groups
    elif subgroup == 'Income':
        column, groups = 'Annual_Income', income_groups
    elif subgroup == 'Occupation':
        if segment not in occupation_groups:
            return data
        return data[data['Occupation'] == occupation_groups[segment]]
    else:
        return data

    if segment not in groups:
        return data
    low, high = groups[segment]
    return data[data[column].between(low, high)]  # inclusive on both ends


@app.callback(
    Output('graph', 'figure'),
    [Input('subgroup-select', 'value'),
     Input('segment-select', 'value'),
     Input('behavior-select', 'value'),
     Input('graph-type-select', 'value')]
)
def update_graph(subgroup, segment, behavior, graph_type):
    """Build the figure for the current dropdown selections.

    Args:
        subgroup: 'Age', 'Income' or 'Occupation'.
        segment: Label of a group inside ``subgroup``; ignored by the heatmap.
        behavior: Name of the column to plot; ignored by the heatmap.
        graph_type: 'bar', 'box', 'heatmap' or 'line'.

    Returns:
        A Plotly figure, or ``dash.no_update`` (leaving the current figure in
        place) when a required selection is cleared or not recognised.
    """
    if graph_type == 'heatmap':
        heatmap_data = _credit_score_counts(subgroup)
        if heatmap_data is None:
            return dash.no_update

        fig = px.imshow(heatmap_data, aspect='auto',
                        color_continuous_scale='Viridis')
        # Categorical axes with readable titles; credit scores along the bottom.
        fig.update_layout(
            xaxis=dict(title='Credit Score', type='category', side='bottom'),
            yaxis=dict(title=subgroup, type='category'),
        )
        return fig

    if behavior is None or graph_type not in ('bar', 'box', 'line'):
        return dash.no_update

    filtered_data = _rows_for_segment(subgroup, segment)
    category_order = {'Credit_Score': _CREDIT_SCORE_ORDER}

    if graph_type == 'bar':
        filtered_data = filtered_data[filtered_data[behavior] != "Not available"]
        # A colour map (not a positional sequence) keeps each score's colour
        # fixed even when a category is absent from the filtered rows.
        fig = px.histogram(filtered_data, x=behavior, color='Credit_Score',
                           color_discrete_map=_CREDIT_SCORE_COLORS,
                           category_orders=category_order)
    elif graph_type == 'box':
        fig = px.box(filtered_data, x=behavior, y='Credit_Score', color='Credit_Score',
                     color_discrete_map=_CREDIT_SCORE_COLORS,
                     category_orders=category_order)
    else:  # 'line'
        # Share of rows rated 'Good' or 'Standard' for each distinct behaviour
        # value; groupby returns the values sorted and skips missing ones.
        is_good_or_standard = filtered_data['Credit_Score'].isin(['Good', 'Standard'])
        ratios = is_good_or_standard.groupby(filtered_data[behavior]).mean()

        axis_label = x_labels.get(behavior, behavior)
        # A share lies in [0, 1]; show the full range so high ratios are not clipped.
        fig = px.line(x=ratios.index.to_numpy(), y=ratios.to_numpy(),
                      range_y=[0, 1], title=f'Ratios for {axis_label}')
        fig.update_layout(xaxis_title=axis_label, yaxis_title='Ratio')
    return fig

# Run the app on its own explicit port. The default port (8050) is the one the
# OSError recorded earlier in this notebook reports as already in use, and the
# earlier app in this notebook is served on 8051.
if __name__ == '__main__':
    app.run_server(debug=True, port=8052)
