In [2]:
import pandas as pd
import dash
from dash import html, dcc, Input, Output
import plotly.express as px

# Load the cleaned dataset from the working directory. The cells below read its
# 'Age', 'Annual_Income', 'Occupation' and 'Credit_Score' columns, plus the
# behaviour columns offered in the dropdowns.
data = pd.read_csv('cleaned_data.csv')

In [5]:

# Age and income buckets: label -> (lower bound, upper bound).
# The open-ended top bucket of each mapping is capped at the largest value in `data`.
closed_age_brackets = [(0, 17), (18, 24), (25, 34), (35, 44), (45, 54)]
age_groups = {f'{low}-{high}': (low, high) for low, high in closed_age_brackets}
age_groups['55+'] = (55, data['Age'].max())

# Income buckets are 20k wide from 0 up to 160k: '0-20k', '20k-40k', ..., '140k-160k'.
income_edges_k = list(range(0, 161, 20))
income_groups = {
    ('0' if low_k == 0 else f'{low_k}k') + f'-{high_k}k': (low_k * 1000, high_k * 1000)
    for low_k, high_k in zip(income_edges_k, income_edges_k[1:])
}
income_groups['160k+'] = (160000, data['Annual_Income'].max())

# Identity mapping: every occupation name maps to itself.
occupation_groups = {
    occupation: occupation
    for occupation in (
        'Lawyer', 'Mechanic', 'Media_Manager', 'Doctor', 'Journalist',
        'Accountant', 'Manager', 'Entrepreneur', 'Scientist', 'Architect',
        'Teacher', 'Engineer', 'Writer', 'Developer', 'Musician',
    )
}

# Column name -> human-readable label used for axis titles and chart titles.
x_labels = dict(
    Payment_of_Min_Amount='Payment of Minimum Amount',
    Behaviour_Spending_Level='Behaviour Spending Level',
    Payment_Behaviour='Payment Behaviour',
    Num_Credit_Card='Number of creditcards',
    Num_Bank_Accounts='Number of bank accounts',
)

## Bar charts

In [24]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Read the slider bounds and the occupation list from the data once, then reuse them.
age_lo, age_hi = data['Age'].min(), data['Age'].max()
income_lo, income_hi = data['Annual_Income'].min(), data['Annual_Income'].max()
occupation_names = data['Occupation'].unique()

# Age filter: range slider spanning the observed ages, fully selected by default.
age_filter = html.Div(
    children=[
        "Age Range:",
        dcc.RangeSlider(
            id='age-slider',
            min=age_lo,
            max=age_hi,
            step=1,
            value=[age_lo, age_hi],
            marks={tick: str(tick) for tick in range(15, 55 + 1, 5)},
        ),
    ],
    style={'padding': 20},
)

# Income filter: range slider spanning the observed incomes, fully selected by default.
income_filter = html.Div(
    children=[
        "Income Range:",
        dcc.RangeSlider(
            id='income-slider',
            min=income_lo,
            max=income_hi,
            step=1000,
            value=[income_lo, income_hi],
            marks={tick: '${:,.0f}'.format(tick) for tick in range(15000, 180000 + 1, 10000)},
        ),
    ],
    style={'padding': 20},
)

# Occupation filter: one checkbox per occupation in the data, all ticked by default.
occupation_filter = html.Div(
    children=[
        "Occupation:",
        dcc.Checklist(
            id='occupation-checklist',
            options=[{'label': name, 'value': name} for name in occupation_names],
            value=occupation_names.tolist(),
            inline=True,
        ),
    ],
    style={'padding': 20},
)

# Behaviour selector: every column described in `x_labels`, in the same order.
behavior_dropdown = dcc.Dropdown(
    id='behavior-select',
    options=[{'label': label, 'value': column} for column, label in x_labels.items()],
    value='Num_Credit_Card',
)

app.layout = html.Div([
    age_filter,
    income_filter,
    occupation_filter,
    behavior_dropdown,
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('graph', 'figure'),
    [Input('age-slider', 'value'),
     Input('income-slider', 'value'),
     Input('occupation-checklist', 'value'),
     Input('behavior-select', 'value'),]
)
def update_graph(age_range, income_range, occupations, behavior):
    """Redraw the stacked bar chart for the current filter selection.

    Parameters
    ----------
    age_range : sequence of two numbers
        Inclusive [min, max] bounds applied to the 'Age' column.
    income_range : sequence of two numbers
        Inclusive [min, max] bounds applied to the 'Annual_Income' column.
    occupations : list of str or None
        Occupations to keep; ``None`` is treated like an empty selection.
    behavior : str
        Column to put on the x axis; must be a key of ``x_labels``.

    Returns
    -------
    plotly.graph_objects.Figure
        For every value of ``behavior``, the share of rows whose
        'Credit_Score' is 'Good' or 'Standard' stacked on the share of all
        other rows ('Poor').
    """
    # Filter data based on inputs
    selected = data[
        data['Age'].between(age_range[0], age_range[1])
        & data['Annual_Income'].between(income_range[0], income_range[1])
        & data['Occupation'].isin(occupations or [])
    ]
    selected = selected[selected[behavior] != "Not available"]

    # Two colours, one per stacked series: green for Good_Standard, red for Poor.
    custom_colors = ['#2ca02c', '#d62728']

    # Flag every row as Good_Standard or Poor, then average the flags per
    # behaviour value to get proportions. Building both columns explicitly
    # guarantees they exist even when the selection holds only one bucket or
    # no rows at all; the previous pivot dropped the missing column and made
    # px.bar raise. It also avoids assigning a column onto a filtered slice.
    is_good_standard = selected['Credit_Score'].isin(['Good', 'Standard'])
    bucket_flags = pd.DataFrame(
        {'Good_Standard': is_good_standard, 'Poor': ~is_good_standard}
    ).astype(float)
    shares = (
        bucket_flags.groupby(selected[behavior])
        .mean()
        .rename_axis(behavior)  # make the behaviour values a named column again
        .reset_index()
    )

    # Plotting with normalized counts
    fig = px.bar(
        shares,
        x=behavior,
        y=['Good_Standard', 'Poor'],
        title=f"Good+Standard to Poor Ratio by {x_labels[behavior]}",
        labels={'value': 'Normalized Credit Score', behavior: x_labels[behavior]},
        color_discrete_sequence=custom_colors,
    )

    fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
    return fig

# Run the app
# Starts the Dash server for the bar-chart app with debug mode enabled. The
# guard skips the call when this code is imported instead of run as the main module.
# NOTE(review): newer Dash releases document `app.run(...)` as the entry point —
# confirm `run_server` is still supported by the installed version.
if __name__ == '__main__':
    app.run_server(debug=True)


## Boxplots

In [32]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Read the slider bounds and the occupation list from the data once, then reuse them.
age_lo, age_hi = data['Age'].min(), data['Age'].max()
income_lo, income_hi = data['Annual_Income'].min(), data['Annual_Income'].max()
occupation_names = data['Occupation'].unique()

# Age filter: range slider spanning the observed ages, fully selected by default.
age_filter = html.Div(
    children=[
        "Age Range:",
        dcc.RangeSlider(
            id='age-slider',
            min=age_lo,
            max=age_hi,
            step=1,
            value=[age_lo, age_hi],
            marks={tick: str(tick) for tick in range(15, 55 + 1, 5)},
        ),
    ],
    style={'padding': 20},
)

# Income filter: range slider spanning the observed incomes, fully selected by default.
income_filter = html.Div(
    children=[
        "Income Range:",
        dcc.RangeSlider(
            id='income-slider',
            min=income_lo,
            max=income_hi,
            step=1000,
            value=[income_lo, income_hi],
            marks={tick: '${:,.0f}'.format(tick) for tick in range(15000, 180000 + 1, 10000)},
        ),
    ],
    style={'padding': 20},
)

# Occupation filter: one checkbox per occupation in the data, all ticked by default.
occupation_filter = html.Div(
    children=[
        "Occupation:",
        dcc.Checklist(
            id='occupation-checklist',
            options=[{'label': name, 'value': name} for name in occupation_names],
            value=occupation_names.tolist(),
            inline=True,
        ),
    ],
    style={'padding': 20},
)

# Behaviour selector: only the three columns this box-plot view offers.
boxplot_columns = ('Payment_Behaviour', 'Num_Credit_Card', 'Num_Bank_Accounts')
behavior_dropdown = dcc.Dropdown(
    id='behavior-select',
    options=[{'label': x_labels[column], 'value': column} for column in boxplot_columns],
    value='Num_Credit_Card',  # Default value
)

app.layout = html.Div([
    age_filter,
    income_filter,
    occupation_filter,
    behavior_dropdown,
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('graph', 'figure'),
    [Input('age-slider', 'value'),
     Input('income-slider', 'value'),
     Input('occupation-checklist', 'value'),
     Input('behavior-select', 'value'),]
)
def update_graph(age_range, income_range, occupations, behavior):
    """Redraw the box plot for the current filter selection.

    Parameters
    ----------
    age_range : sequence of two numbers
        Inclusive [min, max] bounds applied to the 'Age' column.
    income_range : sequence of two numbers
        Inclusive [min, max] bounds applied to the 'Annual_Income' column.
    occupations : list of str or None
        Occupations to keep; ``None`` is treated like an empty selection.
    behavior : str
        Column to put on the x axis; must be a key of ``x_labels``.

    Returns
    -------
    plotly.graph_objects.Figure
        One box per 'Credit_Score' category showing the distribution of
        ``behavior`` among the selected rows.
    """
    # Filter data based on inputs
    selected = data[
        data['Age'].between(age_range[0], age_range[1])
        & data['Annual_Income'].between(income_range[0], income_range[1])
        & data['Occupation'].isin(occupations or [])
    ]
    selected = selected[selected[behavior] != "Not available"]

    # Green for Good, orange for Standard, red for Poor
    custom_colors = ['#2ca02c', '#fa9c1b', '#d62728']

    # Specify the order of the categories
    category_order = {'Credit_Score': ['Good', 'Standard', 'Poor']}

    # Codes 0-5 for the six payment behaviours, so the column can be plotted
    # as numbers.
    payment_behaviour_codes = {
        'Low_spent_Small_value_payments': 0,
        'Low_spent_Medium_value_payments': 1,
        'Low_spent_Large_value_payments': 2,
        'High_spent_Small_value_payments': 3,
        'High_spent_Medium_value_payments': 4,
        'High_spent_Large_value_payments': 5,
    }
    # `assign` returns a new frame, so nothing is written onto a filtered slice
    # (the source of SettingWithCopyWarning). `infer_objects` makes the
    # conversion to a numeric dtype explicit rather than relying on `replace`
    # downcasting the object column, which pandas has deprecated.
    selected = selected.assign(
        Payment_Behaviour=selected['Payment_Behaviour']
        .replace(payment_behaviour_codes)
        .infer_objects()
    )

    fig = px.box(
        selected,
        x=behavior,
        y='Credit_Score',
        color='Credit_Score',
        color_discrete_sequence=custom_colors,
        category_orders=category_order,
    )
    fig.update_layout(
        yaxis_title='Credit Score',
        xaxis_title=x_labels[behavior],
    )
    return fig

# Run the app
# Starts the Dash server for the box-plot app with debug mode enabled. The
# guard skips the call when this code is imported instead of run as the main module.
# NOTE(review): newer Dash releases document `app.run(...)` as the entry point —
# confirm `run_server` is still supported by the installed version.
if __name__ == '__main__':
    app.run_server(debug=True)
