# In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
from sklearn.preprocessing import LabelEncoder
import plotly.io as pio
import plotly.express as px
# Default renderer used when a Plotly figure is displayed through plotly.io.
pio.renderers.default = "notebook"

## Load the data set
df = pd.read_csv("cleaned_data.csv")

## DATA HANDLING
# Numeric encoding of the credit score: 'Poor' -> 1, 'Standard' -> 2,
# 'Good' -> 3. Any other value becomes NaN because Series.map is used.
# NOTE(review): label_encoder is created but not used in the visible code.
label_encoder = LabelEncoder()
custom_encoding = dict(Good=3, Standard=2, Poor=1)
df['Credit_Score_Numeric'] = df['Credit_Score'].map(custom_encoding)

# Age groups. With right=False every bin contains its left edge and excludes
# its right edge, so values below 13 or from 60 upwards end up as NaN.
age_bins = [13, 18, 30, 40, 50, 60]
age_labels = ["<18", '18-29', '30-39', '40-49', '50-59']
df['Age_Group'] = pd.cut(
    x=df['Age'],
    bins=age_bins,
    labels=age_labels,
    right=False,
)

# Annual income groups (left-closed bins, same convention as the age groups).
income_bins = [0, 21000, 35000, 49000, 73000, 100000, 190000]
income_labels = [
    '0k-21k',
    '22k-34k',
    '35k-48k',
    '49k-72k',
    '73k-100k',
    '100k+',
]
df['Income_Group'] = pd.cut(
    x=df['Annual_Income'],
    bins=income_bins,
    labels=income_labels,
    right=False,
)

# Groups for the number of delayed payments (left-closed bins again; the
# last group '19+' covers 19 up to, but not including, 29).
delayed_payment_bins = [0, 1, 3, 9, 13, 16, 19, 29]
delayed_payment_labels = [
    '0',
    '1-2',
    '3-8',
    '9-12',
    '13-15',
    '16-18',
    '19+',
]
df['delayed_payment_group'] = pd.cut(
    x=df['Num_of_Delayed_Payment'],
    bins=delayed_payment_bins,
    labels=delayed_payment_labels,
    right=False,
)

## Create the Dash app and describe its page layout:
## two dropdowns stacked above the sunburst graph.
app = dash.Dash(__name__)

app.layout = html.Div(
    children=[
        # Dropdown for the personal variable (first ring of the sunburst)
        dcc.Dropdown(
            id='personal_slct',
            options=[
                dict(label='Age', value='Age_Group'),
                dict(label='Occupation', value='Occupation'),
                dict(label='Income group', value='Income_Group'),
            ],
            value='Age_Group',
            placeholder='Personal variable',
            optionHeight=35,
            multi=False,
            disabled=False,
            searchable=True,
            search_value='',
            clearable=True,
            style={'width': "100%"},
        ),
        # Dropdown for the behavioural variable (second ring of the sunburst)
        dcc.Dropdown(
            id='behavioural_slct',
            options=[
                dict(label='Number of delayed payments', value='delayed_payment_group'),
                dict(label='Spending level', value='Behaviour_Spending_Level'),
                dict(label='Value size of payments', value='Behaviour_Value_Size'),
            ],
            value='delayed_payment_group',
            placeholder='Behavioural variable',
            optionHeight=35,
            multi=False,
            disabled=False,
            searchable=True,
            search_value='',
            clearable=True,
            style={'width': "100%"},
        ),
        # Graph whose figure is supplied by the callback on the two dropdowns
        dcc.Graph(id='Sunburst_Graph'),
    ]
)
 
#---------------------------------------------------------------
# Connecting the Dropdown values to the graph
@app.callback(
    Output(component_id='Sunburst_Graph', component_property='figure'),
    [
        Input(component_id='personal_slct', component_property='value'),
        Input(component_id='behavioural_slct', component_property='value'),
    ]
)
def update_graph(personal_slct, behavioural_slct):
    """Build the sunburst figure for the two dropdown selections.

    The sunburst path is personal variable -> behavioural variable ->
    credit score, coloured by ``Credit_Score_Numeric`` on a red/yellow/green
    scale fixed to the range 1..3.

    Args:
        personal_slct: Name of the ``df`` column picked in the personal
            dropdown, or ``None`` when that dropdown has been cleared.
        behavioural_slct: Name of the ``df`` column picked in the behavioural
            dropdown, or ``None`` when that dropdown has been cleared.

    Returns:
        The Plotly sunburst figure, or ``dash.no_update`` (leave the current
        figure as it is) when either dropdown has no selection.
    """
    # Both dropdowns are clearable, so either value can arrive as None.
    # Without this guard the ``.replace`` calls below raise AttributeError.
    if not personal_slct or not behavioural_slct:
        return dash.no_update

    # Display names: column name with underscores replaced by spaces.
    personal_label = personal_slct.replace('_', ' ')
    behavioural_label = behavioural_slct.replace('_', ' ')

    # Names of the helper columns that carry the "<name>: <value>" labels.
    personal_col = personal_label + '_new'
    behavioural_col = behavioural_label + '_new'
    credit_col = 'Credit Score_new'

    # Drop rows missing any of the three values needed for the path.
    complete_rows = df.dropna(
        subset=["Credit_Score", behavioural_slct, personal_slct]
    )

    # assign() returns a new frame, so nothing is written into the dropna
    # result (which can trigger pandas' SettingWithCopyWarning).
    df_clean = complete_rows.assign(**{
        personal_col: personal_label + ': ' + complete_rows[personal_slct].astype(str),
        behavioural_col: behavioural_label + ': ' + complete_rows[behavioural_slct].astype(str),
        credit_col: 'Credit Score: ' + complete_rows['Credit_Score'].astype(str),
    })

    # Root, branches, leaves
    path_columns = [personal_col, behavioural_col, credit_col]

    ## create sunburst
    fig = px.sunburst(
        data_frame=df_clean,
        path=path_columns,
        color="Credit_Score_Numeric",
        color_continuous_scale=["red", "yellow", "green"],
        range_color=[1, 3],
        maxdepth=2,
        hover_data=path_columns,
    )

    ## choose what text on traces
    fig.update_traces(textinfo='label+percent parent', hovertemplate='%{label}')

    ## Layout titles
    fig.update_layout(
        title_text="Sunburst graph of personal groups and behavioral variables selected in the dropdown.",
        coloraxis_colorbar_title='Credit Score',
        margin=dict(t=0, l=0, r=0, b=0),
    )

    return fig
        

#---------------------------------------------------------------
if __name__ == '__main__':
    # Newer Dash releases replace ``run_server`` with ``run``. Use ``run``
    # when the installed version has it; otherwise fall back to
    # ``run_server`` so older installs keep working.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=5051)