In [16]:
import pandas as pd
import plotly.express as px

# Read the cleaned dataset from disk
df = pd.read_csv('cleaned_data.csv')

# Pick one aggregation per non-key column: float64 columns are averaged,
# every other column keeps the first value found within the group
aggregations = {
    col: ('mean' if dtype == 'float64' else 'first')
    for col, dtype in df.dtypes.drop('Customer_ID').items()
}

# Collapse to one row per customer and turn 'Customer_ID' back into a regular column
df_grouped = (
    df.groupby('Customer_ID')
    .agg(aggregations)
    .reset_index()
)


In [17]:

# Credit score levels in ascending order (this order defines the integer codes below)
credit_categories = ['Poor', 'Standard', 'Good']

# Interpret the 'Credit_Score' column as an ordered categorical over those levels
credit_score_categorical = pd.Categorical(
    df_grouped['Credit_Score'],
    dtype=pd.CategoricalDtype(categories=credit_categories, ordered=True),
)

# Store the integer code of each row's category in a new column:
# 'Poor' -> 0, 'Standard' -> 1, 'Good' -> 2; values outside the list get -1
df_grouped['credit_score_mapped'] = credit_score_categorical.codes

# Copy of the grouped frame without any row that contains a missing value
df_grouped_cleaned = df_grouped.dropna()

# Preview the first rows, now including the mapped column
df_grouped.head()

Unnamed: 0,Customer_ID,ID,Month,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,...,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Behaviour_Spending_Level,Behaviour_Value_Size,Monthly_Balance,Credit_Score,credit_score_mapped
0,CUS_0x1000,0x1628a,January,17,Lawyer,30625.94,2706.16,6,5,27,...,127.5,Yes,42.94,160.591667,Low_spent_Large_value_payments,Low_spent,Large_value_payments,336.249167,Standard,1
1,CUS_0x1009,0x66a2,January,25,Mechanic,52312.68,4250.39,6,5,17,...,370.5,Not available,108.37,169.694545,Low_spent_Medium_value_payments,Low_spent,Medium_value_payments,413.219167,Standard,1
2,CUS_0x100b,0x1ef6,January,18,Media_Manager,113781.39,9549.78,1,4,1,...,188.5,No,0.0,437.2325,Low_spent_Medium_value_payments,Low_spent,Medium_value_payments,782.786667,Poor,0
3,CUS_0x1011,0x17646,January,43,Doctor,58918.47,5208.87,3,3,17,...,188.5,Yes,123.43,283.991818,High_spent_Large_value_payments,High_spent,Large_value_payments,362.038333,Standard,1
4,CUS_0x1013,0x243ea,January,43,Mechanic,98620.98,7962.42,3,3,6,...,212.5,No,228.02,378.729,High_spent_Medium_value_payments,High_spent,Medium_value_payments,496.908333,Standard,1


In [18]:
from sklearn.model_selection import StratifiedShuffleSplit

# Draw a single stratified 5% sample of customers that preserves the
# 'Credit_Score' class proportions. One split is all that is needed; asking
# for 100 splits and looping over them only to keep the last one did 99
# splits' worth of wasted work. random_state keeps the sample reproducible.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.05, random_state=0)

# split() yields (train_positions, test_positions); the "test" side is the
# 5% sample. These are integer positions, so rows are selected with .iloc
# rather than the label-based .loc.
_, index = next(sss.split(df_grouped, df_grouped['Credit_Score']))
df_sample = df_grouped.iloc[index]


In [19]:
# Tally how many sampled rows fall into each 'Credit_Score' class,
# and collect every column label of the sample as an array
credit_score_counts = df_sample['Credit_Score'].value_counts()
column_names = df_sample.columns.values

# Show the class counts first, then the column labels
print(credit_score_counts)

print(column_names)

Credit_Score
Standard    347
Poor        179
Good         99
Name: count, dtype: int64
['Customer_ID' 'ID' 'Month' 'Age' 'Occupation' 'Annual_Income'
 'Monthly_Inhand_Salary' 'Num_Bank_Accounts' 'Num_Credit_Card'
 'Interest_Rate' 'Num_of_Loan' 'Type_of_Loan' 'Delay_from_due_date'
 'Num_of_Delayed_Payment' 'Changed_Credit_Limit' 'Num_Credit_Inquiries'
 'Credit_Mix' 'Outstanding_Debt' 'Credit_Utilization_Ratio'
 'Credit_History_Age' 'Payment_of_Min_Amount' 'Total_EMI_per_month'
 'Amount_invested_monthly' 'Payment_Behaviour' 'Behaviour_Spending_Level'
 'Behaviour_Value_Size' 'Monthly_Balance' 'Credit_Score'
 'credit_score_mapped']


In [20]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

# Create the Dash application object
app = dash.Dash(__name__)

# Page layout: a single-option checklist for switching the colour scale,
# followed by the graph that the callback fills with the figure
app.layout = html.Div(
    children=[
        dcc.Checklist(
            options=[
                dict(label='Use colorblind-friendly color scale', value='CB'),
            ],
            value=[],
            id='colorblind-checkbox',
        ),
        dcc.Graph(id='parallel-coordinates-plot'),
    ]
)

# Define a callback to update the plot
@app.callback(
    Output('parallel-coordinates-plot', 'figure'),
    [Input('colorblind-checkbox', 'value')]
)
def update_plot(colorblind_friendly):
    """Build the parallel-coordinates figure shown in the graph component.

    Args:
        colorblind_friendly: Current ``value`` of the 'colorblind-checkbox'
            checklist. It contains 'CB' when the option is ticked; an empty
            or missing value selects the default colour scale.

    Returns:
        A Plotly figure of ``df_sample`` coloured by 'credit_score_mapped'.
    """
    # Treat a missing checklist value like "nothing ticked" instead of
    # failing on the membership test.
    selected_options = colorblind_friendly or []
    color_scale = (
        px.colors.sequential.Cividis
        if 'CB' in selected_options
        else px.colors.diverging.Tealrose
    )

    fig = px.parallel_coordinates(
        df_sample,
        color='credit_score_mapped',
        # Each column appears once; 'Num_of_Delayed_Payment' was previously
        # listed twice, which drew the same axis two times.
        dimensions=[
            'Annual_Income',
            'Amount_invested_monthly',
            'Credit_Utilization_Ratio',
            'Num_of_Delayed_Payment',
            'Outstanding_Debt',
            'credit_score_mapped',
        ],
        color_continuous_scale=color_scale,
        color_continuous_midpoint=1,
        range_color=[0, 2],  # Set the color map to go from 0 to 2
        labels={'credit_score_mapped': 'Credit Score'}
    )

    # Replace the numeric colour-bar ticks (0, 1, 2) with the category labels
    fig.update_coloraxes(colorbar=dict(
        tickvals=[0, 1, 2],
        ticktext=credit_categories
    ))

    return fig

# Run the app: start the Dash server only when this code executes as the
# main module (not when it is imported). debug=True is passed to Dash's
# run_server.
# NOTE(review): newer Dash releases provide `app.run` in place of
# `run_server` — confirm against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True)