In [None]:
import pandas as pd
import plotly.express as px

# Read the cleaned dataset from disk
df = pd.read_csv('cleaned_data.csv')

# Every column except the grouping key 'Customer_ID' gets an aggregation:
# float64 columns are averaged, all other columns use 'first'.
value_columns = df.columns.drop('Customer_ID')
aggregations = {
    col: ('mean' if df[col].dtype == 'float64' else 'first')
    for col in value_columns
}

# Collapse to one row per customer, then turn 'Customer_ID' back into a column
per_customer = df.groupby('Customer_ID').agg(aggregations)
df_grouped = per_customer.reset_index()


In [None]:

# Credit score labels, listed in the order used for the integer encoding
credit_categories = ['Poor', 'Standard', 'Good']

# View 'Credit_Score' as an ordered categorical restricted to those labels
credit_score_categorical = pd.Categorical(
    df_grouped['Credit_Score'],
    categories=credit_categories,
    ordered=True,
)

# Store each row's integer category code in a new column.
# NOTE(review): labels not listed in credit_categories (or missing values)
# should come out as code -1 under pandas' Categorical semantics -- confirm
# none are expected in the data.
df_grouped['credit_score_mapped'] = credit_score_categorical.codes

# Copy of the frame without rows that contain any missing value.
# NOTE(review): not referenced by the cells visible here, which keep using
# df_grouped -- confirm which frame downstream code should use.
df_grouped_cleaned = df_grouped.dropna()

# Preview the first rows (shown as the cell output in a notebook)
df_grouped.head()

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Draw a single stratified 5% sample of the per-customer rows, stratified on
# 'Credit_Score'. One split is sufficient: generating 100 splits in a loop
# only ever kept the last one and discarded the rest. The seed is fixed so
# the sample is reproducible (note: this is the first split for seed 0, not
# the hundredth, so the selected rows differ from the looped version).
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.05, random_state=0)

# split() yields (train, test) arrays of positional row indices; take the
# "test" part as the sample and select by position with .iloc so the result
# does not depend on df_grouped having a default 0..n-1 index.
_, index = next(sss.split(df_grouped, df_grouped['Credit_Score']))
df_sample = df_grouped.iloc[index]


In [None]:
# Summaries of the sample: rows per 'Credit_Score' value, and the array of
# column labels available for plotting
credit_score_counts = df_sample['Credit_Score'].value_counts()
column_names = df_sample.columns.values

# Show the class balance first, then the column labels
print(credit_score_counts)

print(column_names)

In [None]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

# Create the Dash application object
app = dash.Dash(__name__)

# Checklist with a single option; its value is a list that contains 'CB'
# while the box is ticked and starts out empty (unticked)
colorblind_toggle = dcc.Checklist(
    id='colorblind-checkbox',
    options=[{'label': 'Use colorblind-friendly color scale', 'value': 'CB'}],
    value=[],
)

# Graph placeholder whose figure is supplied by the callback
parallel_plot = dcc.Graph(id='parallel-coordinates-plot')

# Page layout: the toggle above the plot
app.layout = html.Div([colorblind_toggle, parallel_plot])

# Define a callback to update the plot
@app.callback(
    Output('parallel-coordinates-plot', 'figure'),
    [Input('colorblind-checkbox', 'value')]
)
def update_plot(colorblind_friendly):
    """Build the parallel-coordinates figure for the sampled customers.

    Args:
        colorblind_friendly: Current value of the 'colorblind-checkbox'
            checklist, i.e. a list that contains 'CB' when the box is
            ticked. ``None`` is treated the same as an empty list.

    Returns:
        A Plotly figure drawn from ``df_sample``, coloured by
        'credit_score_mapped' with the colour bar ticks relabelled using
        ``credit_categories``.
    """
    # Guard against a None value so the membership test cannot raise
    use_colorblind_scale = 'CB' in (colorblind_friendly or [])
    color_scale = (
        px.colors.sequential.Cividis
        if use_colorblind_scale
        else px.colors.diverging.Tealrose
    )

    fig = px.parallel_coordinates(
        df_sample,
        color='credit_score_mapped',
        # Each column appears once; listing 'Num_of_Delayed_Payment' twice
        # only rendered the same axis two times.
        dimensions=[
            'Annual_Income',
            'Amount_invested_monthly',
            'Credit_Utilization_Ratio',
            'Num_of_Delayed_Payment',
            'Outstanding_Debt',
            'credit_score_mapped',
        ],
        color_continuous_scale=color_scale,
        color_continuous_midpoint=1,
        range_color=[0, 2],  # Set the color map to go from 0 to 2
        labels={'credit_score_mapped': 'Credit Score'}
    )

    # Update color bar ticks to category labels
    fig.update_coloraxes(colorbar=dict(
        tickvals=[0, 1, 2],
        ticktext=credit_categories
    ))

    return fig

# Run the app
if __name__ == '__main__':
    # Prefer `app.run`, the current Dash entry point; fall back to the legacy
    # `run_server` only on older releases that do not provide `run`.
    # NOTE(review): `run_server` is understood to be unavailable in Dash 3 --
    # confirm against the installed Dash version.
    start_server = app.run if hasattr(app, 'run') else app.run_server
    start_server(debug=True)