In [11]:
import pandas as pd
import plotly.express as px

# Read the cleaned records from disk
df = pd.read_csv('cleaned_data.csv')

# Choose one aggregation per non-key column: float64 columns are averaged,
# every other column keeps the first value seen for the customer.
aggregations = {
    name: ('mean' if dtype == 'float64' else 'first')
    for name, dtype in df.dtypes.drop('Customer_ID').items()
}

# Collapse to one row per 'Customer_ID', restoring the key as a regular column
df_grouped = (
    df.groupby('Customer_ID')
    .agg(aggregations)
    .reset_index()
)

# Tally how many customers fall into each 'Credit_Score' value
credit_score_counts = df_grouped['Credit_Score'].value_counts()

print(credit_score_counts)

Standard    6943
Poor        3582
Good        1975
Name: Credit_Score, dtype: int64


In [9]:

# Credit score labels in the order used for the categorical encoding
credit_categories = ['Poor', 'Standard', 'Good']

# Build an ordered categorical from 'Credit_Score' using that label order
credit_score_categorical = pd.Categorical(
    df_grouped['Credit_Score'],
    dtype=pd.CategoricalDtype(categories=credit_categories, ordered=True),
)

# Store the integer codes (positions within credit_categories, starting at 0)
# as a new column; NOTE(review): values outside credit_categories would be
# coded -1 by pandas - confirm none are present.
df_grouped['credit_score_mapped'] = credit_score_categorical.codes

In [10]:
# Create a parallel coordinates plot coloured by credit score.
#
# 'credit_score_mapped' holds pd.Categorical codes, which start at 0
# (0 = first entry of credit_categories, 1 = second, ...). The colour range,
# midpoint and colour-bar ticks are therefore derived from the category list
# so that each tick label lines up with the code it actually represents.
credit_code_values = list(range(len(credit_categories)))
credit_code_min = credit_code_values[0]
credit_code_max = credit_code_values[-1]

fig = px.parallel_coordinates(
    df_grouped,
    color='credit_score_mapped',
    # Parallel coordinates axes are numeric, so the credit score axis uses the
    # integer-coded column instead of the string 'Credit_Score' column.
    # NOTE(review): the other two dimensions are assumed to be numeric - confirm.
    dimensions=['Amount_invested_monthly', 'Num_of_Delayed_Payment', 'credit_score_mapped'],
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=(credit_code_min + credit_code_max) / 2,
    range_color=[credit_code_min, credit_code_max],  # Span exactly the existing codes
    labels={'credit_score_mapped': 'Credit Score'}
)

# Replace the numeric colour-bar ticks with the category labels
fig.update_coloraxes(colorbar=dict(
    tickvals=credit_code_values,
    ticktext=credit_categories
))

# Show the plot
fig.show()