In [1]:
import sys
!{sys.executable} -m pip install --upgrade nbformat>=5.0.0
sys.path.append('../')

import numpy as np
import seaborn as sb
import matplotlib as mb
import matplotlib.pyplot as plt
import plotly as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import sklearn as sk
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import feature_engine as fe
import sys
from assets.transformers import pipeline, scaling_transformer

In [2]:
bc = pd.read_csv("../assets/processed-bank-churners.csv")
bc

Unnamed: 0,Customer_Status,Age,Gender,Income_Level,Tenure_Months,Inactive_Months_In_Last_12,Credit_Limit,Total_Trans_Amount,Total_Trans_Count,Avg_Utilization_Ratio
0,Existing Customer,41,F,Unknown,30,1,5417.0,1768,56,0.35
1,Attrited Customer,60,M,$40K - $60K,47,1,3735.0,8193,66,0.32
2,Existing Customer,47,M,Less than $40K,34,1,8390.0,8316,92,0.00
3,Existing Customer,44,M,$60K - $80K,37,2,1682.0,3309,67,0.55
4,Existing Customer,47,F,Less than $40K,36,2,2500.0,4265,87,0.47
...,...,...,...,...,...,...,...,...,...,...
191,Existing Customer,31,M,$40K - $60K,22,1,9096.0,2318,54,0.00
192,Existing Customer,50,F,Less than $40K,38,3,2775.0,4608,84,0.53
193,Existing Customer,38,F,Less than $40K,29,3,3590.0,1472,37,0.54
194,Attrited Customer,39,F,$40K - $60K,31,3,3333.0,3348,50,0.56


In [3]:
I want an interactive visualisation with plotly that plots usage against (last column), against credit limit and finally against customer status

SyntaxError: invalid syntax (1990334511.py, line 1)

In [4]:
fig = px.scatter_3d(bc, 
                    x="Credit_Limit", 
                    y=bc.columns[-1],
                    z="Age",
                    color="Customer_Status",
                    size="Credit_Limit",
                    title="Interactive 3D: Credit Limit vs Usage vs Customer Status",
                    labels={
                        "Credit_Limit": "Credit Limit ($)",
                        bc.columns[-1]: "Usage",
                        "Age": "Customer Age"
                    },
                    color_discrete_map={
                        "Existing Customer": "blue",
                        "Attrited Customer": "red"
                    })

fig.update_layout(
    scene=dict(
        xaxis_title="Credit Limit ($)",
        yaxis_title="Usage",
        zaxis_title="Age"
    ),
    width=800,
    height=600
)

fig.show()

In [5]:
# Create subplots
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Usage vs Credit Limit', 'Usage Distribution by Status', 
                   'Credit Limit vs Customer Status', 'Usage vs Age'),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

# Plot 1: Usage vs Credit Limit colored by Customer Status
for status in bc['Customer_Status'].unique():
    subset = bc[bc['Customer_Status'] == status]
    fig.add_trace(
        go.Scatter(x=subset['Credit_Limit'], 
                  y=subset[bc.columns[-1]],
                  mode='markers',
                  name=f'{status}',
                  marker=dict(size=6, opacity=0.7)),
        row=1, col=1
    )

# Plot 2: Usage distribution by Customer Status
for status in bc['Customer_Status'].unique():
    subset = bc[bc['Customer_Status'] == status]
    fig.add_trace(
        go.Histogram(x=subset[bc.columns[-1]], 
                    name=f'{status} Usage',
                    opacity=0.7),
        row=1, col=2
    )

# Plot 3: Credit Limit by Customer Status
fig.add_trace(
    go.Box(x=bc['Customer_Status'], 
           y=bc['Credit_Limit'],
           name='Credit Limit Distribution'),
    row=2, col=1
)

# Plot 4: Usage vs Age
fig.add_trace(
    go.Scatter(x=bc['Age'], 
              y=bc[bc.columns[-1]],
              mode='markers',
              marker=dict(color=bc['Credit_Limit'], 
                         colorscale='Viridis',
                         size=8,
                         opacity=0.7,
                         colorbar=dict(title="Credit Limit")),
              name='Usage vs Age'),
    row=2, col=2
)

fig.update_layout(height=800, title_text="Interactive Banking Customer Analysis Dashboard")


In [6]:
# Interactive scatter plot: Income Level vs Gender vs Customer Status
fig = px.scatter(bc, 
                x='Gender', 
                y='Income_Category',
                color='Customer_Status',
                size='Age',
                hover_data=['Age', 'Credit_Limit', 'Tenure_Months'],
                title='Income Level vs Gender by Customer Status',
                labels={
                    'Gender': 'Gender',
                    'Income_Category': 'Income Level'
                },
                color_discrete_map={
                    'Existing Customer': 'blue',
                    'Attrited Customer': 'red'
                })

fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGray')))
fig.update_layout(
    width=900,
    height=600,
    xaxis_title='Gender',
    yaxis_title='Income Level'
)

fig.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['Customer_Status', 'Age', 'Gender', 'Income_Level', 'Tenure_Months', 'Inactive_Months_In_Last_12', 'Credit_Limit', 'Total_Trans_Amount', 'Total_Trans_Count', 'Avg_Utilization_Ratio'] but received: Income_Category

In [11]:
# First, create a copy with modified gender labels
bc_display = bc.copy()
bc_display['Gender_Short'] = bc_display['Gender'].map({'Female': 'F', 'Male': 'M'})

fig = px.sunburst(bc_display, 
                 path=['Customer_Status', 'Gender_Short', 'Income_Level'],
                 title='Customer Status → Gender → Income Level Breakdown',
                 color_discrete_sequence=px.colors.qualitative.Set3)

fig.update_traces(
    hovertemplate='<b>%{label}</b><br>' +
                  'Count: %{value}<br>' +
                  '<extra></extra>',
    textinfo="label"
)

fig.update_layout(
    width=700,
    height=700
)

fig.show()

ValueError: ('None entries cannot have not-None children', ('Attrited Customer', 'nan', '$120K +'))

In [17]:
# Create custom colors with red for attrited customers
custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#F38BA8', '#A8E6CF']

fig = px.sunburst(bc, 
                 path=['Customer_Status', 'Gender', 'Income_Level'],
                 title='Comparison Between Customer Status, Gender and Income Level',
                 color_discrete_sequence=custom_colors)

fig.update_traces(
    hovertemplate='<b>%{label}</b><br>' +
                  'Count: %{value}<br>' +
                  '<extra></extra>',
    textinfo="label+percent parent",
    marker=dict(
        line=dict(
            color="darkred",
            width=2
        )
    )
)

fig.update_layout(
    title='Comparison Between Customer Status, Gender and Income Level',
    width=700,
    height=700
)

fig.show()

In [16]:
# Create custom colors with red for attrited customers
custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#F38BA8', '#A8E6CF']

fig = px.sunburst(bc, 
                 path=['Customer_Status', 'Gender', 'Income_Level'],
                 title='Comparison Between Customer Status, Gender and Income Level',
                 color_discrete_sequence=custom_colors)

fig.update_traces(
    hovertemplate='<b>%{label}</b><br>' +
                  'Count: %{value}<br>' +
                  '<extra></extra>',
    textinfo="label+percent parent",
    marker=dict(
        line=dict(
            color="darkred",
            width=2
        )
    )
)

fig.update_layout(
    width=700,
    height=700
)

fig.show()

In [None]:
# Bubble chart with transaction metrics
fig = px.scatter(bc, 
                x='Total_Trans_Count', 
                y='Total_Trans_Amount',
                size='Credit_Limit',
                color='Customer_Status',
                hover_name='Customer_Status',
                hover_data=['Age', 'Income_Level', 'Tenure_Months'],
                title='Transaction Bubble Chart: Count vs Amount (sized by Credit Limit)',
                labels={
                    'Total_Trans_Count': 'Total Transaction Count',
                    'Total_Trans_Amount': 'Total Transaction Amount ($)'
                },
                size_max=50,
                color_discrete_map={
                    'Existing Customer': 'blue',
                    'Attrited Customer': 'red'
                })

fig.update_layout(
    width=900,
    height=600
)

fig.show()

ValueError: Value of 'hover_data_1' is not the name of a column in 'data_frame'. Expected one of ['Customer_Status', 'Age', 'Gender', 'Income_Level', 'Tenure_Months', 'Inactive_Months_In_Last_12', 'Credit_Limit', 'Total_Trans_Amount', 'Total_Trans_Count', 'Avg_Utilization_Ratio'] but received: Income_Category

In [None]:
# Parallel coordinates plot for multi-dimensional transaction analysis
fig = px.parallel_coordinates(bc, 
                            dimensions=['Total_Trans_Ct', 'Total_Trans_Amt', 'Credit_Limit', 'Age'],
                            color='Customer_Status',
                            color_discrete_map={
                                'Existing Customer': 'blue',
                                'Attrited Customer': 'red'
                            },
                            title='Parallel Coordinates: Transaction Patterns by Customer Status')

fig.update_layout(
    width=900,
    height=600
)

fig.show()