In [None]:
import os

import matplotlib.pyplot as plt
import mysql.connector
import numpy as np
import pandas as pd
import seaborn as sns


# CONNECT TO THE SERVER
# Connection settings default to the original local-development values
# (localhost:3306, user "root") but can be overridden with the
# MYSQL_HOST / MYSQL_PORT / MYSQL_USER / MYSQL_PASSWORD environment
# variables, so the notebook can be pointed at another server or a
# less-privileged account without editing (or committing) credentials.
connection_settings = {
    "host": os.environ.get("MYSQL_HOST", "localhost"),
    "port": int(os.environ.get("MYSQL_PORT", "3306")),
    "user": os.environ.get("MYSQL_USER", "root"),
}

# Only pass a password when one is supplied; otherwise the call is made
# with exactly the same arguments as before.
if "MYSQL_PASSWORD" in os.environ:
    connection_settings["password"] = os.environ["MYSQL_PASSWORD"]

cnx = mysql.connector.connect(**connection_settings)

In [None]:
# Selects every column of every row of banking_domain.customer; the cell
# that runs this query loads the whole result set into a DataFrame.
query = "select * from banking_domain.customer"

In [None]:
# Load the customer table into a DataFrame.
# NOTE: pandas documents SQLAlchemy connectables and sqlite3 connections
# as the supported inputs for read_sql; with a raw mysql.connector
# connection it still runs but emits a UserWarning.
df = pd.read_sql(query, cnx)

# Report only the size of the result. Printing the frame itself would
# write raw customer records into the saved notebook output as plain
# text; the following cells (head / describe / info) give a controlled
# preview instead.
print(f"Loaded {df.shape[0]} rows x {df.shape[1]} columns")

In [None]:
# Release the database connection. The cells visible below this point
# work only on the in-memory DataFrame `df`, not on `cnx`.
cnx.close()

In [None]:
# Preview the first five rows to check column names and value formats.
df.head(5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the frame.
df.describe()

In [None]:
# Column dtypes and non-null counts; useful for spotting missing values
# before the categorical / numerical analysis below.
df.info()

In [None]:
# Bucket customers by estimated income. Because right=False, every band
# is closed on the left and open on the right:
#   Low  -> [0, 100000)
#   Med  -> [100000, 300000)
#   High -> [300000, inf)
# Values that fall outside all bands (e.g. negative incomes) become NaN.
bins = [0, 100_000, 300_000, float("inf")]
labels = ["Low", "Med", "High"]

df["Income Band"] = pd.cut(
    x=df["Estimated Income"],
    bins=bins,
    right=False,
    labels=labels,
)

In [None]:
# Number of customers in each income band.
# value_counts() orders its result by frequency, so sort by index to
# draw the bars in band order; for the ordered categorical produced by
# pd.cut that is Low -> Med -> High.
income_band_counts = df["Income Band"].value_counts().sort_index()

ax = income_band_counts.plot(kind="bar")
ax.set_title("Customers per Income Band")
ax.set_xlabel("Income Band")
ax.set_ylabel("Number of customers")
plt.show()

In [None]:
# Examine the distribution of unique categories in the categorical columns

# Kept as a pandas Index (the type the previous df[[...]].columns
# expression produced) but built directly from the names, without first
# copying an 11-column slice of the frame.
categorical_cols = pd.Index([
    "BRId",
    "GenderId",
    "IAId",
    "Amount of Credit Cards",
    "Nationality",
    "Occupation",
    "Fee Structure",
    "Loyalty Classification",
    "Properties Owned",
    "Risk Weighting",
    "Income Band",
])

# Fail early, with the offending names, if the table schema has changed
# or the "Income Band" cell has not been run yet.
missing_categorical = categorical_cols.difference(df.columns)
if not missing_categorical.empty:
    raise KeyError(f"Columns not found in df: {list(missing_categorical)}")

for col in categorical_cols:
    print(f"Value Counts For {col} : ")
    # display() is provided by IPython/Jupyter and renders the counts as
    # a rich table.
    display(df[col].value_counts())

In [None]:
# UNIVARIATE ANALYSIS
# One count plot per categorical column. Every plot is drawn on a newly
# created figure: selecting figures by number with plt.figure(i) would
# re-activate any figure that already carries that number and draw on
# top of it.
for predictor in categorical_cols:
    fig, ax = plt.subplots()
    sns.countplot(data=df, x=predictor, ax=ax)
    ax.set_title(f"Number of customers by {predictor}")
    plt.show()

In [None]:
# BIVARIATE ANALYSIS
# Count plot of every categorical column, split by GenderId. Every plot
# is drawn on a newly created figure: selecting figures by number with
# plt.figure(i) would re-activate any figure that already carries that
# number and draw on top of it.
for predictor in categorical_cols:
    # GenderId split by GenderId is not a two-variable comparison.
    if predictor == "GenderId":
        continue
    fig, ax = plt.subplots()
    sns.countplot(data=df, x=predictor, hue="GenderId", ax=ax)
    ax.set_title(f"Number of customers by {predictor} and GenderId")
    plt.show()

In [None]:
# HISTOGRAM OF VALUE COUNTS FOR EVERY CATEGORICAL COLUMN EXCEPT OCCUPATION

for col in categorical_cols:
    # Occupation is deliberately left out of this set of charts.
    # NOTE(review): presumably because it has too many distinct values
    # to read on one axis - confirm.
    if col == "Occupation":
        continue
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.histplot(df[col], ax=ax)
    # Title each chart after the column it actually shows; a fixed
    # "Occupation" title would be wrong for every figure drawn here.
    ax.set_title(f"Histogram of {col} Count")
    ax.set_xlabel(col)
    ax.set_ylabel("Count")
    plt.show()

In [None]:
# NUMERICAL ANALYSIS

# NOTE(review): "Age" is plotted against "Superannuation Savings" in the
# regression-plot cell further down but is not listed here, so it is
# absent from these histograms and from anything else computed from
# numerical_cols - confirm whether that is intended.
numerical_cols = [
    "Estimated Income",
    "Superannuation Savings",
    "Credit Card Balance",
    "Bank Loans",
    "Bank Deposits",
    "Checking Accounts",
    "Saving Accounts",
    "Foreign Currency Account",
    "Business Lending",
]

# UNIVARIATE ANALYSIS AND VISUALIZATION
# Size the grid from the number of columns instead of a fixed 4x3
# layout, so there is no empty row and the cell keeps working if more
# columns are added to the list.
hist_grid_cols = 3
hist_grid_rows = max(1, -(-len(numerical_cols) // hist_grid_cols))  # ceiling division

fig, axes = plt.subplots(
    hist_grid_rows,
    hist_grid_cols,
    figsize=(15, 5 * hist_grid_rows),  # same 5x5 inch panel size as before
    squeeze=False,
)
hist_axes = axes.ravel()

for ax, col in zip(hist_axes, numerical_cols):
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(col)

# Hide any panels left over in the last row.
for ax in hist_axes[len(numerical_cols):]:
    ax.set_visible(False)

plt.show()

In [None]:
# HEATMAPS

# Pairwise correlations (DataFrame.corr default method) between the
# columns listed in numerical_cols.
correlation_matrix = df.loc[:, numerical_cols].corr()

# Annotated heatmap: each cell shows its coefficient to two decimals.
plt.figure(figsize=(12, 12))
sns.heatmap(
    data=correlation_matrix,
    cmap="coolwarm",
    fmt=".2f",
    annot=True,
)
plt.title("Correlation Matrix")
plt.show()

In [None]:
# (x, y) column pairs to inspect with a scatter plot plus fitted
# regression line.
pairs_to_plot = [
    ("Bank Deposits", "Saving Accounts"),
    ("Checking Accounts", "Saving Accounts"),
    ("Checking Accounts", "Foreign Currency Account"),
    ("Age", "Superannuation Savings"),
    ("Estimated Income", "Checking Accounts"),
    ("Bank Loans", "Credit Card Balance"),
    ("Business Lending", "Bank Loans"),
]


# One figure per pair, shown as soon as it is drawn.
for x_col, y_col in pairs_to_plot:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.regplot(
        data=df,
        x=x_col,
        y=y_col,
        scatter_kws=dict(alpha=0.4),   # semi-transparent points
        line_kws=dict(color="red"),    # best-fit line color
        ax=ax,
    )
    ax.set_title(f"Relationship between {x_col} and {y_col}", fontsize=14)
    ax.set_xlabel(x_col, fontsize=12)
    ax.set_ylabel(y_col, fontsize=12)
    fig.tight_layout()
    plt.show()

In [None]:
# Insights:

##Deposits and Savings Behavior

"""A strong correlation between Bank Deposits and Saving Accounts suggests that:
=> Customers who deposit more frequently are also likely to grow and maintain higher savings.
=> These two metrics may be capturing similar financial behavior, indicating potential overlap.
"""


## Income and Age

"""Age and income show moderate links to account balances, reflecting a typical lifecycle trend — older, higher earners build more savings and credit exposure."""


##Low Correlation with Properties Owned

"""Property ownership appears driven by external factors beyond banking behavior, which explains its weaker correlation with financial balances."""


##Business vs. Personal Banking


"""=> Business Lending moderately overlaps with Bank Loans, indicating that some customers carry both personal and business debt.
=> However, its low correlation with deposits and property metrics suggests it serves a separate customer group with different financial behaviors."""
