In [12]:
import pandas as pd
from scipy.stats import skew, kurtosis


# Load the customer attrition dataset that every later step reads from `df`.
# NOTE(review): absolute path — presumably a hosted-notebook upload location;
# confirm it exists in the environment where this cell is re-run.
csv_path = "/content/Bank-Customer-Attrition-Insights-Data.csv"
df = pd.read_csv(csv_path)


# Descriptive statistics for the numeric features, one row per feature
# (describe() is transposed so extra statistics can be appended as columns).
numerical_columns = ["Balance", "CreditScore", "NumOfProducts"]
numeric_features = df[numerical_columns]
summary_stats = numeric_features.describe().T

# describe() skips missing values, but scipy's skew()/kurtosis() propagate NaN
# by default, so a single missing entry would blank out these two columns while
# the rest of the row stays populated. Dropping NaNs per column first keeps
# every statistic in the table computed over the same observations; results
# are unchanged when a column has no missing values.
summary_stats["skewness"] = numeric_features.apply(lambda col: skew(col.dropna()))
# kurtosis() is called with its defaults (Fisher definition: normal == 0).
summary_stats["kurtosis"] = numeric_features.apply(lambda col: kurtosis(col.dropna()))

print("Summary Statistics:\n", summary_stats)


# Frequency tables for the categorical columns, then a numeric summary of Age.
categorical_breakdowns = (
    ("\nCustomer Distribution by Gender:\n", "Gender"),
    ("\nCustomer Distribution by Geography:\n", "Geography"),
)
for heading, column in categorical_breakdowns:
    print(heading, df[column].value_counts())

age_summary = df["Age"].describe()
print("\nAge Distribution:\n", age_summary)


def _min_max_mean(values):
    """Return the minimum, maximum and mean of ``values`` keyed as Min/Max/Mean."""
    return {"Min": values.min(), "Max": values.max(), "Mean": values.mean()}


# NumOfProducts values split by the IsActiveMember flag (1 vs. 0).
# NOTE(review): the printed labels say "Transaction Stats", but the values come
# from the NumOfProducts column — confirm that this proxy is intended.
membership_flag = df["IsActiveMember"]
active_customers = df.loc[membership_flag == 1, "NumOfProducts"]
inactive_customers = df.loc[membership_flag == 0, "NumOfProducts"]

print("\nTransaction Stats for Active Customers:", _min_max_mean(active_customers))
print("\nTransaction Stats for Inactive Customers:", _min_max_mean(inactive_customers))


# Pairwise correlation (pandas' default method) between the three numeric features.
correlation_columns = ["CreditScore", "Balance", "NumOfProducts"]
correlation_matrix = df.loc[:, correlation_columns].corr()
print("\nCorrelation Matrix:\n", correlation_matrix)


# Compare mean Balance and mean NumOfProducts between rows with Exited == 0
# and rows with Exited == 1.
# NOTE(review): "Transaction Count Mean" is computed from NumOfProducts —
# confirm that this column is the intended stand-in for transaction counts.
exited_flag = df["Exited"]
stayed_customers = df.loc[exited_flag == 0]
churned_customers = df.loc[exited_flag == 1]

print("\nBalance & Transaction Differences:")
churn_groups = (
    ("Stayed Customers - Balance Mean:", stayed_customers),
    ("Churned Customers - Balance Mean:", churned_customers),
)
for group_label, group_rows in churn_groups:
    print(
        group_label,
        group_rows["Balance"].mean(),
        "Transaction Count Mean:",
        group_rows["NumOfProducts"].mean(),
    )


# Share of all customers that combine a low credit score with a high balance.
# "Low" is a credit score strictly below 580; "high" is a balance strictly
# above the median balance of the whole dataset.
low_credit_threshold = 580
high_balance_threshold = df["Balance"].median()

has_low_credit = df["CreditScore"] < low_credit_threshold
has_high_balance = df["Balance"] > high_balance_threshold
low_credit_high_balance_customers = df.loc[has_low_credit & has_high_balance]

matching_count = len(low_credit_high_balance_customers)
probability_low_credit_high_balance = matching_count / len(df)
print("\nProbability of Low Credit Score & High Balance:", probability_low_credit_high_balance)


Summary Statistics:
                  count          mean           std    min    25%       50%  \
Balance        10000.0  76485.889288  62397.405202    0.0    0.0  97198.54   
CreditScore    10000.0    650.528800     96.653299  350.0  584.0    652.00   
NumOfProducts  10000.0      1.530200      0.581654    1.0    1.0      1.00   

                     75%        max  skewness  kurtosis  
Balance        127644.24  250898.09 -0.141088 -1.489267  
CreditScore       718.00     850.00 -0.071596 -0.426113  
NumOfProducts       2.00       4.00  0.745456  0.582089  

Customer Distribution by Gender:
 Gender
Male      5457
Female    4543
Name: count, dtype: int64

Customer Distribution by Geography:
 Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

Age Distribution:
 count    10000.000000
mean        38.921800
std         10.487806
min         18.000000
25%         32.000000
50%         37.000000
75%         44.000000
max         92.000000
Name: Age, dtype: 