In [1]:
import pandas as pd
from scipy.stats import skew, kurtosis
# Location of the source CSV. This is a hard-coded absolute path, so it must
# be edited to match wherever the file lives in the current environment.
file_path = "/content/Bank-Customer-Attrition-Insights-Data (3).csv"  # Update with the correct path

# Read the whole file into the DataFrame used by the analysis cells below.
df = pd.read_csv(filepath_or_buffer=file_path)

In [2]:
# Names of the numeric columns to analyse, one per line for easy editing.
numerical_columns = [
    "CreditScore",
    "Balance",
    "NumOfProducts",
]

In [6]:
# Distribution-shape statistics (skewness and kurtosis) for every column
# listed in `numerical_columns`: one row per statistic, one column per feature.
#
# The table is built in a single constructor call from the computed values so
# that its columns are float64. Pre-allocating an empty frame with only
# index/columns and filling it cell by cell through .loc leaves every column
# as dtype=object, and numeric operations on the table (for example
# DataFrame.round) then skip those columns.
#
# skew() and kurtosis() are called with scipy's default arguments, which per
# the scipy documentation means no bias correction and the Fisher ("excess")
# definition of kurtosis, where a normal distribution scores 0.
summary_stats = pd.DataFrame(
    {col: [skew(df[col]), kurtosis(df[col])] for col in numerical_columns},
    index=["skew", "kurtosis"],
)

print("Summary Statistics:")
print(summary_stats)


Summary Statistics:
         CreditScore   Balance NumOfProducts
skew       -0.071596 -0.141088      0.745456
kurtosis   -0.426113 -1.489267      0.582089


In [7]:
def _show_value_counts(frame, column):
    """Print a heading line, then the value counts of ``frame[column]``."""
    print(f"\nDistribution of {column}:")
    print(frame[column].value_counts())


# NOTE(review): "Age" is listed with the categorical columns; value_counts()
# on it yields one row per distinct age rather than a handful of categories.
# Confirm whether it is meant to be binned into age groups first.
categorical_columns = ["Gender", "Geography", "Age"]
for col in categorical_columns:
    _show_value_counts(df, col)


Distribution of Gender:
Gender
Male      5457
Female    4543
Name: count, dtype: int64

Distribution of Geography:
Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

Distribution of Age:
Age
37    478
38    477
35    474
36    456
34    447
     ... 
92      2
82      1
88      1
85      1
83      1
Name: count, Length: 70, dtype: int64


In [8]:
# Minimum, maximum and mean of NumOfProducts, computed separately for rows
# where IsActiveMember is 1 and rows where it is 0.
# NOTE(review): the printed headings say "Transaction Counts", but the column
# being summarised is NumOfProducts — presumably used as a proxy; confirm.
active_stats = df.loc[df["IsActiveMember"] == 1, "NumOfProducts"].agg(
    ["min", "max", "mean"]
)
inactive_stats = df.loc[df["IsActiveMember"] == 0, "NumOfProducts"].agg(
    ["min", "max", "mean"]
)

# Each heading and its table go out in one call; sep="\n" puts the table on
# the line after the heading.
print("\nTransaction Counts for Active Customers:", active_stats, sep="\n")
print("\nTransaction Counts for Inactive Customers:", inactive_stats, sep="\n")


Transaction Counts for Active Customers:
min     1.000000
max     4.000000
mean    1.535624
Name: NumOfProducts, dtype: float64

Transaction Counts for Inactive Customers:
min     1.000000
max     4.000000
mean    1.524438
Name: NumOfProducts, dtype: float64


In [9]:
# Pairwise correlation matrix of the three numeric columns. Pearson is the
# pandas default for DataFrame.corr and is spelled out here for the reader.
correlations = df.loc[:, ["CreditScore", "Balance", "NumOfProducts"]].corr(
    method="pearson"
)
print(
    "\nCorrelations between Credit Score, Balance, and Transaction Count:",
    correlations,
    sep="\n",
)



Correlations between Credit Score, Balance, and Transaction Count:
               CreditScore   Balance  NumOfProducts
CreditScore       1.000000  0.006268       0.012238
Balance           0.006268  1.000000      -0.304180
NumOfProducts     0.012238 -0.304180       1.000000


In [10]:
# Mean Balance and mean NumOfProducts, computed separately for rows where
# Exited is 1 (churned) and rows where Exited is 0 (retained).
churned_stats = df.loc[df["Exited"] == 1, ["Balance", "NumOfProducts"]].mean()
retained_stats = df.loc[df["Exited"] == 0, ["Balance", "NumOfProducts"]].mean()

# Heading and values are emitted together; sep="\n" keeps them on separate lines.
print("\nChurned Customers' Balance and Transaction Activity:", churned_stats, sep="\n")
print("\nRetained Customers' Balance and Transaction Activity:", retained_stats, sep="\n")



Churned Customers' Balance and Transaction Activity:
Balance          91109.476006
NumOfProducts        1.475466
dtype: float64

Retained Customers' Balance and Transaction Activity:
Balance          72742.750663
NumOfProducts        1.544210
dtype: float64


In [11]:
# Row-wise boolean masks:
#   low_credit   - CreditScore strictly below its own 25th percentile
#   high_balance - Balance strictly above its own 75th percentile
low_credit = df["CreditScore"].lt(df["CreditScore"].quantile(0.25))
high_balance = df["Balance"].gt(df["Balance"].quantile(0.75))

# The mean of a boolean mask is the fraction of rows where it is True, i.e.
# the empirical share of customers meeting both conditions at once.
probability = (low_credit & high_balance).mean()
print("\nProbability of a customer having both a low credit score and a high balance:", probability)


Probability of a customer having both a low credit score and a high balance: 0.0628
