In [1]:
import pandas as pd
from scipy.stats import skew, kurtosis

In [2]:
# Read the customer attrition CSV into a DataFrame; the bare `df` on the
# last line makes the frame this cell's displayed output.
csv_path = "/content/Bank-Customer-Attrition-Insights-Data.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Complain,Satisfaction Score,Card Type,Point Earned
0,1,15598695,Fields,619,France,Female,42,2,0.00,1,1,1,101348.88,1,1,2,DIAMOND,464
1,2,15649354,Johnston,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,3,DIAMOND,456
2,3,15737556,Vasilyev,502,France,Female,42,8,159660.80,3,1,0,113931.57,1,1,3,DIAMOND,377
3,4,15671610,Hooper,699,France,Female,39,1,0.00,2,0,0,93826.63,0,0,5,GOLD,350
4,5,15625092,Colombo,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0,0,5,GOLD,425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15583480,Morgan,771,France,Male,39,5,0.00,2,1,0,96270.64,0,0,1,DIAMOND,300
9996,9997,15620341,Nwebube,516,France,Male,35,10,57369.61,1,1,1,101699.77,0,0,5,PLATINUM,771
9997,9998,15613886,Trevisan,709,France,Female,36,7,0.00,1,0,1,42085.58,1,1,3,SILVER,564
9998,9999,15792916,Ositadimma,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1,1,2,GOLD,339


In [3]:
# Names of the numeric columns used for the summary statistics
numerical_vars = [
    "CreditScore",
    "Balance",
    "Point Earned",
]


In [4]:
# Summary statistics for the key numerical variables.
# describe() and median() skip missing values, whereas scipy's skew/kurtosis
# propagate NaN by default. Dropping NaN per column keeps every statistic on
# the same non-missing sample; results are unchanged when nothing is missing.
numeric_data = df[numerical_vars]
summary_stats = numeric_data.describe().T
summary_stats["Median"] = numeric_data.median()
# scipy's default here is the biased (population) estimator.
summary_stats["Skewness"] = numeric_data.apply(lambda col: skew(col.dropna()))
# scipy's default is the Fisher definition, i.e. excess kurtosis
# (a normal distribution scores 0).
summary_stats["Kurtosis"] = numeric_data.apply(lambda col: kurtosis(col.dropna()))
print("Summary Statistics:\n", summary_stats)


Summary Statistics:
                 count          mean           std    min    25%       50%  \
CreditScore   10000.0    650.528800     96.653299  350.0  584.0    652.00   
Balance       10000.0  76485.889288  62397.405202    0.0    0.0  97198.54   
Point Earned  10000.0    606.515100    225.924839  119.0  410.0    605.00   

                    75%        max    Median  Skewness  Kurtosis  
CreditScore      718.00     850.00    652.00 -0.071596 -0.426113  
Balance       127644.24  250898.09  97198.54 -0.141088 -1.489267  
Point Earned     801.00    1000.00    605.00  0.008343 -1.193784  


In [5]:
# Customer make-up: Gender and Geography as percentage shares of all rows,
# Age as raw counts ordered by age value.
gender_pct = df["Gender"].value_counts(normalize=True).mul(100)
geography_pct = df["Geography"].value_counts(normalize=True).mul(100)
age_counts = df["Age"].value_counts().sort_index()

categorical_distributions = {
    "Gender": gender_pct,
    "Geography": geography_pct,
    "Age": age_counts,
}
print("\nDistribution of Customers by Gender, Geography, and Age:\n", categorical_distributions)



Distribution of Customers by Gender, Geography, and Age:
 {'Gender': Gender
Male      54.57
Female    45.43
Name: proportion, dtype: float64, 'Geography': Geography
France     50.14
Germany    25.09
Spain      24.77
Name: proportion, dtype: float64, 'Age': Age
18    22
19    27
20    40
21    53
22    84
      ..
83     1
84     2
85     1
88     1
92     2
Name: count, Length: 70, dtype: int64}


In [6]:
# Min / max / mean of "Point Earned" within each IsActiveMember group.
# NOTE(review): the printed heading says "Transaction Counts", but the column
# aggregated here is "Point Earned" -- presumably used as a proxy; confirm.
points_by_activity = df.groupby("IsActiveMember")["Point Earned"]
transaction_counts = points_by_activity.agg(["min", "max", "mean"])
print("\nTransaction Counts for Active vs. Inactive Customers:\n", transaction_counts)


Transaction Counts for Active vs. Inactive Customers:
                 min   max        mean
IsActiveMember                       
0               119  1000  607.675397
1               206  1000  605.422831


In [7]:

# Pairwise correlations between the key numeric columns
# (DataFrame.corr() with its default method).
key_columns = ["CreditScore", "Balance", "Point Earned"]
correlation_matrix = df.loc[:, key_columns].corr()
print("\nCorrelation Matrix:\n", correlation_matrix)


Correlation Matrix:
               CreditScore   Balance  Point Earned
CreditScore      1.000000  0.006268      0.000077
Balance          0.006268  1.000000      0.014608
Point Earned     0.000077  0.014608      1.000000


In [8]:

# Mean Balance and mean "Point Earned" within each Exited group.
# NOTE(review): the heading speaks of "Transactions", but the column averaged
# is "Point Earned" -- presumably a proxy for activity; confirm.
compared_columns = ["Balance", "Point Earned"]
by_exit_status = df.groupby("Exited")
churn_analysis = by_exit_status[compared_columns].agg("mean")
print("\nDifferences in Balance and Transactions Between Retained and Churned Customers:\n", churn_analysis)


Differences in Balance and Transactions Between Retained and Churned Customers:
              Balance  Point Earned
Exited                            
0       72742.750663    607.044084
1       91109.476006    604.448479


In [9]:
# Joint probability of a low credit score AND a high balance: the share of
# all rows with CreditScore below 600 and Balance above 100000.
is_low_credit = df["CreditScore"] < 600
is_high_balance = df["Balance"] > 100000
low_credit_high_balance = df[is_low_credit & is_high_balance]
probability_low_credit_high_balance = len(low_credit_high_balance) / len(df)
print("\nProbability of Low Credit Score and High Balance:", probability_low_credit_high_balance)


Probability of Low Credit Score and High Balance: 0.1452
