In [3]:
import pandas as pd
from scipy.stats import chi2_contingency, f_oneway, ttest_ind

In [4]:
# Load the engineered BankChurners CSV; the relative path is resolved against
# the notebook's working directory.
csv_path = "../data/Engineered/BankChurners_Engineered_V2.csv"
df = pd.read_csv(csv_path)

# Preview the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,...,Card_Category_Platinum,Card_Category_Silver,Avg_Transaction_Value,Engagement_Score,Credit_Pressure,High_Credit_Flag,Tenure_Group,Trans_Freq_Group,High_Contact_Flag,Stability_Score
0,0,45,1,3,2.0,39,5,1,3,12691.0,...,0,0,27.238095,46,0.061224,1,1,0,1,1.48
1,0,49,0,5,4.0,44,6,1,2,8256.0,...,0,0,39.121212,38,0.104651,1,1,0,0,2.6275
2,0,51,1,3,4.0,36,4,1,0,3418.0,...,0,0,94.35,23,0.0,0,1,0,0,2.4635
3,0,40,0,4,2.0,34,3,4,1,3313.0,...,0,0,58.55,19,0.759734,0,1,0,0,1.869
4,0,40,1,3,1.0,21,5,1,0,4716.0,...,0,0,29.142857,32,0.0,1,0,0,0,2.3375


In [5]:
# Partition customers by Attrition_Flag: rows equal to 1 form `churned`,
# rows equal to 0 form `existing`.
attrition = df['Attrition_Flag']
churned = df.loc[attrition.eq(1)]
existing = df.loc[attrition.eq(0)]

In [6]:
# Two-sample t-test on Avg_Transaction_Value: churned vs. existing customers.
# equal_var=False selects Welch's variant, which does not assume the two groups
# share a variance. `ttest_ind` is imported in the notebook's top import cell.
t_stat, p_value = ttest_ind(
    churned['Avg_Transaction_Value'],
    existing['Avg_Transaction_Value'],
    equal_var=False,
)

print("Avg Transaction Value")
print("T-statistic:", t_stat)
print("P-value:", p_value)

Avg Transaction Value
T-statistic: 1.5821145837736301
P-value: 0.1137653946048938


In [7]:
# Welch's t-test (unequal variances) on Engagement_Score:
# churned vs. existing customers.
churned_engagement = churned['Engagement_Score']
existing_engagement = existing['Engagement_Score']
t_stat, p_value = ttest_ind(churned_engagement, existing_engagement, equal_var=False)

print("Engagement Score")
print("T-statistic:", t_stat)
print("P-value:", p_value)


Engagement Score
T-statistic: -56.61143429872478
P-value: 0.0


In [8]:
# Welch's t-test (unequal variances) on Credit_Pressure:
# churned vs. existing customers.
churned_pressure = churned['Credit_Pressure']
existing_pressure = existing['Credit_Pressure']
t_stat, p_value = ttest_ind(churned_pressure, existing_pressure, equal_var=False)

print("Credit Pressure")
print("T-statistic:", t_stat)
print("P-value:", p_value)


Credit Pressure
T-statistic: -18.62248292161567
P-value: 2.7950902931999413e-72


In [9]:
# Welch's t-test (unequal variances) on Stability_Score:
# churned vs. existing customers.
churned_stability = churned['Stability_Score']
existing_stability = existing['Stability_Score']
t_stat, p_value = ttest_ind(churned_stability, existing_stability, equal_var=False)

print("Stability Score")
print("T-statistic:", t_stat)
print("P-value:", p_value)


Stability Score
T-statistic: -26.96801595463437
P-value: 1.744069796168686e-139


In [10]:
# Chi-square test of independence between High_Credit_Flag and Attrition_Flag.
# `chi2_contingency` is imported in the notebook's top import cell.
# Cross-tabulate the two columns into a table of observed counts
# (rows: High_Credit_Flag levels, columns: Attrition_Flag levels).
contingency = pd.crosstab(
    df['High_Credit_Flag'],
    df['Attrition_Flag']
)

# With scipy's default correction=True, Yates' continuity correction is applied
# whenever the table has one degree of freedom (i.e. a 2x2 table).
chi2, p, dof, expected = chi2_contingency(contingency)

print("High Credit Flag vs Churn")
print("Chi2:", chi2)
print("P-value:", p)


High Credit Flag vs Churn
Chi2: 9.38300578200016
P-value: 0.002190061638795325


In [11]:
# Chi-square test of independence between High_Contact_Flag and Attrition_Flag.
contact_flag = df['High_Contact_Flag']
churn_flag = df['Attrition_Flag']

# Observed counts: one row per High_Contact_Flag level, one column per
# Attrition_Flag level.
contingency = pd.crosstab(index=contact_flag, columns=churn_flag)
chi2, p, dof, expected = chi2_contingency(contingency)

print("High Contact Flag vs Churn")
print("Chi2:", chi2)
print("P-value:", p)


High Contact Flag vs Churn
Chi2: 272.3064199561131
P-value: 3.566505055044487e-61


In [12]:
# One-way ANOVA: does the mean of Attrition_Flag differ across Tenure_Group
# levels? `f_oneway` is imported in the notebook's top import cell.
#
# groupby splits the frame in a single pass and skips rows whose Tenure_Group is
# missing, so f_oneway never receives an empty sample (a NaN level compared
# with `==` matches no rows). sort=False keeps the groups in order of first
# appearance.
#
# NOTE(review): both variables look categorical here; a chi-square test of
# independence (as used for the flag columns) may be the more conventional
# choice -- confirm before relying on this p-value.
groups = [
    churn_flags
    for _, churn_flags in df.groupby('Tenure_Group', sort=False)['Attrition_Flag']
]

f_stat, p_value = f_oneway(*groups)

print("Tenure Group vs Churn")
print("F-statistic:", f_stat)
print("P-value:", p_value)


Tenure Group vs Churn
F-statistic: 0.6408961505830906
P-value: 0.5268414749135739


In [13]:
# One-way ANOVA: does the mean of Attrition_Flag differ across
# Trans_Freq_Group levels?
#
# groupby splits the frame in a single pass and skips rows whose
# Trans_Freq_Group is missing, so f_oneway never receives an empty sample
# (a NaN level compared with `==` matches no rows). sort=False keeps the groups
# in order of first appearance.
#
# NOTE(review): both variables look categorical here; a chi-square test of
# independence (as used for the flag columns) may be the more conventional
# choice -- confirm before relying on this p-value.
groups = [
    churn_flags
    for _, churn_flags in df.groupby('Trans_Freq_Group', sort=False)['Attrition_Flag']
]

f_stat, p_value = f_oneway(*groups)

print("Transaction Frequency Group vs Churn")
print("F-statistic:", f_stat)
print("P-value:", p_value)


Transaction Frequency Group vs Churn
F-statistic: 1141.1763874581525
P-value: 0.0


In [14]:
# Correlate each engineered feature with Attrition_Flag
# (corrwith uses Pearson correlation by default).
feature_cols = [
    'Avg_Transaction_Value',
    'Engagement_Score',
    'Credit_Pressure',
    'Stability_Score',
    'High_Credit_Flag',
    'High_Contact_Flag',
]
target = df['Attrition_Flag']
correlations = df[feature_cols].corrwith(target)

# Display from most negative to most positive correlation.
correlations.sort_values()


Engagement_Score        -0.392257
Stability_Score         -0.256946
Credit_Pressure         -0.178406
High_Credit_Flag        -0.030708
Avg_Transaction_Value    0.016246
High_Contact_Flag        0.164248
dtype: float64