In [2]:
import numpy as np 
import pandas as pd 

In [3]:
df = pd.read_csv(r"C:\Users\bhanu\Downloads\churn.csv")

### 1. Load the data, print first five rows and display each column datatype

In [4]:
print(df.head(5))

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

### 2. check the missing values, duplicate values and how many are there

In [5]:
print(df.isna().sum().sum()) # zero null values
print(df[df.duplicated()])  # no duplicate rows

0
Empty DataFrame
Columns: [RowNumber, CustomerId, Surname, CreditScore, Geography, Gender, Age, Tenure, Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary, Exited]
Index: []


### 3. convert categorical column into label encoding and one hot encoding

In [6]:
pd.get_dummies(df,columns=['Gender'],dtype=int) # one hot encoding
df['Surname'] = df['Surname'].astype('category').cat.codes # label encoding


### 4. calculate the overall churn rate. display both count and the percentage of who exited 

In [7]:
count_of_churned = df['Exited'].sum()
print(f"count of churned customers = {count_of_churned}")
percentage_of_churned_cutsmrs = (count_of_churned/df.shape[0])*100
print(f"percantage of churned customers out of all customers : {percentage_of_churned_cutsmrs:.2f}%")

count of churned customers = 2037
percantage of churned customers out of all customers : 20.37%


### 5. Group the data by geography and find the churn rate for each country

In [None]:
df.groupby("Geography").apply(lambda x : x['Exited'].sum()/len(df))*100

  df.groupby("Geography").apply(lambda x : x['Exited'].sum()/len(df))*100


Geography
France     8.10
Germany    8.14
Spain      4.13
dtype: float64

### 6.What is the age of customers who churned versus who didnt churned

In [9]:
churned_cust = df[df['Exited']==1]
not_churned_cust = df[df['Exited']==0] 
print(f"avg age of churned customers = {churned_cust['Age'].mean():.2f}")
print(f"avg age of not churned customers = {not_churned_cust['Age'].mean():.2f}")

avg age of churned customers = 44.84
avg age of not churned customers = 37.41


### 7. Determine what perecentage of churnmed customers are age over 45. compare it to all overall dataset 

In [23]:
total_churned = df[df['Exited']==1]
churned_over45 = total_churned[total_churned['Age']>45]
overall_cust_over45 = df[df['Age']>45]

percen_churned_over45 = (len(churned_cust)/len(total_churned))*100
percen_overall_over45 = (len(overall_cust_over45)/len(df))*100

print(f"Percentage of churned customers over 45: {percen_churned_over45:.2f}%")
print(f"Percentage of all customers over 45: {percen_overall_over45:.2f}%")


Percentage of churned customers over 45: 100.00%
Percentage of all customers over 45: 21.11%


### 8. Analize the churn rate by gender. Do male or female churn more?

In [29]:
total_churned = df[df['Exited']==1] 
churned_male = total_churned[total_churned['Gender']=='Male']
churned_female = total_churned[total_churned['Gender']=='Female']

percent_churned_male = (len(churned_male)/len(total_churned))*100
percent_churned_female = (len(churned_female)/len(total_churned))*100

print(f"Percenatge of churned male {percent_churned_male:.2f}")
print(f"Percenatge of churned female {percent_churned_female:.2f}")


Percenatge of churned male 44.08
Percenatge of churned female 55.92


### 9. Analyze the churn rate based on numberproducts. Create a summary table showing a churn rate for 1,2,3 and 4 products

In [36]:
summary = df.groupby('NumOfProducts')['Exited'].agg(['count','sum'])
summary.columns = ['total_customers','churned_customers']
summary['churnrae (%)'] = (summary['churned_customers']/summary['total_customers'])*100
summary = summary.loc[[1,2,3,4]]
print(summary)

               total_customers  churned_customers  churnrae (%)
NumOfProducts                                                  
1                         5084               1409     27.714398
2                         4590                348      7.581699
3                          266                220     82.706767
4                           60                 60    100.000000


### 10. Among customers with a Balance of zero, what percentage have exited the bank

In [45]:
total_churned = df[df['Exited']==1]
customers_with_zerobalance = total_churned[total_churned['Balance']==0]

percent_of_exited_bank_with_zerobalance = (len(customers_with_zerobalance)/len(total_churned))*100
print(f"{percent_of_exited_bank_with_zerobalance:.2f}%")


24.55%


### 11. what is average tenure of customers who churned compared to who didnt?

In [49]:
churned = df[df['Exited']==1] 
not_churned = df[df['Exited']==0] 

print(f"avg of churned customer tenure {churned['Tenure'].mean()}")
print(f"avg of non churned customer tenure {not_churned['Tenure'].mean()}")

avg of churned customer tenure 4.932744231713304
avg of non churned customer tenure 5.033278914981791


### 12. Calculate the churn rate for customers with credit score < 600 , creditscore>800

In [50]:
total_churned = df[df['Exited']==1] 
cus_with_lessthn_600credit = total_churned[total_churned['CreditScore']<600]
cus_with_greatthn_800credit = total_churned[total_churned['CreditScore']>800]

percent_lessthn_600credit = (len(cus_with_lessthn_600credit)/len(total_churned))*100
percent_greatthn_800credit = (len(cus_with_greatthn_800credit)/len(total_churned))*100

print(f"less than 600 creditscore = {percent_lessthn_600credit:.2f}%")
print(f"great than 800 creditscore = {percent_greatthn_800credit:.2f}%")



less than 600 creditscore = 32.40%
great than 800 creditscore = 6.23%


### 13.Among the customers with a hascrcard value of 0 and 1, compare the churn rate.

In [53]:
card_0 = df[df['HasCrCard'] == 0]
card_1 = df[df['HasCrCard'] == 1]

churn_rate_0 = (card_0['Exited'].sum() / len(card_0)) * 100
churn_rate_1 = (card_1['Exited'].sum() / len(card_1)) * 100

print(f"Churn rate for customers without a credit card (HasCrCard = 0): {churn_rate_0:.2f}%")
print(f"Churn rate for customers with a credit card (HasCrCard = 1): {churn_rate_1:.2f}%")

cr_summary = df.groupby('HasCrCard')['Exited'].agg(['count', 'sum'])
cr_summary.columns = ['Total_Customers', 'Churned_Customers']
cr_summary['Churn_Rate (%)'] = (cr_summary['Churned_Customers'] / cr_summary['Total_Customers']) * 100

print(cr_summary)


Churn rate for customers without a credit card (HasCrCard = 0): 20.81%
Churn rate for customers with a credit card (HasCrCard = 1): 20.18%
           Total_Customers  Churned_Customers  Churn_Rate (%)
HasCrCard                                                    
0                     2945                613       20.814941
1                     7055               1424       20.184266


###  14. Create the column of Category, Credit Score. and calcute churn rate between category

In [72]:
def categ_creditscore(score):
    if score < 600:
        return "low"
    elif score > 600 and score < 800:
        return "medium"
    elif score > 800:
        return 'high'


df['score_category'] = df['CreditScore'].apply(categ_creditscore)
churnedd = df[df['Exited']==1]
summary = churnedd.groupby('score_category')['Exited'].agg(['sum'])
summary.columns = ['total_exited']

summary['avg_category_churn'] = (summary['total_exited']/len(churnedd))*100
summary



Unnamed: 0_level_0,total_exited,avg_category_churn
score_category,Unnamed: 1_level_1,Unnamed: 2_level_1
high,127,6.234659
low,660,32.400589
medium,1244,61.070201


### 16. What is the average estimated salary for churned vs non-churned customers?

In [75]:
churned = df[df['Exited'] == 1]
non_churned = df[df['Exited'] == 0]

avg_estimated_salary_churned = churned['EstimatedSalary'].mean()
avg_estimated_salary_nonchurned = non_churned['EstimatedSalary'].mean()

print(f"avg estimated salary for churned customer = {avg_estimated_salary_churned:.2f}")
print(f"avg estimated salary for non churned customer = {avg_estimated_salary_nonchurned:.2f}")



avg estimated salary for churned customer = 101465.68
avg estimated salary for non churned customer = 99738.39
