In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Load Training Dataset

In [2]:
df=pd.read_csv('customer_churn_dataset-training-master.csv')
df.head()

Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,2.0,30.0,Female,39.0,14.0,5.0,18.0,Standard,Annual,932.0,17.0,1.0
1,3.0,65.0,Female,49.0,1.0,10.0,8.0,Basic,Monthly,557.0,6.0,1.0
2,4.0,55.0,Female,14.0,4.0,6.0,18.0,Basic,Quarterly,185.0,3.0,1.0
3,5.0,58.0,Male,38.0,21.0,7.0,7.0,Standard,Monthly,396.0,29.0,1.0
4,6.0,23.0,Male,32.0,20.0,5.0,8.0,Basic,Monthly,617.0,20.0,1.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 440833 entries, 0 to 440832
Data columns (total 12 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   CustomerID         440832 non-null  float64
 1   Age                440832 non-null  float64
 2   Gender             440832 non-null  object 
 3   Tenure             440832 non-null  float64
 4   Usage Frequency    440832 non-null  float64
 5   Support Calls      440832 non-null  float64
 6   Payment Delay      440832 non-null  float64
 7   Subscription Type  440832 non-null  object 
 8   Contract Length    440832 non-null  object 
 9   Total Spend        440832 non-null  float64
 10  Last Interaction   440832 non-null  float64
 11  Churn              440832 non-null  float64
dtypes: float64(9), object(3)
memory usage: 40.4+ MB


In [11]:
df.dropna(inplace=True)

In [4]:
df.drop(columns='CustomerID',inplace=True)

In [5]:
cat_col=df.select_dtypes(include='object').columns

# Encoding

In [6]:
df=pd.get_dummies(df,columns=cat_col,drop_first=True)

In [12]:
df=df.astype(int)

In [14]:
df.head()

Unnamed: 0,Age,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn,Gender_Male,Subscription Type_Premium,Subscription Type_Standard,Contract Length_Monthly,Contract Length_Quarterly
0,30,39,14,5,18,932,17,1,0,0,1,0,0
1,65,49,1,10,8,557,6,1,0,0,0,1,0
2,55,14,4,6,18,185,3,1,0,0,0,0,1
3,58,38,21,7,7,396,29,1,1,0,1,1,0
4,23,32,20,5,8,617,20,1,1,0,0,1,0


In [15]:
x=df.drop(columns='Churn')
y=df['Churn']

# Train-Test Split

In [16]:
from sklearn.model_selection import train_test_split

In [23]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [17]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)

# Random Forest

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
rf=RandomForestClassifier(n_estimators=300,random_state=42,n_jobs=1)

In [20]:
rf.fit(x_train,y_train)

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [22]:
pred_rf=rf.predict(x_test)

In [25]:
# Accuracy And Confusion matrx
accuracy_score(y_test,pred_rf)

0.9996483945240283

In [26]:
confusion_matrix(y_test,pred_rf)

array([[38167,     0],
       [   31, 49969]])

In [28]:
print(classification_report(y_test,pred_rf))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     38167
           1       1.00      1.00      1.00     50000

    accuracy                           1.00     88167
   macro avg       1.00      1.00      1.00     88167
weighted avg       1.00      1.00      1.00     88167



# Important Feature

In [29]:
feature_names=x.columns
importances=rf.feature_importances_

In [30]:
feat_imp=pd.DataFrame({'feature':feature_names,'importance':importances})

In [36]:
feat_imp.sort_values(by='importance',ascending=False,inplace=True)

In [49]:
plt.figure(figsize=(10,6))
plt.barh(x=feat_imp['importance'],y=feat_imp['feature'])
plt.xlabel('importance')
plt.ylabel('feature')
plt.title('Important Feature (Random Forest)')
plt.tight_layout()
plt.show()

TypeError: barh() missing 1 required positional argument: 'width'

<Figure size 1000x600 with 0 Axes>