## Data Preparation

In [None]:
from pandas import read_csv, DataFrame, get_dummies, Series
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
# Load the raw churn dataset from the mounted Drive path.
CC = read_csv('/content/drive/MyDrive/Customer_Churn.csv')

# Encode the target and the ordered categoricals as integers.
# Series.map yields NaN for any label that is absent from its mapping, so every
# category present in the CSV must appear in the dictionaries below.
# The target is encoded with 'Existing Customer' as 1, so metrics that default
# to label 1 as the positive class (e.g. recall) describe existing customers.
CC['Attrition_Flag'] = CC['Attrition_Flag'].map({'Existing Customer':1, 'Attrited Customer':0})
CC['Gender'] = CC['Gender'].map({'M':0,'F':1})
CC['Education_Level'] = CC['Education_Level'].map({'Uneducated':0, 'High School':1, 'Graduate':2, 'Post-Graduate':3, 'Doctorate':4})
CC['Income_Category'] = CC['Income_Category'].map({'Less than $40K':0, '$40K - $60K':1, '$60K - $80K':2, '$80K - $120K':3, '$120K +':4})
CC['Card_Category'] = CC['Card_Category'].map({'Blue':0, 'Silver':1, 'Gold':2, 'Platinum':3})

# One-hot encode Marital_Status. The column list has to be passed as
# `columns=`: the second positional parameter of get_dummies is `prefix`, so a
# positional list only worked while Marital_Status was the sole object column.
CC2 = get_dummies(CC, columns=['Marital_Status'], dtype=int)

# Separate features from the target.
x = CC2.drop('Attrition_Flag', axis=1)
y = CC2['Attrition_Flag']

# NOTE(review): the scaler is fit on all rows before the split, so statistics
# of the test rows influence the scaling of the training rows. x_scaled is kept
# as-is because the grid-search cells consume it directly.
x_scaled = StandardScaler().fit_transform(x)
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=100)

# Oversample the training split only. SMOTE is seeded (same seed as the SMOTE
# steps used in the pipelines) so the resampled set is reproducible.
x_train, y_train = SMOTE(random_state=100).fit_resample(x_train, y_train)

# SVC - Support Vector Classifier

## Pipeline Method for Tuning

In [None]:
from sklearn import svm
from imblearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
# Grid-search the SVC kernel and C (cv=5, scored on recall). SMOTE is a step of
# the pipeline, so it is part of the estimator that the grid search fits.
SV_classifier = Pipeline(steps=[
    ('balancing', SMOTE(random_state=100)),
    ('classification', svm.SVC(random_state=100)),
])
kernels_c = {
    'classification__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'classification__C': [98, 99, 100, 101, 102],
}
grid_search1 = GridSearchCV(SV_classifier, kernels_c, scoring='recall', cv=5)
grid_search1.fit(x_scaled, y)

# Report the winning parameter combination and its cross-validated score.
best_parameters, best_result = grid_search1.best_params_, grid_search1.best_score_
print(best_parameters)
print(best_result)

{'classification__C': 100, 'classification__kernel': 'rbf'}
0.9233480335584794


## SVC - Using Best Parameters

In [None]:
# Train a standalone SVC with the parameters reported by the grid search
# (rbf kernel, C=100) on the training split, then predict the test split.
SV_classifier = svm.SVC(C=100, kernel='rbf', random_state=100).fit(x_train, y_train)
y_pred = SV_classifier.predict(x_test)

## Evaluation

In [None]:
from sklearn import metrics
# Recall of the SVC predictions against the true test labels.
recall = metrics.recall_score(y_true=y_test, y_pred=y_pred)
print(recall)

0.9129186602870814


# Random Forest Method

## Balanced Grid Search CV with Pipeline

In [None]:
from imblearn.pipeline import Pipeline
from sklearn import ensemble
# Grid-search the number of trees for a random forest (cv=5, scored on recall).
# SMOTE is a step of the pipeline, so it is part of the estimator being tuned.
RF_classifier = Pipeline(steps=[
    ('balancing', SMOTE(random_state=100)),
    (
        'classification',
        ensemble.RandomForestClassifier(
            criterion='entropy',
            max_features='sqrt',
            random_state=100,
        ),
    ),
])
no_trees = {'classification__n_estimators': list(range(784, 789))}
grid_search = GridSearchCV(RF_classifier, no_trees, scoring='recall', cv=5)
grid_search.fit(x_scaled, y)

# Report the winning tree count and its cross-validated score.
best_parameters, best_result = grid_search.best_params_, grid_search.best_score_
print(best_parameters)
print(best_result)

{'classification__n_estimators': 785}
0.9520735276560799


## Regular Method with Best Parameters & Evaluation

In [None]:
from sklearn import metrics

# Train a standalone random forest with the tree count reported by the grid
# search (785) on the training split, then predict the test split.
RF_classifier = ensemble.RandomForestClassifier(
    n_estimators=785,
    criterion='entropy',
    max_features='sqrt',
    random_state=100,
).fit(x_train, y_train)
y_pred = RF_classifier.predict(x_test)

# Confusion matrix and recall of the predictions against the true test labels.
confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confusion_matrix)
recall = metrics.recall_score(y_true=y_test, y_pred=y_pred)
print('recall', recall)

[[ 163   40]
 [  40 1005]]
recall 0.9617224880382775
