In [145]:
# Imports

In [162]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [202]:
# Data Parsing
# Since our data set is very small, we are clubbing train and test datasets.

In [189]:
train = pd.read_csv("churn-bigml-80.csv")
test=pd.read_csv("churn_test.csv")
df = train.append(test, ignore_index=True)

In [None]:
# Handling Categorical features and dropping correlated features.

In [191]:
cleanup_nums = {"No": 0, "Yes": 1}
df.replace(cleanup_nums, inplace=True)
df_test.replace(cleanup_nums, inplace=True)
df['Churn_idx'] = np.where(df['Churn']==False, 0, 1)
df1=df.drop(["Churn","Total day charge","Total eve charge","Total night charge","Total intl charge","Area code","State"], axis=1)
df1.head(2)             

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,KS,128,415,0,1,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,0,1,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False


In [None]:
# Imports for Deep learning

In [193]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [194]:
seed = 7
np.random.seed(seed)

In [203]:
# split into input (X) and output (Y) variables

In [195]:
dataset = df2.values
X = dataset[:,0:13].astype(float)
Y = dataset[:,13].astype(float)

In [None]:
# Baseline model with Stratied K Fold (10 folds)

In [197]:
def create_baseline():
	# create model
	model = Sequential()
	model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [198]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X,Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Results: 86.46% (1.50%)


In [None]:
# Baseline model with Stratied K Fold (10 folds) with standardized dataset

In [199]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X,Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 90.88% (2.73%)


In [None]:
# Baseline model with Stratied K Fold (10 folds) with *Smaller* standardized dataset

In [200]:
# smaller model
def create_smaller():
	# create model
	model = Sequential()
	model.add(Dense(6, input_dim=13, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X,Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 91.26% (1.43%)


In [None]:
# Baseline model with Stratied K Fold (10 folds) with *Larger* standardized dataset

In [201]:
# larger model
def create_larger():
	# create model
	model = Sequential()
	model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
	model.add(Dense(6, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X,Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Larger: 91.64% (3.22%)


In [None]:
# Conclusion:
# The best model is the one which gives good accuracy with the least standard deviation.
# Hence, the model trained on Statified Kfold on *Smaller* scaled dataset is the best for our dataset.
# Accuray: 91.26% 
# Standar Dev.: 1.43%