In [1]:
import pandas as pd

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier


from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score
)

In [2]:
df = pd.read_csv("../data/churn_modelling.csv")
df = df.drop(["RowNumber", "CustomerId", "Surname"], axis=1)
df = df.replace(
    {"Geography" : {"Germany" : 0,
                    "France" : 1,
                    "Spain" : 2 },
     "Gender" : {"Female" : 0,
                 "Male" : 1}
    }
)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,1,0,42,2,0.0,1,1,1,101348.88,1
1,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,502,1,0,42,8,159660.8,3,1,0,113931.57,1
3,699,1,0,39,1,0.0,2,0,0,93826.63,0
4,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [3]:
X = df.drop(["Exited"], axis=1)
y = df["Exited"]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

X_test.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
8783,782,1,1,34,9,0.0,1,1,0,183021.06
5663,850,1,0,38,2,0.0,2,1,0,9015.07
5591,539,1,1,38,5,0.0,2,1,0,47388.41
5827,653,2,1,35,9,0.0,2,1,1,45956.05
1209,850,1,0,32,7,0.0,2,0,0,155227.0


In [4]:
## Support Vector Classifier

In [5]:
svc_clf = SVC(kernel="sigmoid")
svc_clf.fit(X_train,y_train)
y_pred_clf= svc_clf.predict(X_test)

In [6]:
print(confusion_matrix(y_test, y_pred_clf))
print("F1="+str(f1_score(y_test, y_pred_clf)))
print("Precision="+str(precision_score(y_test, y_pred_clf)))
print("Recall="+str(recall_score(y_test, y_pred_clf)))

[[1946  461]
 [ 458  135]]
F1=0.2270815811606392
Precision=0.22651006711409397
Recall=0.22765598650927488


In [7]:
## Decision Tree Classifier

In [8]:
dtc = DecisionTreeClassifier(max_depth=2)
dtc.fit(X_train,y_train)
y_pred_dtc = dtc.predict(X_test)

In [9]:
print(confusion_matrix(y_test, y_pred_dtc))
print("F1="+str(f1_score(y_test, y_pred_dtc)))
print("Precision="+str(precision_score(y_test, y_pred_dtc)))
print("Recall="+str(recall_score(y_test, y_pred_dtc)))

[[2228  179]
 [ 335  258]]
F1=0.5009708737864077
Precision=0.5903890160183066
Recall=0.4350758853288364


In [10]:
## KNeighbors Classifier

In [11]:
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train,y_train)
y_pred_neigh = neigh.predict(X_test)

In [12]:
print(confusion_matrix(y_test, y_pred_neigh))
print("F1="+str(f1_score(y_test, y_pred_neigh)))
print("Precision="+str(precision_score(y_test, y_pred_neigh)))
print("Recall="+str(recall_score(y_test, y_pred_neigh)))

[[2265  142]
 [ 535   58]]
F1=0.14627994955863807
Precision=0.29
Recall=0.09780775716694773


In [13]:
## Logistic Regression

In [14]:
lgc = LogisticRegression()
lgc.fit(X_train,y_train)
y_pred_lgc = lgc.predict(X_test)

In [15]:
print(confusion_matrix(y_test, y_pred_lgc))
print("F1="+str(f1_score(y_test, y_pred_lgc)))
print("Precision="+str(precision_score(y_test, y_pred_lgc)))
print("Recall="+str(recall_score(y_test, y_pred_lgc)))

[[2338   69]
 [ 558   35]]
F1=0.10043041606886656
Precision=0.33653846153846156
Recall=0.05902192242833052


In [16]:
voting_clf = VotingClassifier(estimators=[("svc", svc_clf), ("dtc", dtc), ("lgc", lgc)])
voting_clf.fit(X_train,y_train)
y_pred_voting = voting_clf.predict(X_test)

In [17]:
print(confusion_matrix(y_test, y_pred_voting))
print("F1="+str(f1_score(y_test, y_pred_voting)))
print("Precision="+str(precision_score(y_test, y_pred_voting)))
print("Recall="+str(recall_score(y_test, y_pred_voting)))

[[2355   52]
 [ 508   85]]
F1=0.23287671232876714
Precision=0.6204379562043796
Recall=0.1433389544688027


In [18]:
## Random Forest Classifier

In [19]:
rfc = RandomForestClassifier(random_state=0)
rfc.fit(X_train,y_train)
y_pred_rfc = rfc.predict(X_test)

In [20]:
print(confusion_matrix(y_test, y_pred_rfc))
print("F1="+str(f1_score(y_test, y_pred_rfc)))
print("Precision="+str(precision_score(y_test, y_pred_rfc)))
print("Recall="+str(recall_score(y_test, y_pred_rfc)))

[[2321   86]
 [ 304  289]]
F1=0.597107438016529
Precision=0.7706666666666667
Recall=0.4873524451939292


In [21]:
## Multilayer Perceptron

In [22]:
sc=StandardScaler()
scaler = sc.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [23]:
mlp_tuning = MLPClassifier()
parameter_space = { 
    "hidden_layer_sizes": [(150,100,50), (120,80,40), (100,50,30)],
    "max_iter": [100, 150, 200, 250, 300],
    "activation": ["tanh", "relu"], 
    "solver": ["sgd", "adam"], 
    "alpha": [0.0001, 0.05], 
    "learning_rate": ["constant", "invscaling", "adaptive"], 
}
#clf = GridSearchCV (mlp_tuning, parameter_space, n_jobs=-1, cv=5) 
#clf.fit(X_train_scaled,y_train)
#print(clf.best_params_)

In [24]:
mlp = MLPClassifier(max_iter=300, activation="tanh", alpha=0.05, hidden_layer_sizes=(20,), learning_rate="invscaling", solver="adam")
mlp.fit(X_train,y_train)
y_pred_mlp = mlp.predict(X_test_scaled)

In [25]:
print(confusion_matrix(y_test, y_pred_mlp))
print("F1="+str(f1_score(y_test, y_pred_mlp)))
print("Precision="+str(precision_score(y_test, y_pred_mlp)))
print("Recall="+str(recall_score(y_test, y_pred_mlp)))

[[2078  329]
 [ 530   63]]
F1=0.12791878172588833
Precision=0.16071428571428573
Recall=0.10623946037099494
