#### 1.Loading The Data

In [1]:
import pandas as pd

# Read the data

df = pd.read_csv('Churn_Modelling.csv')

# Print the first 5 rows of the dataframe.

df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### 2. Data Cleaning

In [2]:
# remove Surname & RowNumber

df.drop(['RowNumber','Surname'],axis=1,inplace=True)

#### 3. Label Encoding Gender and Geographical Columns

In [3]:
# use LabelEncoder to convert categorical features to numerical features

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

df['Geography'] = le.fit_transform(df['Geography'])

df['Gender'] = le.fit_transform(df['Gender'])

df.head()

Unnamed: 0,CustomerId,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,2,0,43,2,125510.82,1,1,1,79084.1,0


#### 4. Features Scaling 

In [4]:
# feature scaling of all the columns except the target column

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

df.iloc[:,0:11] = sc.fit_transform(df.iloc[:,0:11])

df.head()

Unnamed: 0,CustomerId,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,-0.783213,-0.326221,-0.901886,-1.095988,0.293517,-1.04176,-1.225848,-0.911583,0.646092,0.970243,0.021886,1
1,-0.606534,-0.440036,1.515067,-1.095988,0.198164,-1.387538,0.11735,-0.911583,-1.547768,0.970243,0.216534,0
2,-0.995885,-1.536794,-0.901886,-1.095988,0.293517,1.032908,1.333053,2.527057,0.646092,-1.03067,0.240687,1
3,0.144767,0.501521,-0.901886,-1.095988,0.007457,-1.387538,-1.225848,0.807737,-1.547768,-1.03067,-0.108918,0
4,0.652659,2.063884,1.515067,-1.095988,0.388871,-1.04176,0.785728,-0.911583,0.646092,0.970243,-0.365276,0


#### 5. Oversampling The Data

In [5]:
# oversampling the minority class

from imblearn.over_sampling import SMOTE

sm = SMOTE(random_state=42)

X = df.drop('Exited',axis=1)

y = df['Exited']

X_res,y_res = sm.fit_resample(X,y)

#### 6. Train Test Split Data

In [6]:
# split the data into train and test

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=0)

print(X_train.shape)
print(X_test.shape)

(12740, 11)
(3186, 11)


#### 7. Model Builiding and Training & Testing

In [7]:
# accuracy score and precision score
from sklearn.metrics import accuracy_score,precision_score

1.Logistic Regression

In [8]:
# train the model Logistic Regression

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()

lr.fit(X_train,y_train)

# predict the test set results
y_pred = lr.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.7099811676082862
0.7115869017632241


2. KNN - K-Nearest Neighbours

In [9]:
# using KNN

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)

knn.fit(X_train,y_train)

# predict the test set results
y_pred = knn.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.844632768361582
0.7862714508580343


3. SVM - Support Vector Machine

In [10]:
# using SVM

from sklearn.svm import SVC

svc = SVC()

svc.fit(X_train,y_train)

# predict the test set results
y_pred = svc.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.817639673571877
0.8100183262064753


4. Decision Tree Classifier

In [11]:
# using Decision Tree

from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()

dt.fit(X_train,y_train)

# predict the test set results
y_pred = dt.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.8427495291902072
0.8296207104154124


5. Random Forest Classifier

In [12]:
# using Random Forest

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)

rf.fit(X_train,y_train)


# predict the test set results
y_pred = rf.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.7947269303201506
0.799618320610687


6. Batch Gradient Classifier

In [13]:
# using Gradient Boosting Classifier

from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(learning_rate=0.4, n_estimators=130, max_depth=3)

gb.fit(X_train,y_train)

# predict the test set results
y_pred = gb.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.8954802259887006
0.9201596806387226


7. Naive Bayes - Gaussian Naive Bayes

In [14]:
# naive bayes

from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()

nb.fit(X_train,y_train)

# predict the test set results
y_pred = nb.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.748587570621469
0.7520608750792644


8. XGBoost Classifier

In [15]:
# using XGBoost

from xgboost import XGBClassifier

xgb = XGBClassifier(learning_rate=0.4, n_estimators=140, max_depth=3)

xgb.fit(X_train,y_train)

# predict the test set results
y_pred = xgb.predict(X_test)

# accuracy score
print(accuracy_score(y_test,y_pred))

# precision score
print(precision_score(y_test,y_pred))

0.8910860012554928
0.9127561136814276
