# Classification Models

## Importing the libraries

In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [20]:
# Load the bank-churn CSV into a DataFrame.
# NOTE(review): this is an absolute path (it looks like a Colab runtime location) —
# adjust it if the notebook is run anywhere else.
dataset = pd.read_csv('/content/sample_data/Binary Classification with a Bank Churn Dataset.csv')

In [21]:
# Drop the identifier columns before building the feature matrix.
# errors='ignore' keeps this cell idempotent: `dataset` is reassigned here, so
# re-running the cell would otherwise raise KeyError because the columns are
# already gone.
dataset = dataset.drop(columns=['id', 'CustomerId', 'Surname'], errors='ignore')

# Every column except the last is a feature; the last column is the target.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [22]:
# Preview the first rows of the frame after the identifier columns were dropped.
dataset.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,668,France,Male,33.0,3,0.0,2,1.0,0.0,181449.97,0
1,627,France,Male,33.0,1,0.0,2,1.0,1.0,49503.5,0
2,678,France,Male,40.0,10,0.0,2,1.0,0.0,184866.69,0
3,581,France,Male,34.0,2,148882.54,1,1.0,1.0,84560.88,0
4,716,Spain,Male,33.0,5,0.0,2,1.0,1.0,15068.83,0


In [23]:
# (rows, columns) of the frame after the identifier columns were dropped.
dataset.shape

(165034, 11)

In [24]:
# Count missing values per column to check whether any imputation is needed.
dataset.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

## Encoding Categorical Data

In [25]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Positions 1 and 2 of X hold the two string-valued columns (Geography, Gender).
# They are one-hot encoded; every other column is passed through unchanged.
# This cell overwrites X, so re-running it without rebuilding X first would
# encode the wrong columns.
categorical_columns = [1, 2]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

In [26]:
# Inspect the encoded feature matrix (NumPy abbreviates the printout of large arrays).
X

array([[1.0, 0.0, 0.0, ..., 1.0, 0.0, 181449.97],
       [1.0, 0.0, 0.0, ..., 1.0, 1.0, 49503.5],
       [1.0, 0.0, 0.0, ..., 1.0, 0.0, 184866.69],
       ...,
       [1.0, 0.0, 0.0, ..., 1.0, 1.0, 127429.56],
       [0.0, 0.0, 1.0, ..., 0.0, 1.0, 71173.03],
       [1.0, 0.0, 0.0, ..., 1.0, 0.0, 61581.79]], dtype=object)

## Splitting the dataset into the Training set and Test set

In [27]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [28]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that same
# fitted scaler to both splits so no test-set information leaks into the fit.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Training the classification model on the Training set

In [29]:
def evaluate_model(true, predicted):
    """Return the accuracy of ``predicted`` measured against ``true``.

    Thin wrapper around ``accuracy_score``, which this notebook imports from
    ``sklearn.metrics`` in a later cell; that import must have run before this
    function is called.

    Parameters
    ----------
    true : ground-truth labels.
    predicted : labels produced by a model, aligned with ``true``.

    Returns
    -------
    Whatever ``accuracy_score(true, predicted)`` returns.
    """
    return accuracy_score(true, predicted)

In [30]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [31]:

# Candidate classifiers, keyed by the display name used in the printed report
# and in the summary lists below.
models = {
    "Decision Tree": DecisionTreeClassifier(criterion = 'entropy', random_state = 0),
    "KNN": KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2),
    "Kernal SVM": SVC(kernel = 'rbf', random_state = 0),
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0),
    "Support Vector Machine": SVC(kernel = 'linear', random_state = 0),
}
model_list = []  # model names, in evaluation order
acc_list = []    # test-set accuracy per model, aligned with model_list

# Iterate the dict directly instead of rebuilding list(models.keys()) /
# list(models.values()) and indexing into them on every pass.
for name, model in models.items():
    model.fit(X_train, y_train)  # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate Train and Test dataset
    model_train_acc = evaluate_model(y_train, y_train_pred)
    model_test_acc = evaluate_model(y_test, y_test_pred)

    # Record both results together so the two lists cannot drift out of step
    # if the loop is interrupted part-way through an iteration.
    model_list.append(name)
    acc_list.append(model_test_acc)

    print(name)

    print('Model performance for Training set')
    print("- Accuracy Score: {:.4f}".format(model_train_acc))

    print('----------------------------------')

    print('Model performance for Test set')
    print("- Accuracy Score: {:.4f}".format(model_test_acc))

    print('='*35)
    print('\n')

Decision Tree
Model performance for Training set
- Accuracy Score: 0.9997
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8021


KNN
Model performance for Training set
- Accuracy Score: 0.8845
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8480


Kernal SVM
Model performance for Training set
- Accuracy Score: 0.8641
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8622


Logistic Regression
Model performance for Training set
- Accuracy Score: 0.8345
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8334


Naive Bayes
Model performance for Training set
- Accuracy Score: 0.8006
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8022


Random Forest
Model performance for Training set
- Accuracy Score: 0.9855
----------------------------------
Model performance for Test set
- Accuracy Score: 0.8523


Support

In [32]:
# Pair each model name with its test-set accuracy and show the table best-first.
results = pd.DataFrame(
    list(zip(model_list, acc_list)),
    columns=['Model Name', 'Accuracy_Score'],
)
results.sort_values(by=["Accuracy_Score"], ascending=False)

Unnamed: 0,Model Name,Accuracy_Score
2,Kernal SVM,0.86215
5,Random Forest,0.852304
1,KNN,0.847972
6,Support Vector Machine,0.833793
3,Logistic Regression,0.833429
4,Naive Bayes,0.802163
0,Decision Tree,0.802133


## Training using Kernel SVM

In [35]:
from sklearn.svm import SVC

# Fit an RBF-kernel SVC on the scaled training data. fit() returns the
# estimator itself, so construction and fitting can be chained.
classifier = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)
classifier

## Making the Confusion Matrix

In [37]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the fitted classifier on the held-out test split.
y_pred = classifier.predict(X_test)

# Rows of the matrix are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

accuracy_score(y_true=y_test, y_pred=y_pred)

[[25094   994]
 [ 3556  3363]]


0.8621504529342261