<a href="https://colab.research.google.com/github/Achyuta-Harshavardhan/Machine-Learning-Classification/blob/main/Machine_Learning_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

In [2]:
# Read the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Datasets/Churn_Modelling.csv")

In [3]:
# Display first five records
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# TO check if there are any missing Values in the dataset
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [5]:
# Independent and Dependent Features
x = df.iloc[:,3:13]
y = df.iloc[:,13]

In [6]:
# One Hot Encoding
geography = pd.get_dummies(x['Geography'],drop_first=True)
gender = pd.get_dummies(x['Gender'],drop_first=True)

# Drop the Categorical Features
x = x.drop(['Geography','Gender'],axis=1)
x = pd.concat([x,geography,gender],axis=1)

x.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [7]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=0)

# Logistic Classifier

In [8]:
classifier_logistic_regression  = LogisticRegression()
classifier_logistic_regression.fit(x_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [9]:
pred_logistic = classifier_logistic_regression.predict(x_test)
accuracy_logistic = accuracy_score(y_test,pred_logistic)
print('LogisticRegression Classifier Accuracy: ',accuracy_logistic)

LogisticRegression Classifier Accuracy:  0.784


# KNN Classifier

In [10]:
classifier_knn = KNeighborsClassifier()
classifier_knn.fit(x_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [11]:
pred_knn = classifier_knn.predict(x_test)
accuracy_knn = accuracy_score(y_test,pred_knn)
print('KNeighnours Classifier Accuracy: ',accuracy_knn)

KNeighnours Classifier Accuracy:  0.756


# SVM Classifier

In [12]:
classifier_svm = SVC()
classifier_svm.fit(x_train,y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [13]:
pred_svm = classifier_svm.predict(x_test)
accuracy_svm = accuracy_score(y_test,pred_svm)
print('SVM Classifier Accuracy: ',accuracy_svm)

SVM Classifier Accuracy:  0.793


# Decision Tree Classifier

In [14]:
classifier_decision_tree = DecisionTreeClassifier()
classifier_decision_tree.fit(x_train,y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [15]:
pred_decision_tree = classifier_decision_tree.predict(x_test)
accuracy_decision_tree = accuracy_score(y_test,pred_decision_tree)
print('Decision Tree Classifier Accuracy: ',accuracy_decision_tree)

Decision Tree Classifier Accuracy:  0.7973333333333333


# Naive Bayes Classifier

In [16]:
# Incase of imbalanced Dataset
classifier_naive_bayes = MultinomialNB()
classifier_naive_bayes.fit(x_train,y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [17]:
pred_naive_bayes = classifier_naive_bayes.predict(x_test)
accuracy_naive_bayes = accuracy_score(y_test,pred_naive_bayes)
print('Naive Bayes Classifier Accuracy: ',accuracy_naive_bayes)

Naive Bayes Classifier Accuracy:  0.539


# Random Forest Classifier

In [18]:
classifier_random_forest = RandomForestClassifier()
classifier_random_forest.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [19]:
pred_random_forest = classifier_random_forest.predict(x_test)
accuracy_random_forest = accuracy_score(y_test,pred_random_forest)
print('Random Forest Classifier Accuracy: ',accuracy_random_forest)

Random Forest Classifier Accuracy:  0.867


# ADABoost Classifier

In [20]:
classifier_adaboost = AdaBoostClassifier()
classifier_adaboost.fit(x_train,y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=None)

In [21]:
pred_adaboost = classifier_adaboost.predict(x_test)
accuracy_adaboost = accuracy_score(y_test,pred_adaboost)
print('ADABoost Classifier Accuracy: ',accuracy_adaboost)

ADABoost Classifier Accuracy:  0.8613333333333333


# Gradient Boosting Classifier

In [22]:
classifier_gradientboost = GradientBoostingClassifier()
classifier_gradientboost.fit(x_train,y_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [23]:
pred_gradientboost = classifier_gradientboost.predict(x_test)
accuracy_gradientboost = accuracy_score(y_test,pred_gradientboost)
print('GradientBoost Classifier Accuracy: ',accuracy_gradientboost)

GradientBoost Classifier Accuracy:  0.8683333333333333


# XGBoost Classifier

In [24]:
classifier_xgboost = XGBClassifier()
classifier_xgboost.fit(x_train,y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [25]:
pred_xgboost = classifier_xgboost.predict(x_test)
accuracy_xgboost = accuracy_score(y_test,pred_xgboost)
print('XGBoost Classifier Accuracy: ',accuracy_xgboost)

XGBoost Classifier Accuracy:  0.869
