In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")

In [2]:
customer_dataset=pd.read_csv('3. Customer Churn Prediction.csv')

In [3]:
customer_dataset.shape

(10000, 14)

In [4]:
customer_dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
customer_dataset.value_counts('Geography')

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [6]:
customer_dataset.replace({'Geography':{'France':0, 'Germany':1, 'Spain':2}}, inplace = True)

In [7]:
customer_dataset.replace({'Gender':{'Male':0, 'Female':1}}, inplace = True)

In [8]:
X = customer_dataset.drop(columns = ['CustomerId', 'Surname', 'Exited'], axis=1)
y = customer_dataset['Exited']

In [9]:
print(X)
print(y)

      RowNumber  CreditScore  Geography  Gender  Age  Tenure    Balance  \
0             1          619          0       1   42       2       0.00   
1             2          608          2       1   41       1   83807.86   
2             3          502          0       1   42       8  159660.80   
3             4          699          0       1   39       1       0.00   
4             5          850          2       1   43       2  125510.82   
...         ...          ...        ...     ...  ...     ...        ...   
9995       9996          771          0       0   39       5       0.00   
9996       9997          516          0       0   35      10   57369.61   
9997       9998          709          0       1   36       7       0.00   
9998       9999          772          1       0   42       3   75075.31   
9999      10000          792          0       1   28       4  130142.79   

      NumOfProducts  HasCrCard  IsActiveMember  EstimatedSalary  
0                 1          1   

In [10]:
standardize = StandardScaler()
standardize.fit(X)
standardize_X = standardize.transform(X)

In [11]:
X = standardize_X
y = customer_dataset['Exited']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y, random_state = 42)

In [13]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC(),
    'Random Forest': RandomForestClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

In [14]:
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)
    
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_f1 = f1_score(y_test, y_pred_test)
    test_recall = recall_score(y_test, y_pred_test)
    test_precision = precision_score(y_test, y_pred_test)

    print(f"{model_name} - Test Accuracy: {test_accuracy:.4f}")
    print(f"{model_name} - Test F1 Score: {test_f1:.4f}")
    print(f"{model_name} - Test Recall: {test_recall:.4f}")
    print(f"{model_name} - Test Precision: {test_precision:.4f}\n")

Logistic Regression - Test Accuracy: 0.8055
Logistic Regression - Test F1 Score: 0.2327
Logistic Regression - Test Recall: 0.1450
Logistic Regression - Test Precision: 0.5900

Support Vector Machine - Test Accuracy: 0.8590
Support Vector Machine - Test F1 Score: 0.5253
Support Vector Machine - Test Recall: 0.3833
Support Vector Machine - Test Precision: 0.8342

Random Forest - Test Accuracy: 0.8575
Random Forest - Test F1 Score: 0.5455
Random Forest - Test Recall: 0.4201
Random Forest - Test Precision: 0.7773

Decision Tree - Test Accuracy: 0.7790
Decision Tree - Test F1 Score: 0.4610
Decision Tree - Test Recall: 0.4644
Decision Tree - Test Precision: 0.4576

K-Nearest Neighbors - Test Accuracy: 0.8285
K-Nearest Neighbors - Test F1 Score: 0.4530
K-Nearest Neighbors - Test Recall: 0.3489
K-Nearest Neighbors - Test Precision: 0.6455

Naive Bayes - Test Accuracy: 0.8300
Naive Bayes - Test F1 Score: 0.3633
Naive Bayes - Test Recall: 0.2383
Naive Bayes - Test Precision: 0.7638



In [15]:
input_data=(132,795,1,1,33,9,130862.43,1,1,1,114935.21)
input_data_as_numpy_array=np.asarray(input_data)
input_data_reshaped=input_data_as_numpy_array.reshape(1,-1)
prediction=model.predict(input_data_reshaped)
print(prediction)
if(prediction[0]==1):
    print("Customer is exited")
else:
    print("Customer is not exited")

[0]
Customer is not exited
