In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the raw customer table from a CSV located next to the notebook
# (relative path, so the kernel's working directory must contain the file).
data = pd.read_csv("Churn_Modelling.csv")
# Preview the first rows to check that the columns parsed as expected.
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Count the rows per value of the target column `Exited` to see the class balance.
data["Exited"].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [None]:
# Partition the rows by the value of the target flag.
# NOTE(review): the variable names look inverted -- `churn` receives the
# Exited == 0 rows and `not_churn` the Exited == 1 rows. Presumably Exited == 1
# marks customers who left; confirm before relying on these names.
churn = data.loc[data["Exited"].eq(0)]
not_churn = data.loc[data["Exited"].eq(1)]

In [None]:
# Undersample the Exited == 0 subset (held in `churn`) down to 2000 rows.
# A fixed random_state makes the draw repeatable, so a fresh
# "Restart & Run All" selects the same rows every time.
churn = churn.sample(n=2000, random_state=42)

In [None]:
# Stack the two class subsets row-wise into a single frame.
# NOTE(review): no active cell visible in this notebook reads `new_data`;
# the modelling cells further down work on `data` instead.
new_data = pd.concat(objs=[churn, not_churn], axis="index")

In [None]:
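# Disabled experiment: feature/target split on the undersampled frame `new_data`.
# The active cells further down build X / y from the full `data` frame instead.
# X = new_data.drop(columns=['Exited','Gender'])
# y = new_data['Exited']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)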

In [None]:
# Inspect column dtypes; `object` columns hold text and need encoding or removal
# before they can be passed to the estimators below.
data.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [None]:
# Number of missing entries in each column.
data.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [None]:
# One-hot encode Gender; drop_first=True removes the first category so a single
# indicator column remains.
Gender_identify = pd.get_dummies(data.loc[:, "Gender"], drop_first=True)

In [None]:
# Display the encoded gender indicator frame.
Gender_identify

Unnamed: 0,Male
0,0
1,0
2,0
3,0
4,0
...,...
9995,1
9996,1
9997,0
9998,1


In [None]:
# Add the numeric gender indicator right after the original `Gender` column.
# - The guard keeps the cell re-runnable: DataFrame.insert raises ValueError
#   when the column name already exists.
# - The position is derived from the `Gender` column instead of a hard-coded
#   index, so it stays correct if the column order changes.
# - `.iloc[:, 0]` passes the single indicator column as a Series rather than
#   handing a whole DataFrame to insert().
if "Gender_identify" not in data.columns:
    data.insert(
        data.columns.get_loc("Gender") + 1,
        "Gender_identify",
        Gender_identify.iloc[:, 0],
    )

In [None]:
# Check that `Gender_identify` now sits next to `Gender`.
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Gender_identify,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,0,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,0,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,0,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,0,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,0,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# One-hot encode Geography into a standalone frame (one indicator per value).
# This frame is only displayed; the next encoding cell applies get_dummies to
# `data` directly.
Geography_identify = pd.get_dummies(data.loc[:, "Geography"])

In [None]:
# Display the encoded geography indicator frame.
Geography_identify

Unnamed: 0,France,Germany,Spain
0,1,0,0
1,0,0,1
2,1,0,0
3,1,0,0
4,0,0,1
...,...,...,...
9995,1,0,0
9996,1,0,0
9997,1,0,0
9998,0,1,0


In [None]:
# Replace the text column `Geography` with Geography_<value> indicator columns.
# The guard makes the cell safe to re-run: after the first execution the
# `Geography` column no longer exists, and asking get_dummies to encode a
# missing column raises KeyError.
if "Geography" in data.columns:
    data = pd.get_dummies(data, columns=["Geography"])

In [None]:
# Check that the Geography_* indicator columns replaced the text column.
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Gender,Gender_identify,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,1,15634602,Hargrave,619,Female,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,2,15647311,Hill,608,Female,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,3,15619304,Onio,502,Female,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0
3,4,15701354,Boni,699,Female,0,39,1,0.0,2,0,0,93826.63,0,1,0,0
4,5,15737888,Mitchell,850,Female,0,43,2,125510.82,1,1,1,79084.1,0,0,0,1


In [None]:
# Remove the row counter, customer id and surname columns before modelling.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps the
# cell idempotent: a second run finds the columns already gone and does nothing
# instead of raising KeyError.
data = data.drop(columns=["RowNumber", "CustomerId", "Surname"], errors="ignore")

In [None]:
# Check the remaining columns after dropping the identifier columns.
data.head()

Unnamed: 0,CreditScore,Gender,Gender_identify,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,Female,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,608,Female,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,Female,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0
3,699,Female,0,39,1,0.0,2,0,0,93826.63,0,1,0,0
4,850,Female,0,43,2,125510.82,1,1,1,79084.1,0,0,0,1


In [None]:
# Feature matrix: every column except the raw text `Gender` (its numeric
# counterpart `Gender_identify` stays) and the target `Exited`.
X = data.drop(["Gender", "Exited"], axis=1)
# Target vector.
y = data.loc[:, "Exited"]

In [None]:
# Hold out 20% of the rows for evaluation.
# - random_state pins the shuffle so the reported scores are reproducible
#   after "Restart & Run All".
# - stratify=y keeps the ratio of the two `Exited` classes the same in the
#   train and test partitions (the value_counts output shows they are
#   imbalanced).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Print the shapes of the four partitions on one line as a sanity check.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(8000, 12) (2000, 12) (8000,) (2000,)


In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression baseline.
# The features span very different magnitudes (0/1 flags next to balances and
# salaries around 1e5 in the head() output), which hampers the solver and lets
# the large-valued columns dominate the regularized fit. Standardizing inside a
# pipeline fixes that and fits the scaler on the training rows only. The
# pipeline exposes the same fit / predict interface as the bare estimator.
model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Predict on the held-out rows and report the share of correct predictions.
y_pred = model.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.776


In [None]:
# Bernoulli naive Bayes classifier with default settings.
# NOTE(review): per the scikit-learn docs BernoulliNB binarizes each feature at
# its `binarize` threshold (0.0 by default), so continuous columns such as
# CreditScore or EstimatedSalary would presumably collapse to a constant 1 --
# confirm that this model is appropriate for these features.
from sklearn.naive_bayes import BernoulliNB
bnb = BernoulliNB()

In [None]:
# Train the naive Bayes model, then predict the held-out rows
# (fit returns the estimator itself, so the calls can be chained).
y_pred = bnb.fit(X_train, y_train).predict(X_test)


In [None]:
# Predict on the held-out rows and report the share of correct predictions.
y_pred = bnb.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.7675


In [None]:
from sklearn.svm import SVC

In [None]:
# Support-vector classifier with a cubic polynomial kernel.
# SVMs are not scale invariant, and the features here range from 0/1 flags to
# values around 1e5, so they are standardized first. Wrapping both steps in a
# pipeline fits the scaler on the training rows only and keeps the same
# fit / predict interface as the bare estimator.
clf = make_pipeline(StandardScaler(), SVC(kernel='poly', degree=3))

In [None]:
# Train the support-vector classifier, then predict the held-out rows
# (fit returns the estimator itself, so the calls can be chained).
y_pred = clf.fit(X_train, y_train).predict(X_test)


In [None]:
# Predict on the held-out rows and report the share of correct predictions.
y_pred = clf.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.7745
