In [2]:
import pickle

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from preprocessing import country_mapping, gender_mapping, scaling

In [3]:
# Load the raw churn table, discard the three columns that are not fed to the
# models, then pass the frame through the project's two mapping helpers.
# NOTE(review): country_mapping / gender_mapping come from preprocessing.py;
# presumably they integer-encode Geography and Gender - confirm there.
data = (
    pd.read_csv(r'../Skybug-Customer-Churn-Prediction/data/Churn_Modelling.csv')
    .drop(columns=['RowNumber', 'CustomerId', 'Surname'])
    .pipe(country_mapping)
    .pipe(gender_mapping)
)

# Rescale the four listed columns with a MinMaxScaler. scaling() hands back
# max_vals / min_vals / features alongside the transformed frame.
scaled_columns = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary']
max_vals, min_vals, features, data = scaling(data, scaled_columns, MinMaxScaler())

data

Yes


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0.538,0,1,0.324324,2,0.000000,1,1,1,0.506735,1
1,0.516,1,1,0.310811,1,0.334031,1,0,1,0.562709,0
2,0.304,0,1,0.324324,8,0.636357,3,1,0,0.569654,1
3,0.698,0,1,0.283784,1,0.000000,2,0,0,0.469120,0
4,1.000,1,1,0.337838,2,0.500246,1,1,1,0.395400,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,0.842,0,0,0.283784,5,0.000000,2,1,0,0.481341,0
9996,0.332,0,0,0.229730,10,0.228657,1,1,1,0.508490,0
9997,0.718,0,1,0.243243,7,0.000000,1,0,1,0.210390,1
9998,0.844,2,0,0.324324,3,0.299226,2,1,0,0.464429,1


In [3]:
# Positional split: every column except the last is a feature, the last column
# is the target. All rows are kept - no hold-out set is carved off here, so
# every score printed further down is measured on the training rows.
x_train, y_train = data.iloc[:, :-1], data.iloc[:, -1]

# Quick sanity check of (rows, feature columns).
print(x_train.shape)

(10000, 10)


## Multinomial Naive Bayes

In [4]:
# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(x_train, y_train)

# Mean accuracy on the same rows the model was fitted on (training accuracy).
print(model.score(X=x_train, y=y_train))

0.7963


In [5]:
# Persist the fitted model. The context manager closes the file handle as soon
# as the dump finishes instead of leaving an unclosed handle to the garbage
# collector (which can leave the pickle unflushed if the kernel dies).
with open('models/NB.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## Logistic Regression

In [6]:
# max_iter is raised to 1000 for the solver; fit() returns the estimator, so
# construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Mean accuracy on the same rows the model was fitted on (training accuracy).
print(model.score(X=x_train, y=y_train))

0.8128


In [7]:
# Persist the fitted model. The context manager closes the file handle as soon
# as the dump finishes instead of leaving an unclosed handle to the garbage
# collector (which can leave the pickle unflushed if the kernel dies).
with open('models/LR.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## SVM

### Linear

In [8]:
# Support-vector classifier with a linear kernel; fit() returns the estimator,
# so construction and training are chained.
model = SVC(kernel='linear').fit(x_train, y_train)

# Mean accuracy on the same rows the model was fitted on (training accuracy).
print(model.score(X=x_train, y=y_train))

0.7963


In [9]:
# Persist the fitted model. The context manager closes the file handle as soon
# as the dump finishes instead of leaving an unclosed handle to the garbage
# collector (which can leave the pickle unflushed if the kernel dies).
with open('models/SVM_lin.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

### Sigmoid

In [10]:
# Support-vector classifier with a sigmoid kernel; fit() returns the estimator,
# so construction and training are chained.
model = SVC(kernel='sigmoid').fit(x_train, y_train)

# Mean accuracy on the same rows the model was fitted on (training accuracy).
print(model.score(X=x_train, y=y_train))

0.6865


In [11]:
# Persist the fitted model. The context manager closes the file handle as soon
# as the dump finishes instead of leaving an unclosed handle to the garbage
# collector (which can leave the pickle unflushed if the kernel dies).
with open('models/SVM_sig.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## Decision Tree

In [12]:
# scikit-learn permutes candidate features at every split, so an unseeded tree
# can differ from run to run. Fixing random_state makes the fitted (and later
# pickled) model reproducible under Restart & Run All.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(x_train, y_train)

# Accuracy on the rows the tree was grown on. The tree has no depth limit, so
# a near-perfect value here says little about performance on unseen data.
print(dtc.score(x_train, y_train))

1.0


In [13]:
# Persist the fitted tree. The context manager closes the file handle as soon
# as the dump finishes instead of leaving an unclosed handle to the garbage
# collector (which can leave the pickle unflushed if the kernel dies).
with open('models/DTC.pkl', 'wb') as model_file:
    pickle.dump(dtc, model_file)

## XGBoost

In [14]:
# Build the estimator from the directly imported class rather than through the
# `xgb` module alias. This cell rebinds `xgb` to the fitted model (the save
# cell reads it under that name), so `xgb.XGBClassifier()` only worked on the
# first execution and raised AttributeError on every re-run of the cell.
xgb = XGBClassifier()
xgb.fit(x_train, y_train)

# Mean accuracy on the same rows the model was fitted on (training accuracy).
print(xgb.score(x_train, y_train))


0.9473


In [15]:
# Persist the fitted booster (bound to `xgb` by the training cell). The context
# manager closes the file handle as soon as the dump finishes instead of
# leaving an unclosed handle to the garbage collector.
with open('models/XGB.pkl', 'wb') as model_file:
    pickle.dump(xgb, model_file)