# Read the dataset CustomerChurn

In [1]:
import pandas as pd
import numpy as np

# Load the raw churn table from a CSV expected in the notebook's working directory.
# NOTE(review): the preview includes an "Unnamed: 0" column (looks like a saved row index).
# It is never dropped, so it reaches the models as a feature -- consider index_col=0 and
# re-running the notebook so the recorded outputs stay in sync.
OriginalCustomerChurn = pd.read_csv("CustomerChurn.csv")
OriginalCustomerChurn.head()

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,KS,128,415,No,Yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,No,Yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,NJ,137,415,No,No,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,Yes,No,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,Yes,No,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


# Import machine learning libraries

In [2]:
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import linear_model, datasets
from sklearn.svm import LinearSVC, SVC


# Preprocess Data with encoding

## Function getEncodeData

In [3]:
import copy

def getEncodeData(DataFrame, Columns):
    """Return a deep copy of ``DataFrame`` with the given columns label-encoded.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Source table; it is not modified.
    Columns : iterable of str
        Names of the columns whose values are replaced by LabelEncoder codes.

    Returns
    -------
    pandas.DataFrame
        A copy of ``DataFrame`` in which every column listed in ``Columns``
        holds the encoded values; all other columns are unchanged.
    """
    encoder = LabelEncoder()
    result = copy.deepcopy(DataFrame)
    for name in Columns:
        # The single encoder is re-fitted per column, so codes are independent
        # between columns and the fitted classes of earlier columns are discarded.
        codes = encoder.fit_transform(DataFrame[name])
        result[name] = codes
    return result

## Preprocess Data

In [4]:
# Replace the three text-valued columns with integer codes. getEncodeData works on a copy,
# so OriginalCustomerChurn keeps the raw values.
encodedData = getEncodeData(OriginalCustomerChurn, ['State', 'International plan', 'Voice mail plan'])
encodedData.head()

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,16,128,415,0,1,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,35,107,415,0,1,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,31,137,415,0,0,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,35,84,408,1,0,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,36,75,415,1,0,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


# Create train data and test data

## SplitDataFrameToTrainAndTest function

In [5]:
def SplitDataFrameToTrainAndTest(DataFrame, TrainDataRate, TargetAtt, RandomState=1):
    """Randomly split a frame into train/test features and targets.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Table containing both the feature columns and the target column.
    TrainDataRate : float
        Fraction of rows sampled (without replacement) into the training set.
    TargetAtt : str
        Name of the target column; it is removed from the feature frames.
    RandomState : int, optional
        Seed passed to ``DataFrame.sample``. Defaults to 1, the value that was
        previously hard-coded, so existing calls produce the same split.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``. The test rows are all rows whose
        index label was not drawn into the training sample.
    """
    training = DataFrame.sample(frac=TrainDataRate, random_state=RandomState)
    # Rows are matched by index label, so the split assumes unique index labels.
    testing = DataFrame.loc[~DataFrame.index.isin(training.index)]
    # Use the ``columns=`` keyword: passing ``axis`` positionally (``drop(label, 1)``)
    # was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onward.
    x_train = training.drop(columns=TargetAtt)
    y_train = training[TargetAtt]
    x_test = testing.drop(columns=TargetAtt)
    y_test = testing[TargetAtt]
    return x_train, y_train, x_test, y_test

## Train data and test data

In [6]:
# Sample 60% of the rows for training and keep the remaining rows for testing; 'Churn' is the target column.
x_train, y_train, x_test, y_test = SplitDataFrameToTrainAndTest(DataFrame=encodedData, TrainDataRate=0.6, TargetAtt='Churn')

# LogisticRegression Learning

## Declaration

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

def LogisticRegressionLearning(DataTrain, TargetTrain):
    """Fit a LogisticRegression with default settings and return the fitted model.

    Parameters
    ----------
    DataTrain : array-like
        Training feature matrix.
    TargetTrain : array-like
        Training labels aligned with ``DataTrain``.
    """
    classifier = LogisticRegression()
    fitted = classifier.fit(DataTrain, TargetTrain)
    return fitted

def LogisticRegressionTesting(LRModel, DataTest, TargetTest):
    """Predict on the test features and score the predictions.

    Parameters
    ----------
    LRModel : fitted estimator
        Model exposing ``predict``.
    DataTest : array-like
        Test feature matrix.
    TargetTest : array-like
        True labels for ``DataTest``.

    Returns
    -------
    tuple
        ``(accuracy, predictions)`` where accuracy comes from ``accuracy_score``.
    """
    predictions = LRModel.predict(DataTest)
    return accuracy_score(TargetTest, predictions), predictions

## Training and testing

In [8]:
# Fit logistic regression on the training split, then score it on the held-out split.
logReg = LogisticRegressionLearning(x_train, y_train)
accuracy, predictTest = LogisticRegressionTesting(logReg, x_test, y_test)
print('Accuracy: ', accuracy)
print('Predict: ', predictTest)

Accuracy:  0.8627156789197299
Predict:  [False False False ... False False False]




# SVM Learning

## Declaration

In [9]:
def SVMLearning(DataTrain, TargetTrain, ClassifierType):
    """Build the SVC preset selected by ``ClassifierType``, fit it, and return it.

    Parameters
    ----------
    DataTrain : array-like
        Training feature matrix.
    TargetTrain : pandas object
        Training labels; must expose ``.values`` (flattened before fitting).
    ClassifierType : str
        ``'linear'``, ``'rbf'`` or ``'poly'``; any other value selects ``SVC()``
        with default arguments.

    The name of the chosen preset (or ``default``) is printed before fitting.
    """
    presets = (
        ('linear', {'kernel': 'linear', 'C': 0.025}),
        ('rbf', {'gamma': 2, 'C': 1}),
        ('poly', {'kernel': 'poly', 'degree': 8}),
    )

    # Fall back to the library defaults unless a preset name matches.
    label, params = 'default', {}
    for name, kwargs in presets:
        if ClassifierType == name:
            label, params = name, kwargs
            break

    print(label)
    model = SVC(**params)
    model.fit(DataTrain, TargetTrain.values.ravel())
    return model

def SVMTesting(SVMModel, DataTest, TargetTest):
    """Predict on the test features with a fitted SVM and score the predictions.

    Returns
    -------
    tuple
        ``(accuracy, predictions)`` where accuracy comes from ``accuracy_score``
        applied to ``TargetTest`` and the model's predictions for ``DataTest``.
    """
    predictions = SVMModel.predict(DataTest)
    score = accuracy_score(TargetTest, predictions)
    return score, predictions
        

## Training and testing

### Linear Kernel

In [10]:
# Train the 'linear' preset from SVMLearning and score it on the held-out split.
# accuracy and predictTest overwrite the values left by the logistic regression cell.
svmModel = SVMLearning(x_train, y_train, 'linear')
accuracy, predictTest = SVMTesting(svmModel, x_test, y_test)
print('Accuracy: ', accuracy)
print('Predict: ', predictTest)

linear
Accuracy:  0.8574643660915229
Predict:  [False False False ... False False False]


### Polynomial Kernel

In [11]:
# Train the 'poly' preset from SVMLearning (degree-8 polynomial kernel) and score it on the held-out split.
# svmModel, accuracy and predictTest are reassigned, replacing the previous cell's results.
svmModel = SVMLearning(x_train, y_train, 'poly')
accuracy, predictTest = SVMTesting(svmModel, x_test, y_test)
print('Accuracy: ', accuracy)
print('Predict: ', predictTest)

poly




Accuracy:  0.8117029257314329
Predict:  [False  True False ... False False False]


### Radial Basis Function Kernel

In [12]:
# Train the 'rbf' preset from SVMLearning (gamma=2, C=1) and score it on the held-out split.
# svmModel, accuracy and predictTest are reassigned, replacing the previous cell's results.
svmModel = SVMLearning(x_train, y_train, 'rbf')
accuracy, predictTest = SVMTesting(svmModel, x_test, y_test)
print('Accuracy: ', accuracy)
print('Predict: ', predictTest)

rbf
Accuracy:  0.8574643660915229
Predict:  [False False False ... False False False]
