In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("OneDrive/Documents/Machine Learning/Churn_Modelling.csv")

In [3]:
data['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [4]:
data.columns



Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [5]:
X = data[['CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary']]
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [6]:
X['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [7]:
geo = {'France': 1 , 'Spain' : 2 , 'Germany' : 3}
X["Geography"] = X["Geography"].map(geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["Geography"] = X["Geography"].map(geo)


In [8]:
X['Geography'].unique()

array([1, 2, 3], dtype=int64)

In [9]:
gender = {"Female": 0 , "Male": 1}
X["Gender"] = X["Gender"].map(gender)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["Gender"] = X["Gender"].map(gender)


In [10]:
X.head(10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,1,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,1,0,42,8,159660.8,3,1,0,113931.57
3,699,1,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1
5,645,2,1,44,8,113755.78,2,1,0,149756.71
6,822,1,1,50,7,0.0,2,1,1,10062.8
7,376,3,0,29,4,115046.74,4,1,0,119346.88
8,501,1,1,44,4,142051.07,2,0,1,74940.5
9,684,1,1,27,2,134603.88,1,1,1,71725.73


In [11]:
X = X.astype(float)

In [12]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619.0,1.0,0.0,42.0,2.0,0.0,1.0,1.0,1.0,101348.88
1,608.0,2.0,0.0,41.0,1.0,83807.86,1.0,0.0,1.0,112542.58
2,502.0,1.0,0.0,42.0,8.0,159660.8,3.0,1.0,0.0,113931.57
3,699.0,1.0,0.0,39.0,1.0,0.0,2.0,0.0,0.0,93826.63
4,850.0,2.0,0.0,43.0,2.0,125510.82,1.0,1.0,1.0,79084.1


In [13]:
y = data[["Exited"]].astype(float)

In [14]:
y.head()

Unnamed: 0,Exited
0,1.0
1,0.0
2,1.0
3,0.0
4,0.0


In [15]:
X = (X-np.mean(X))/np.std(X)

In [16]:
from sklearn.model_selection import train_test_split

x,x_t,y,y_t = train_test_split(X,y, test_size = 0.2 , random_state = 4)

In [17]:
input_layer_size = len(X.columns)
hidden_layer_size = 4
output_layer_size = 1

In [18]:
def initialize_theta(in_l,out_l,e_init = 0.12):
    theta = np.zeros((out_l,in_l+1))
    theta = np.random.rand(out_l,1+in_l) * e_init * 2 - e_init
    return theta

In [19]:
iTheta1 = initialize_theta(input_layer_size,hidden_layer_size)
iTheta2 = initialize_theta(hidden_layer_size,output_layer_size)
iParameters = np.concatenate([iTheta1.ravel(),iTheta2.ravel()],axis = 0)

In [20]:
y.size

8000

In [21]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [22]:
def sigmoidGrad(x):
    z = sigmoid(x)
    return z*(1-z)

In [23]:
def costFunction(nnParam,in_l,h_l,x,y,num_labels,lambda_ = 0.0):
    
    # parameters
    theta1 = np.reshape(nnParam[0:h_l*(in_l+1)],(h_l,in_l+1))
    theta2 = np.reshape(nnParam[h_l*(in_l+1):],(num_labels,(h_l+1)))
    m = y.size
    
    # variables to return
    J = 0
    theta1_grad = np.zeros(theta1.shape)
    theta2_grad = np.zeros(theta2.shape)
    
    # forward feeding
    a1 = np.c_[np.ones((m,1)),x]
    z2 = a1.dot(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.c_[np.ones((a2.shape[0],1)),a2]
    z3 = a2.dot(theta2.T)
    a3 = sigmoid(z3)
    
    # cost and reg
    cost = (-1/m) * np.sum(y * np.log(a3) + (1-y) * np.log(1-a3)) 
    reg = (lambda_/(2*m)) * (np.sum(theta1**2)+np.sum(theta2**2))
    J = cost + reg
    
    # backpropagation and reg
    del3 = a3 - y
    q = np.asarray(del3.dot(theta2))
    del2 = q[:,1:] * sigmoidGrad(z2)

    
    Delta2 = del3.T.dot(a2)
    Delta1 = del2.T.dot(a1)
    
    theta2_grad = (1/m) * Delta2 
    theta1_grad = (1/m) * Delta1
    
    theta2_grad[1:] += (lambda_/m) * theta2[1:]
    theta1_grad[1:] += (lambda_/m) * theta1[1:]
    
    # rolling grad vector
    grad = np.concatenate([np.asarray(theta1_grad).ravel(),np.asarray(theta2_grad).ravel()])
    
    return J,grad

In [24]:
from scipy import optimize as opt


In [25]:
options= {'maxiter': 1000}
num_labels = 1
lambda_ = 0.01
costFun = lambda p: costFunction(p,input_layer_size,hidden_layer_size,x,y,num_labels,lambda_)
res = opt.minimize(costFun,iParameters,jac=True,method='TNC',options=options)
params = res.x
params

array([ 2.86891687e+00,  5.44582522e-02,  3.75181576e-02,  8.79650286e-02,
       -9.01501034e-01,  2.02076867e-02, -2.89338189e-01, -1.27784650e+00,
        4.98558700e-02,  1.90089648e-01, -1.17258343e-02,  6.14186446e+00,
        2.94406128e-02, -1.63200276e-01, -9.68070281e-02, -5.85691579e-01,
       -6.76043017e-03,  2.48390015e-01,  5.86327476e+00,  2.99359714e-02,
        1.28721417e-01, -2.45232459e-02, -9.00132651e+00, -1.07289748e+00,
       -3.80214081e+00,  1.52510856e+01, -1.07169875e+00,  1.89973350e+00,
       -9.12482437e+00, -5.31116167e+00, -1.27512531e+00,  1.59641395e+00,
        5.50675312e-01, -6.74295847e+00, -1.65375654e-01,  6.20418237e-02,
       -1.93668757e-02,  2.37932775e+00,  1.59533046e-01,  3.84233824e-02,
       -1.48249801e-01,  2.49731448e-02,  8.84682243e-01, -6.32765608e-02,
        1.63551438e+01, -1.04438125e+01, -9.75195185e+00, -1.17198330e+00,
       -9.62266238e+00])

In [26]:
r,_ = costFunction(iParameters,input_layer_size,hidden_layer_size,x,y,num_labels,lambda_ )
r[0]

0.6606477216684498

In [27]:
q,_=costFunction(params,input_layer_size,hidden_layer_size,x,y,num_labels,lambda_ )
q[0]

0.33785133715028093

In [28]:
Theta1 = np.reshape(params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))

In [29]:
def predict(theta1, theta2, X):
    """
    Predict the label of an input given a trained neural network
    Outputs the predicted label of X given the trained weights of a neural
    network(Theta1, Theta2)
    """
    # Useful values
    m = X.shape[0]
    num_labels = Theta2.shape[0]
    
    pred = [0]*m
    
    a1 = np.c_[np.ones((m,1)),X]
    z2 = a1.dot(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.c_[np.ones((a2.shape[0],1)),a2]
    z3 = a2.dot(theta2.T)
    a3 = sigmoid(z3)
    for i in range(len(a3)):
        if a3[i] >= 0.5:
            pred[i] = 1
        else:
            pred[i] = 0
    return pred


In [30]:
pred = np.asarray(predict(Theta1, Theta2, x)).reshape(y.shape)
print('Training Set Accuracy: %f' % (np.mean(pred == y) * 100))

Training Set Accuracy: 86.200000


In [31]:
pred = np.asarray(predict(Theta1, Theta2, x_t)).reshape(y_t.shape)
print(pred.shape)
print('Test Set Accuracy: %f' % (np.mean(pred == y_t) * 100))

(2000, 1)
Test Set Accuracy: 86.300000


In [32]:
import sklearn.metrics as met

In [33]:
pred_train = np.asarray(predict(Theta1, Theta2, x)).reshape(y.shape)
print(met.f1_score(y,pred_train))

0.585274229902329


In [34]:
pred_test = np.asarray(predict(Theta1, Theta2, x_t)).reshape(y_t.shape)
print(met.f1_score(y_t,pred_test))

0.5885885885885886
