In [60]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

In [23]:
# Load the customers to be scored.
churn_data = pd.read_csv('test.csv')

# Report how many missing values each column contains; the feature
# engineering further down does no imputation of its own.
nan_counts = churn_data.isnull().sum()
print(nan_counts)

id                 0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
dtype: int64


In [24]:
# Embedding configuration. The Word2Vec width and the fallback zero vector
# must always have the same length, so the size is named exactly once.
SURNAME_EMBED_DIM = 50
SURNAME_PCA_COMPONENTS = 10
SURNAME_W2V_SEED = 1  # same value as gensim's default seed, made explicit

# Ensure Surname column is string type for processing
churn_data['Surname'] = churn_data['Surname'].astype(str)

# One whitespace-tokenised "sentence" per distinct surname.
sentences = [name.split() for name in churn_data['Surname'].unique().tolist()]

# Train Word2Vec. A fixed seed together with a single worker thread removes
# thread-scheduling jitter so that re-running the cell yields the same vectors
# (full cross-process determinism additionally needs PYTHONHASHSEED to be set).
model = Word2Vec(
    sentences,
    vector_size=SURNAME_EMBED_DIM,
    window=3,
    min_count=1,
    seed=SURNAME_W2V_SEED,
    workers=1,
)

# Look up one vector per row. A surname that contains whitespace was split
# into several tokens above, so the full string is not a vocabulary key; such
# rows fall back to an all-zero vector of the embedding width.
zero_vector = np.zeros(SURNAME_EMBED_DIM, dtype=np.float32)
vectors = [
    model.wv[surname] if surname in model.wv else zero_vector
    for surname in churn_data['Surname']
]

# Convert the vectors to a DataFrame
surname_vectors_df = pd.DataFrame(vectors)

# Apply PCA. random_state pins the result when scikit-learn selects a
# randomized SVD solver for this input shape.
# NOTE(review): both the embedding and the PCA are fitted on this file's rows
# only. If the classifier was trained on features produced by a different fit,
# `pca_0` is not comparable between training and scoring - confirm against the
# training notebook.
pca = PCA(n_components=SURNAME_PCA_COMPONENTS, random_state=0)
surname_pca = pca.fit_transform(surname_vectors_df)

In [25]:
# Inspect the PCA projection of the surname vectors (result of pca.fit_transform).
surname_pca

array([[-0.00362187,  0.01969531,  0.00653744, ...,  0.01932003,
        -0.03154706, -0.01255974],
       [-0.00970108, -0.0122481 , -0.01218967, ...,  0.01234379,
        -0.00313088, -0.00414226],
       [-0.00038546, -0.01553748, -0.01086019, ...,  0.01469728,
        -0.01134762,  0.03335265],
       ...,
       [-0.00709857, -0.01444706,  0.04314492, ...,  0.01822482,
         0.0150478 , -0.00968025],
       [ 0.00105936, -0.00692069, -0.00215593, ..., -0.00199064,
        -0.01853939,  0.0274087 ],
       [ 0.02996059, -0.00678158, -0.02456724, ...,  0.00734631,
         0.03535441, -0.00112485]])

In [26]:
# Assemble the model's feature matrix in a single constructor call.
# Column order is significant: the frame is later converted to a bare array
# and fed to the network positionally, so the keys below must stay in this
# exact sequence.
churn_data_final = pd.DataFrame({
    # Pairwise combinations of Age / Balance with NumOfProducts.
    'Mean_Age_NumOfProducts': (churn_data['Age'] + churn_data['NumOfProducts']) / 2,
    'NumOfProducts_Age_difference': churn_data['NumOfProducts'] - churn_data['Age'],
    'Balance_NumOfProducts_difference': churn_data['Balance'] - churn_data['NumOfProducts'],
    'Mean_NumOfProducts_Balance': (churn_data['NumOfProducts'] + churn_data['Balance']) / 2,
    # Raw numeric columns passed through unchanged.
    'NumOfProducts': churn_data['NumOfProducts'],
    'CreditScore': churn_data['CreditScore'],
    'Balance': churn_data['Balance'],
    'EstimatedSalary': churn_data['EstimatedSalary'],
    'Age': churn_data['Age'],
    # First principal component of the surname embedding.
    'pca_0': surname_pca[:, 0],
})

In [27]:
# Drop the pandas labels: downstream code works on the bare 2-D array.
test = churn_data_final.to_numpy()

In [28]:
# Standardise each feature column, fitting and applying in two explicit steps.
# NOTE(review): the scaler statistics come from this file's rows; confirm that
# this matches how the features were scaled when the classifier was trained.
scaler = StandardScaler().fit(test)
test = scaler.transform(test)

In [29]:
# Inspect the feature matrix after it has been passed through the scaler.
test

array([[-1.66388764,  1.74225211, -0.88127788, ...,  0.96787351,
        -1.70650447, -0.22542774],
       [ 0.83043716, -0.94349269, -0.88126195, ..., -0.79093861,
         0.88899019, -0.60380156],
       [-0.41672524,  0.51128574, -0.88127788, ...,  0.52841334,
        -0.46518094, -0.02399133],
       ...,
       [-0.75686044,  0.84700384, -0.88127788, ..., -1.90998095,
        -0.80372372, -0.44181964],
       [-0.75686044,  0.62319178, -0.88126195, ...,  0.92490799,
        -0.69087612,  0.06593516],
       [-0.18996844,  0.06366161,  0.51785544, ..., -1.75238983,
        -0.12663815,  1.86476727]])

In [53]:
class SimpleBinaryClassifier(nn.Module):
    """Three stacked linear layers followed by a sigmoid.

    Layer widths are ``input_dim -> 2 * input_dim -> input_dim -> 1``. No
    activation is applied between the linear layers; only the final layer's
    output is passed through ``torch.sigmoid``.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_dim = 2 * input_dim
        # Attribute names double as state-dict keys, so they must stay fc1/fc2/fc3.
        self.fc1 = nn.Linear(input_dim, hidden_dim)   # first hidden layer
        self.fc2 = nn.Linear(hidden_dim, input_dim)   # second hidden layer
        self.fc3 = nn.Linear(input_dim, 1)            # output layer

    def forward(self, x):
        """Return the sigmoid of the three chained linear maps applied to ``x``."""
        logits = self.fc3(self.fc2(self.fc1(x)))
        return torch.sigmoid(logits)

def _fraction(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def neural_network_method(X_train, X_test, y_train, y_test, epochs=100, lr=0.01,
                          save_path='neural_network_model.pth', model=None):
    """Train a binary classifier with full-batch Adam and evaluate it.

    Args:
        X_train, X_test: Array-likes of features, one row per sample.
        y_train, y_test: Array-likes of 0/1 labels; reshaped to a column.
        epochs: Number of full-batch optimisation steps (default 100).
        lr: Adam learning rate (default 0.01).
        save_path: Where to write ``model.state_dict()`` after training. Pass
            ``None`` to skip saving.
        model: Optional ``nn.Module`` to train. It must map a float32 feature
            batch to probabilities of shape ``(n, 1)``. When omitted, a fresh
            ``SimpleBinaryClassifier`` sized to ``X_train`` is created.

    Returns:
        Tuple ``(model_result, accuracy_zero, accuracy_one)`` where
        ``model_result`` is a DataFrame with columns ``test_true`` and
        ``test_predict``, and the two accuracies are the fraction of correctly
        predicted test rows whose true label is 0 and 1 respectively. An
        accuracy is NaN when the test set has no rows of that class, instead
        of raising ``ZeroDivisionError``.
    """
    X_train = torch.tensor(np.array(X_train), dtype=torch.float32)
    y_train = torch.tensor(np.array(y_train), dtype=torch.float32).view(-1, 1)
    X_test = torch.tensor(np.array(X_test), dtype=torch.float32)
    y_test = torch.tensor(np.array(y_test), dtype=torch.float32).view(-1, 1)

    if model is None:
        model = SimpleBinaryClassifier(X_train.shape[1])

    # loss function
    criterion = nn.BCELoss()

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # fit: every step uses the whole training set as a single batch
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    # evaluate
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        predictions = (outputs > 0.5).float()
        correct = predictions.eq(y_test)

        accuracy = _fraction(correct.sum().item(), y_test.size(0))

        # Calculate the accuracy when Exited = 0
        mask_zero = (y_test == 0)
        accuracy_zero = _fraction(correct[mask_zero].sum().item(), mask_zero.sum().item())
        print('Accuracy when Exited = 0: ', accuracy_zero)

        # Calculate the accuracy when Exited = 1
        mask_one = (y_test == 1)
        accuracy_one = _fraction(correct[mask_one].sum().item(), mask_one.sum().item())
        print('Accuracy when Exited = 1: ', accuracy_one)

        loss = criterion(outputs, y_test)

    print(f"Test Loss: {loss.item()}, Test Accuracy: {accuracy}")

    predictions = predictions.cpu().numpy()
    y_test = y_test.cpu().numpy()

    model_result = pd.DataFrame({'test_true': y_test.flatten(), 'test_predict': predictions.flatten()})

    # Only the parameters are persisted; loading requires rebuilding the module
    # and calling load_state_dict().
    if save_path is not None:
        torch.save(model.state_dict(), save_path)

    return model_result, accuracy_zero, accuracy_one

In [62]:
# Restore the trained classifier from disk.
# neural_network_method() persists only `model.state_dict()`, so the file
# normally holds a mapping of parameter tensors rather than a module, and
# calling .eval() directly on that mapping raises AttributeError. Both layouts
# (bare state dict, or a fully pickled module) are accepted here.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load('neural_network_model.pth')
if isinstance(checkpoint, nn.Module):
    model_nn = checkpoint
else:
    # fc1 is nn.Linear(input_dim, 2 * input_dim), so its weight's second
    # dimension is the input width the network was built with.
    model_nn = SimpleBinaryClassifier(checkpoint['fc1.weight'].shape[1])
    model_nn.load_state_dict(checkpoint)
model_nn.eval()

SimpleBinaryClassifier(
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

In [74]:
# Score every row of the feature matrix with the loaded network and flatten
# the result to a 1-D NumPy array of probabilities.
pred_prob = (
    model_nn(torch.tensor(np.array(test), dtype=torch.float32))
    .detach()
    .numpy()
    .flatten()
)

# Prediction table: customer identifiers, the predicted probability, and a
# label that reads 'stay' below 0.5 and 'exit' otherwise.
df = pd.DataFrame({
    'id': churn_data['id'].to_numpy(),
    'CustomerId': churn_data['CustomerId'].to_numpy(),
    'PredProb': pred_prob,
    'PredStatus': np.where(pred_prob < 0.5, 'stay', 'exit'),
})

In [75]:
# Display the prediction table (id, CustomerId, PredProb, PredStatus).
df

Unnamed: 0,id,CustomerId,PredProb,PredStatus
0,165034,15773898,0.149467,stay
1,165035,15782418,0.160420,stay
2,165036,15807120,0.137349,stay
3,165037,15808905,0.176211,stay
4,165038,15607314,0.227854,stay
...,...,...,...,...
110018,275052,15662091,0.258538,stay
110019,275053,15774133,0.264919,stay
110020,275054,15728456,0.115849,stay
110021,275055,15687541,0.178209,stay
