In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Preprocessing (same steps as the earlier ANN churn example).

# Load the raw data; customerID is an identifier, not a feature.
df = pd.read_csv("../csv/customer_churn.csv").drop(columns=["customerID"])

# Rows whose TotalCharges is a single blank cannot be parsed as numbers, so they are dropped.
# .copy() makes df1 an independent frame: the assignments below then modify df1 itself
# instead of a slice of df (which triggers SettingWithCopyWarning and is a no-op under
# pandas Copy-on-Write).
df1 = df[df.TotalCharges != ' '].copy()
df1['TotalCharges'] = pd.to_numeric(df1['TotalCharges'])

# Collapse the "service not available" answers into a plain 'No'.
df1 = df1.replace(['No phone service', 'No internet service'], 'No')

yes_and_no_cols = ['Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                   'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn']

# Encode Yes/No as 1/0. Assigning the result back (rather than calling
# df1[col].replace(..., inplace=True)) guarantees the column in df1 is updated.
# Any value other than "Yes"/"No" becomes NaN and makes astype() raise.
for col in yes_and_no_cols:
    df1[col] = df1[col].map({"Yes": 1, "No": 0}).astype(np.uint8)

# Encode gender; restricted to the gender column so no other column is touched.
df1["gender"] = df1["gender"].map({"Male": 1, "Female": 0}).astype(np.uint8)

# One-hot encode the multi-valued categoricals; drop_first removes one level per column.
df2 = pd.get_dummies(data=df1, columns=['InternetService', 'Contract', 'PaymentMethod'], drop_first=True)

bool_cols = ['InternetService_No', 'InternetService_Fiber optic', 'Contract_Two year',
             'Contract_One year','PaymentMethod_Mailed check','PaymentMethod_Credit card (automatic)',
             'PaymentMethod_Electronic check']

# Cast the dummy columns to uint8 so every feature is numeric.
df2 = df2.astype({col: np.uint8 for col in bool_cols})

# Scale the continuous features to [0, 1].
# NOTE(review): the scaler is fit on all rows before any train/test split is made,
# so rows that later end up in a test set influence the scaling parameters.
scaler = MinMaxScaler()
cols_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']
df2[cols_to_scale] = scaler.fit_transform(df2[cols_to_scale])

In [3]:
def ANN(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray, epochs: int = 100) -> np.ndarray:
    """Train a small dense binary classifier and report its test performance.

    The network has two hidden ReLU layers (26 and 13 units) and a single
    sigmoid output unit. It is compiled with Adam and binary cross-entropy,
    trained on the training data, evaluated on the test data, and a
    scikit-learn classification report is printed.

    Args:
        X_train: Training features, 2-D (rows x features). The notebook passes
            pandas DataFrames here.
        X_test: Test features with the same number of columns as ``X_train``.
        y_train: Binary training labels.
        y_test: Binary test labels.
        epochs: Number of training epochs.

    Returns:
        The rounded (0./1.) test predictions as returned by ``model.predict``,
        i.e. one column per output unit; callers flatten it when needed.
    """
    model = tf.keras.Sequential([
        # An explicit Input layer replaces the deprecated `input_dim` argument.
        tf.keras.Input(shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(26, activation='relu'),
        tf.keras.layers.Dense(13, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    # `metrics` is passed as a list, the form documented by Keras.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=epochs)

    # Evaluate on the held-out data. The previous argument-less evaluate() call
    # always raised, and the bare except hid that behind 'No evaluation'.
    print(model.evaluate(X_test, y_test))

    # Sigmoid outputs are probabilities; round them to hard 0/1 labels.
    y_pred = model.predict(X_test)
    y_pred = np.round(y_pred)

    print('Classification report: \n',classification_report(y_test, y_pred))

    return y_pred

# Unbalanced dataset solutions

### 1. Undersampling

In [4]:
# value_counts() sorts by frequency, so despite their names these two values are
# simply the larger and the smaller class count; only their min is used below.
count_class_0, count_class_1 = df1.Churn.value_counts()
n_per_class = min(count_class_0, count_class_1)

# Draw the same number of rows from each class without replacement.
# A fixed seed makes the undersampled subset reproducible across runs.
df2_zeroes = df2[df2['Churn'] == 0].sample(n_per_class, random_state=42)
df2_ones = df2[df2['Churn'] == 1].sample(n_per_class, random_state=42)

df2_under = pd.concat([df2_ones, df2_zeroes], axis=0)
print(df2_under.Churn.value_counts())

Churn
1    1869
0    1869
Name: count, dtype: int64


In [5]:
# Stratified 70/30 split of the undersampled frame; the fixed seed makes the
# split (and therefore the reported metrics) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df2_under.drop('Churn', axis='columns'),
    df2_under['Churn'],
    test_size=0.3,
    stratify=df2_under['Churn'],
    random_state=42,
)

In [6]:
# Train and evaluate the network on the undersampled split.
y_preds = ANN(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### 2. Oversampling

In [7]:
# value_counts() sorts by frequency, so these are the larger and the smaller
# class count; every class is brought up to the larger one.
count_class_0, count_class_1 = df1.Churn.value_counts()
target_count = max(count_class_0, count_class_1)

df2_zeroes = df2[df2['Churn'] == 0]
df2_ones = df2[df2['Churn'] == 1]

# Sample with replacement only when a class has fewer rows than the target.
# A class that already has target_count rows is kept in full (merely shuffled)
# instead of being bootstrapped, which would drop some rows and duplicate others.
# The fixed seed keeps the oversampled frame reproducible.
df2_zeroes = df2_zeroes.sample(target_count, replace=len(df2_zeroes) < target_count, random_state=42)
df2_ones = df2_ones.sample(target_count, replace=len(df2_ones) < target_count, random_state=42)

print(df2_zeroes.shape, df2_ones.shape)

# NOTE(review): this frame is oversampled before it is split into train and test,
# so copies of the same row can land on both sides of the split and inflate the scores.
df2_over = pd.concat([df2_ones, df2_zeroes], axis=0)
print(df2_over.Churn.value_counts())

(5163, 24) (5163, 24)
Churn
1    5163
0    5163
Name: count, dtype: int64


In [8]:
# Stratified, seeded 70/30 split, matching the way the undersampled frame is split.
X_train, X_test, y_train, y_test = train_test_split(
    df2_over.drop('Churn', axis='columns'),
    df2_over['Churn'],
    test_size=0.3,
    stratify=df2_over['Churn'],
    random_state=42,
)
y_pred = ANN(X_train, X_test, y_train, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### 3. SMOTE

In [9]:
X = df2.drop('Churn', axis='columns')
y = df2['Churn']

# Split first, so the test set contains only real, untouched rows in their
# original class ratio. Resampling before the split would let synthetic points
# built from test rows leak into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Synthesize minority-class rows for the training portion only.
smote = SMOTE(sampling_strategy='minority', random_state=42)
(Xs, ys) = smote.fit_resample(X_train, y_train)

# Train on the balanced training data, evaluate on the real test data.
y_pred = ANN(Xs, X_test, ys, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### 4. Use of Ensemble with Undersampling

In [15]:
X = df2.drop('Churn', axis='columns')
y = df2['Churn']

# Stratified, seeded split so the run is reproducible and the test set keeps the class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Separate the training rows by class. The majority rows are shuffled once so
# that the chunks taken from them below are random subsets.
class_counts = y_train.value_counts()
minority_mask = y_train == class_counts.idxmin()
X_minority, y_minority = X_train[minority_mask], y_train[minority_mask]
X_majority = X_train[~minority_mask].sample(frac=1, random_state=42)
y_majority = y_train.loc[X_majority.index]

# Chunk size is the minority count in the training set (not in the full data).
min_class = int(class_counts.min())
div = len(X_majority) // min_class

# `div` disjoint majority chunks, plus one tail chunk (overlapping the previous
# one) when the majority count is not an exact multiple of min_class, so that
# every majority row is seen by at least one model.
chunk_starts = [i * min_class for i in range(div)]
if len(X_majority) % min_class:
    chunk_starts.append(len(X_majority) - min_class)

# Running sum of the 0/1 votes of every model for each test row.
ans = np.zeros(y_test.shape[0])

for start in chunk_starts:
    # Each model is trained on a balanced set: one majority chunk + all minority rows.
    X_train_new = pd.concat([X_majority.iloc[start:start + min_class], X_minority])
    y_train_new = pd.concat([y_majority.iloc[start:start + min_class], y_minority])
    y_pred = ANN(X_train_new, X_test, y_train_new, y_test)
    ans += y_pred.reshape(y_pred.shape[0])

# Strict majority vote; the threshold follows the number of models actually trained.
y_pred_new = np.array(ans > len(chunk_starts) / 2, dtype=np.uint8)
print(classification_report(y_test, y_pred_new))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78