In [2]:
pip install tensorflow

Collecting tensorflowNote: you may need to restart the kernel to use updated packages.

  Downloading tensorflow-2.14.0-cp310-cp310-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.14.0
  Downloading tensorflow_intel-2.14.0-cp310-cp310-win_amd64.whl (284.1 MB)
     -------------------------------------- 284.1/284.1 MB 6.2 MB/s eta 0:00:00
Collecting keras<2.15,>=2.14.0
  Downloading keras-2.14.0-py3-none-any.whl (1.7 MB)
     ---------------------------------------- 1.7/1.7 MB 12.1 MB/s eta 0:00:00
Collecting tensorboard<2.15,>=2.14
  Downloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)
     ---------------------------------------- 5.5/5.5 MB 12.1 MB/s eta 0:00:00
Collecting google-pasta>=0.1.1
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
     ---------------------------------------- 57.5/57.5 KB 3.1 MB/s eta 0:00:00
Collecting wrapt<1.15,>=1.11.0
  Downloading wrapt-1.14.1-cp310-cp310-win_amd64.whl (35 kB)
Collecting h5py>=2.9.0
  Downloading h5py-3.10.0-cp310-cp

You should consider upgrading via the 'C:\Python310\python.exe -m pip install --upgrade pip' command.


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
from tensorflow import keras

# Loading dataset
df = pd.read_excel('customer_churn_large_dataset.xlsx')
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])
df['Location'] = label_encoder.fit_transform(df['Location'])
X = df[['Age', 'Gender', 'Location', 'Subscription_Length_Months', 'Monthly_Bill', 'Total_Usage_GB']]
y = df['Churn']
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model 1: Logistic Regression
model_1 = LogisticRegression()
logistic_scores = cross_val_score(model_1, X, y, cv=StratifiedKFold(n_splits=5, shuffle=True), scoring='accuracy')
logisticaccuracy = logistic_scores.mean()

# Model 2: Random Forest
model_2 = RandomForestClassifier()
rf_scores = cross_val_score(model_2, X, y, cv=StratifiedKFold(n_splits=5, shuffle=True), scoring='accuracy')
rfaccuracy = rf_scores.mean()

# Model 3: Neural Network
model_3 = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X.shape[1],)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model_3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Use StratifiedKFold for cross-validation
nn_scores = []
cv = StratifiedKFold(n_splits=5, shuffle=True)
for train_index, val_index in cv.split(X, y):
    X_train_cv, X_val = X[train_index], X[val_index]
    y_train_cv, y_val = y.iloc[train_index], y.iloc[val_index]

    model_3.fit(X_train_cv, y_train_cv, epochs=10, batch_size=32, verbose=0)
    minn, accuracy = model_3.evaluate(X_val, y_val)
    nn_scores.append(accuracy)

nnaccuracy = np.mean(nn_scores)

print(f'Logistic Regression Accuracy: {logisticaccuracy}')
print(f'Random Forest Accuracy: {rfaccuracy}')
print(f'Neural Network Accuracy: {nnaccuracy}')

best_model = 'Logistic Regression' if logisticaccuracy >= rfaccuracy and logisticaccuracy >= nnaccuracy else 'Random Forest' if rfaccuracy >= nnaccuracy else 'Neural Network'
print(f'Best Model: {best_model}')


Logistic Regression Accuracy: 0.50109
Random Forest Accuracy: 0.49688
Neural Network Accuracy: 0.5064800024032593
Best Model: Neural Network


In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
y_predict = model_3.predict(X_test)
y_binary = (y_predict > 0.5).astype(int)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_binary)
confmatrix = confusion_matrix(y_test, y_binary)
classificationrep = classification_report(y_test, y_binary)

print(f'Accuracy on the test set: {accuracy}')
print(f'Confusion Matrix:\n{confmatrix}')
print(f'Classification Report:\n{classificationrep}')

Accuracy on the test set: 0.5196
Confusion Matrix:
[[5199 4880]
 [4728 5193]]
Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.52      0.52     10079
           1       0.52      0.52      0.52      9921

    accuracy                           0.52     20000
   macro avg       0.52      0.52      0.52     20000
weighted avg       0.52      0.52      0.52     20000

