# Table of Contents
1. Data Preparation
2. Model Selection
3. Model Training
4. Preparation for a Single Customer
5. Test Det Prediction
6. Mode Evaluation

In [8]:
# Importing libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_selector as selector
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, confusion_matrix

In [9]:
# Load the dataset
# The dataset contains customer churn information, which is crucial for predicting customer behaviour.
df = pd.read_csv("Telco-ANN.csv")

# 1. Data Preparation

In [10]:
# Encoding 'Churn' as the target variable (what we are trying to predict) and transforming categorical 'Yes'/'No' values into binary (1/0) format.
df['Churn'] = df['Churn'].map({'Yes':1, 'No':0})

In [11]:
# Split data into features (X) and target (y).
#'Churn' indicates if a customer has left the company.
X = df.drop('Churn', axis=1)
y = df['Churn']

In [12]:
# Preprocessing: Encoding categorical variables and scaling numerical features
preprocessing = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), selector(dtype_include=['int64', 'float64'])),
        ('cat', OneHotEncoder(), selector(dtype_include=['object', 'bool']))
    ])                                                    

In [13]:
# Split dataset into 80% training and 20% test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [15]:
# Apply preprocessing transformations to training and test datasets.
X_train = preprocessing.fit_transform(X_train)
X_test = preprocessing.transform(X_test)

# 2. Model Selection

In [17]:
# Define the ANN model with three layers, two relu hidden layers, the first layer with 128 neurons, the second with 64 neurons and one sigmoid output layer for binary classification
model = Sequential([
    Dense(128, activation='relu', input_dim=X_train.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compiling the model prepares it for training by specifying optimizer, loss and metrics to evaluate during training.
# 'adam' is used for efficiency, 'binary_crossentropy' for binary classification problems.
model.compile(optimizer='adam', 
              loss='binary_crossentropy', 
              metrics=['accuracy'])

# 3. Model Training

In [19]:
# Train the ANN model on the training data and validate its performance using the validation set
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=2)

Epoch 1/50
159/159 - 1s - loss: 0.4574 - accuracy: 0.7655 - val_loss: 0.4075 - val_accuracy: 0.8245 - 1s/epoch - 8ms/step
Epoch 2/50
159/159 - 0s - loss: 0.4242 - accuracy: 0.7963 - val_loss: 0.4119 - val_accuracy: 0.8245 - 225ms/epoch - 1ms/step
Epoch 3/50
159/159 - 0s - loss: 0.4187 - accuracy: 0.7959 - val_loss: 0.4061 - val_accuracy: 0.8245 - 230ms/epoch - 1ms/step
Epoch 4/50
159/159 - 0s - loss: 0.4161 - accuracy: 0.7992 - val_loss: 0.4018 - val_accuracy: 0.8227 - 215ms/epoch - 1ms/step
Epoch 5/50
159/159 - 0s - loss: 0.4138 - accuracy: 0.7986 - val_loss: 0.4092 - val_accuracy: 0.8227 - 214ms/epoch - 1ms/step
Epoch 6/50
159/159 - 0s - loss: 0.4139 - accuracy: 0.7986 - val_loss: 0.4089 - val_accuracy: 0.8316 - 214ms/epoch - 1ms/step
Epoch 7/50
159/159 - 0s - loss: 0.4130 - accuracy: 0.8004 - val_loss: 0.4060 - val_accuracy: 0.8227 - 208ms/epoch - 1ms/step
Epoch 8/50
159/159 - 0s - loss: 0.4130 - accuracy: 0.7976 - val_loss: 0.4049 - val_accuracy: 0.8245 - 208ms/epoch - 1ms/step
Epo

# 4. Preparation for a Single Customer

In [20]:
# Prediction for a single customer instance.
# Example customer data should be preprocessed in the same way as training data
example_customer = X_test[0:1]

# Predict the churn for the example customer.  The output is a probability that needs to be classified as churn/no churn.
prediction = model.predict(example_customer)
print(f"Churn prediction (1 for churn, 0 for no churn): {prediction[0][0]}")

Churn prediction (1 for churn, 0 for no churn): 0.02356061525642872


# 5. Test Set Prediction

In [23]:
# Use the trained model to predict the churn of the unseen test data
test_predictions = model.predict(X_test)
# Convert probabilities to binary output using the threshold 0.5
test_predictions = (test_predictions > 0.5).astype(int) 



# 6. Model Evaluation

In [24]:
# Evaluate the model's performance on the test set using metrics like accuracy and confusion matrix

# Calculate accuracy
accuracy = accuracy_score(y_test, test_predictions)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7814052519517388


In [25]:
# Caluclate the confusion matrix

In [26]:
conf_matrix = confusion_matrix(y_test, test_predictions)
print(f"Confusion Matrix:\n{conf_matrix}")

Confusion Matrix:
[[926 115]
 [193 175]]


In [27]:
# Interpretation
print("\nAccuracy gives the proportion of total predicitions (both churn and no churn) that were correct.  The confusion matrix provides a detailed breakdown of the mode's performance, showing the true positives, true negatives, false positives and false negatives.")


Accuracy gives the proportion of total predicitions (both churn and no churn) that were correct.  The confusion matrix provides a detailed breakdown of teh mode's performance, showing the true positives, true negatives, false positives and false negatives.
