In [1]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import json

In [3]:
# Set random seed for reproducibility
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

# Load dataset
data = pd.read_csv(r"D:\Keras\02.Bank_churn_Modeling_ANN\Churn_Modelling.csv")  

## Step 1: Data Exploration
print("Dataset shape:", data.shape)
print("\nFirst 5 rows:")
print(data.head())
print("\nData types and missing values:")
print(data.info())
print("\nSummary statistics:")
print(data.describe())

# Visualize target variable distribution
plt.figure(figsize=(6,4))
sns.countplot(x='Exited', data=data)
plt.title('Distribution of Churn (Exited)')
# plt.savefig('churn_distribution.png')
plt.close()

Dataset shape: (10000, 14)

First 5 rows:
   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  


In [4]:
## Step 2: Data Preprocessing
# Drop irrelevant columns
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# One-hot encode categorical features (better than label encoding for non-ordinal categories)
data = pd.get_dummies(data, columns=['Geography', 'Gender'], drop_first=True)

# Separate features (X) and target variable (y)
X = data.drop('Exited', axis=1)
y = data['Exited']

In [5]:
# Save feature names for later use
feature_names = X.columns.tolist()
with open('feature_names.json', 'w') as f:
    json.dump(feature_names, f)

In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for later use
joblib.dump(scaler, 'scaler.save')

['scaler.save']

In [7]:
## Step 3: Build the ANN Model
model = Sequential()

# Input layer
model.add(Dense(units=64, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dropout(0.3))  # Add dropout to prevent overfitting

# Hidden layers
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(units=16, activation='relu'))
model.add(Dropout(0.1))

# Output layer
model.add(Dense(units=1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Compile the ANN
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Display model summary
print("\nModel Summary:")
model.summary()


Model Summary:


In [11]:
X_train_scaled.shape, y_train.shape

((8000, 11), (8000,))

In [12]:
## Step 4: Train the Model
history = model.fit(
    X_train_scaled, 
    y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7693 - loss: 0.5223 - val_accuracy: 0.8165 - val_loss: 0.4205
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8090 - loss: 0.4411 - val_accuracy: 0.8285 - val_loss: 0.4008
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8239 - loss: 0.4198 - val_accuracy: 0.8405 - val_loss: 0.3793
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8387 - loss: 0.3977 - val_accuracy: 0.8550 - val_loss: 0.3628
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8447 - loss: 0.3858 - val_accuracy: 0.8530 - val_loss: 0.3544
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8521 - loss: 0.3677 - val_accuracy: 0.8560 - val_loss: 0.3516
Epoch 7/100
[1m250/25

In [13]:
# Save the trained model
model.save('customer_churn_model.h5')



In [14]:


## Step 5: Evaluate the Model
# Plot training history
plt.figure(figsize=(12, 5))

# Plot loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.tight_layout()
plt.savefig('training_history.png')
plt.close()

In [15]:
# Final evaluation on test data
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"\nTest Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Make predictions
y_pred_prob = model.predict(X_test_scaled)
y_pred = (y_pred_prob > 0.5).astype("int32")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8775 - loss: 0.3108

Test Accuracy: 0.8635
Test Loss: 0.3329
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [None]:
# 0 → Customer did NOT churn (they stayed with the company)

# 1 → Customer churned (they left or closed their account)

In [17]:
y_pred[0]

array([0], dtype=int32)

In [18]:
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Not Churn', 'Churn'], 
            yticklabels=['Not Churn', 'Churn'])
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')
plt.close()

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1593
           1       0.76      0.48      0.59       407

    accuracy                           0.86      2000
   macro avg       0.82      0.72      0.75      2000
weighted avg       0.85      0.86      0.85      2000



In [20]:
## Step 7: Prediction on Single Data Point
def predict_single_customer(customer_data):
    """
    Make prediction for a single customer's data
    customer_data should be a dictionary with all required features
    """
    # Load necessary artifacts
    loaded_model = tf.keras.models.load_model('customer_churn_model.h5')
    scaler = joblib.load('scaler.save')
    with open('feature_names.json', 'r') as f:
        feature_names = json.load(f)
    
    # Create DataFrame from input
    input_df = pd.DataFrame([customer_data])
    
    # Ensure all required columns are present
    for col in feature_names:
        if col not in input_df.columns:
            input_df[col] = 0  # Add missing columns with default value
    
    # Reorder columns to match training data
    input_df = input_df[feature_names]
    
    # Scale the data
    input_scaled = scaler.transform(input_df)
    
    # Make prediction
    prediction_prob = loaded_model.predict(input_scaled)[0][0]
    prediction = 1 if prediction_prob > 0.5 else 0
    
    return {
        'probability': float(prediction_prob),
        'prediction': int(prediction),
        'class': 'Churn' if prediction == 1 else 'Not Churn'
    }

# Example usage:
sample_customer = {
    'CreditScore': 600,
    'Age': 40,
    'Tenure': 3,
    'Balance': 60000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 50000,
    'Geography_Germany': 0,
    'Geography_Spain': 1,
    'Gender_Male': 0
}

print("\nSample Prediction:")
print(predict_single_customer(sample_customer))




Sample Prediction:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
{'probability': 0.09911832958459854, 'prediction': 0, 'class': 'Not Churn'}
