# Question 1: 3-Layer MLP with NumPy for Bank Churn Prediction

**University of Kurdistan - Deep Learning Course**

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

# Seed NumPy's global random generator so that every later np.random draw in
# this notebook (dataset generation, weight initialisation) is repeatable.
np.random.seed(42)
print("Libraries imported successfully!")

## Generate Dataset

In [None]:
# Build a reproducible synthetic bank-churn table.
np.random.seed(42)
n_samples = 5000

# NOTE: every call below advances NumPy's global RNG, so the draws must stay
# in exactly this order for the generated data to remain the same.
age_col = np.random.randint(18, 80, n_samples)
credit_col = np.random.normal(650, 100, n_samples).astype(int)
balance_col = np.round(np.random.exponential(50000, n_samples), 2)
products_col = np.random.randint(1, 5, n_samples)
salary_col = np.round(np.random.uniform(20000, 150000, n_samples), 2)
country_col = np.random.choice(['Germany', 'France', 'Spain'], n_samples)
gender_col = np.random.choice(['Male', 'Female'], n_samples)
tenure_col = np.random.randint(0, 10, n_samples)
card_col = np.random.choice([0, 1], n_samples)
active_col = np.random.choice([0, 1], n_samples)

# Insertion order of the dict fixes the column order of the DataFrame.
data = dict(
    age=age_col,
    credit_score=credit_col,
    balance=balance_col,
    products_number=products_col,
    estimated_salary=salary_col,
    country=country_col,
    gender=gender_col,
    tenure=tenure_col,
    has_credit_card=card_col,
    is_active_member=active_col,
)

df = pd.DataFrame(data)

# Rule-based target: a customer is labelled as churned when ANY rule fires.
high_balance = df['balance'] > 80000
low_credit = df['credit_score'] < 550
single_product = df['products_number'] == 1
inactive_senior = (df['age'] > 65) & (df['is_active_member'] == 0)
df['churn'] = (high_balance | low_credit | single_product | inactive_senior).astype(int)

print(f"Dataset: {len(df)} rows × {len(df.columns)} columns")
print(f"Churn rate: {df['churn'].mean():.2%}")
df.head()

## Data Preprocessing

In [None]:
# Integer-code the two string columns; one encoder per column is kept so the
# mapping can be inverted later.
le_country, le_gender = LabelEncoder(), LabelEncoder()
df['country_encoded'] = le_country.fit_transform(df['country'])
df['gender_encoded'] = le_gender.fit_transform(df['gender'])

# Model inputs, in the column order used for the feature matrix.
feature_columns = [
    'age',
    'credit_score',
    'balance',
    'products_number',
    'estimated_salary',
    'country_encoded',
    'gender_encoded',
    'tenure',
    'has_credit_card',
    'is_active_member',
]

X = df[feature_columns].to_numpy()
# Selecting with a list keeps the target two-dimensional: shape (n_samples, 1).
y = df[['churn']].to_numpy()

# 60/20/20 split: hold out 20% for test, then 25% of the remaining 80%
# (i.e. 20% of the total) for validation.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, random_state=42
)

# Standardise using statistics from the training split only, then apply the
# same transform to the validation and test splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val, X_test = (scaler.transform(part) for part in (X_val, X_test))

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")
print(f"Churn - Train: {y_train.mean():.2%}, Val: {y_val.mean():.2%}, Test: {y_test.mean():.2%}")

## MLP Implementation

In [None]:
class MLP_ThreeLayer:
    """Input -> hidden (ReLU) -> output (sigmoid) perceptron written in NumPy.

    The network is trained with full-batch gradient descent on the binary
    cross-entropy loss.  Per-epoch metrics are appended to the ``*_history``
    lists by :meth:`train`.

    Labels may be passed either as a column of shape ``(n, 1)`` or as a flat
    vector of shape ``(n,)``.  Flat vectors are reshaped to a column so they
    cannot broadcast against the ``(n, 1)`` predictions into an ``(n, n)``
    matrix, which would silently corrupt the loss and accuracy and break the
    weight update.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create the weights and empty metric histories.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            learning_rate: Step size used for the gradient-descent update.
        """
        self.lr = learning_rate

        # He initialization: standard-normal weights scaled by sqrt(2 / fan_in).
        # Draws come from NumPy's global RNG, so seed it for reproducibility.
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

        # Training history, one entry per epoch.
        self.train_loss_history = []
        self.train_acc_history = []
        self.val_loss_history = []
        self.val_acc_history = []

    @staticmethod
    def _as_column(y):
        """Return ``y`` as an array, reshaping flat vectors to shape (n, 1)."""
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    def sigmoid(self, z):
        """Element-wise logistic function; the input is clipped to [-500, 500]
        so that ``np.exp`` cannot overflow."""
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Derivative of ReLU: 1.0 where ``z > 0``, otherwise 0.0."""
        return (z > 0).astype(float)

    def forward_propagation(self, X):
        """Run a forward pass and cache the intermediate activations.

        The cached ``Z1``, ``A1``, ``Z2`` and ``A2`` are overwritten on every
        call (including calls made by :meth:`evaluate` and :meth:`predict`).

        Returns:
            The sigmoid outputs ``A2`` with one row per row of ``X``.
        """
        # Hidden layer with ReLU
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        # Output layer with Sigmoid
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def compute_loss(self, y_true, y_pred):
        """Mean binary cross-entropy between ``y_true`` and ``y_pred``.

        Predictions are clipped away from 0 and 1 so the logarithms stay
        finite.
        """
        y_true = self._as_column(y_true)
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

    def backward_propagation(self, X, y):
        """Compute gradients for the batch ``(X, y)`` and update the weights.

        Uses the activations cached by the most recent
        :meth:`forward_propagation` call, so that call must have been made
        on the same ``X``.
        """
        y = self._as_column(y)
        m = X.shape[0]

        # Output layer gradients (sigmoid + cross-entropy gives A2 - y).
        dZ2 = self.A2 - y
        dW2 = (1/m) * np.dot(self.A1.T, dZ2)
        db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer gradients
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = (1/m) * np.dot(X.T, dZ1)
        db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

        # Update weights
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def evaluate(self, X, y):
        """Return ``(loss, accuracy)`` on ``(X, y)`` using a 0.5 threshold."""
        y = self._as_column(y)
        y_pred = self.forward_propagation(X)
        loss = self.compute_loss(y, y_pred)
        predictions = (y_pred > 0.5).astype(int)
        accuracy = np.mean(predictions == y)
        return loss, accuracy

    def train(self, X_train, y_train, X_val=None, y_val=None, epochs=1000, verbose=True):
        """Train with full-batch gradient descent for ``epochs`` iterations.

        Training loss/accuracy are taken from the forward pass made *before*
        the epoch's weight update; validation metrics (only when both
        ``X_val`` and ``y_val`` are given) are computed *after* it.

        Args:
            X_train: Training features.
            y_train: Training labels, shape ``(n, 1)`` or ``(n,)``.
            X_val: Optional validation features.
            y_val: Optional validation labels.
            epochs: Number of passes over the training set.
            verbose: When true, print a progress line every 100 epochs.
        """
        y_train = self._as_column(y_train)
        has_validation = X_val is not None and y_val is not None

        for epoch in range(epochs):
            # Forward pass
            y_pred = self.forward_propagation(X_train)
            train_loss = self.compute_loss(y_train, y_pred)

            # Backward pass
            self.backward_propagation(X_train, y_train)

            # Training metrics
            train_predictions = (y_pred > 0.5).astype(int)
            train_accuracy = np.mean(train_predictions == y_train)
            self.train_loss_history.append(train_loss)
            self.train_acc_history.append(train_accuracy)

            # Validation metrics
            if has_validation:
                val_loss, val_accuracy = self.evaluate(X_val, y_val)
                self.val_loss_history.append(val_loss)
                self.val_acc_history.append(val_accuracy)

            if not (verbose and (epoch + 1) % 100 == 0):
                continue

            if has_validation:
                print(f"Epoch {epoch+1}/{epochs} - "
                      f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} | "
                      f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
            else:
                print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f}, Acc: {train_accuracy:.4f}")

    def predict(self, X):
        """Return hard 0/1 predictions (sigmoid output thresholded at 0.5)."""
        y_pred = self.forward_propagation(X)
        return (y_pred > 0.5).astype(int)

print("MLP class ready!")

## Train Model

In [None]:
# Initialize MLP
mlp = MLP_ThreeLayer(
    input_size=X_train.shape[1],
    hidden_size=64,
    output_size=1,
    learning_rate=0.01
)

# Report the layer sizes from the model's actual weight shapes so the banner
# cannot drift from the hyperparameters passed above.
n_inputs, n_hidden = mlp.W1.shape
n_outputs = mlp.W2.shape[1]
print(f"Architecture: Input({n_inputs}) -> Hidden({n_hidden}, ReLU) -> Output({n_outputs}, Sigmoid)")
print("Training...\n")

# Train with validation; metrics are printed every 100 epochs.
mlp.train(X_train, y_train, X_val, y_val, epochs=1000, verbose=True)

## Evaluation

In [None]:
# Hard 0/1 predictions for each split (train, validation, test - in that order).
y_train_pred, y_val_pred, y_test_pred = (
    mlp.predict(features) for features in (X_train, X_val, X_test)
)

# Accuracy = fraction of predictions that match the true labels.
train_acc = (y_train_pred == y_train).mean()
val_acc = (y_val_pred == y_val).mean()
test_acc = (y_test_pred == y_test).mean()

banner = "=" * 50
print(banner)
print("RESULTS")
print(banner)
print(f"Train Accuracy: {train_acc:.4f} ({train_acc*100:.2f}%)")
print(f"Val Accuracy:   {val_acc:.4f} ({val_acc*100:.2f}%)")
print(f"Test Accuracy:  {test_acc:.4f} ({test_acc*100:.2f}%)")
print(banner)

# Per-class precision / recall / F1 on the held-out test split.
print("\nTest Set Classification Report:")
test_report = classification_report(
    y_test,
    y_test_pred,
    target_names=['No Churn', 'Churn'],
)
print(test_report)

## Visualize Training Progress

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# One entry per panel:
# (train curve, validation curve, train label, validation label, y-axis label, title)
panels = (
    (mlp.train_loss_history, mlp.val_loss_history,
     'Train Loss', 'Val Loss', 'Loss', 'Training vs Validation Loss'),
    (mlp.train_acc_history, mlp.val_acc_history,
     'Train Accuracy', 'Val Accuracy', 'Accuracy', 'Training vs Validation Accuracy'),
)

for ax, (train_curve, val_curve, train_label, val_label, y_label, title) in zip(axes, panels):
    # Training in blue, validation in red, both against the epoch index.
    ax.plot(train_curve, linewidth=2, color='blue', label=train_label)
    ax.plot(val_curve, linewidth=2, color='red', label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Overfitting Analysis

In [None]:
# Metrics recorded for the last training epoch.
train_loss_final, val_loss_final = mlp.train_loss_history[-1], mlp.val_loss_history[-1]
train_acc_final, val_acc_final = mlp.train_acc_history[-1], mlp.val_acc_history[-1]

# Positive gaps mean the model does better on training data than on validation data.
acc_gap = 100 * (train_acc_final - val_acc_final)   # percentage points
loss_gap = val_loss_final - train_loss_final

rule = "=" * 70
print(rule)
print("OVERFITTING ANALYSIS")
print(rule)
print(f"\nAccuracy Gap (Train - Val): {acc_gap:.2f}%")
print(f"Loss Gap (Val - Train):     {loss_gap:.4f}")

# Diagnosis: the first matching rule wins. The two mildest levels need both
# gaps to be small; the remaining levels look at the accuracy gap only.
if acc_gap < 2 and loss_gap < 0.05:
    status, note = "✅ NO OVERFITTING", "Model generalizes well"
elif acc_gap < 5 and loss_gap < 0.1:
    status, note = "⚠️  SLIGHT OVERFITTING", "Acceptable performance"
elif acc_gap < 10:
    status, note = "⚠️  MODERATE OVERFITTING", "Consider regularization"
else:
    status, note = "❌ SEVERE OVERFITTING", "Regularization required"

print(f"\nStatus: {status}")
print(f"Note:   {note}")
print(rule)

## Confusion Matrix

In [None]:
# Rows are the actual classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_test_pred)

plt.figure(figsize=(8, 6))
# Draw the raw counts as an annotated heatmap on the current figure.
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
)
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_title('Confusion Matrix - Test Set')
plt.show()