# سوال 1: پیاده‌سازی MLP سه لایه با NumPy
## Bank Churn Prediction using 3-Layer MLP (Pure NumPy)

### University of Kurdistan - Department of Computer and IT Engineering

**هدف:** ساخت یک MLP سه لایه از ابتدا با استفاده از NumPy برای پیش‌بینی ریزش مشتری

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Pin NumPy's global random state so repeated runs draw identical numbers.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

print("Libraries imported successfully!")

Libraries imported successfully!


## 2. Generate Synthetic Bank Churn Dataset

In [2]:
# Build a reproducible synthetic bank-customer table.
# NOTE: every column is drawn from the same global RNG stream, so the draws
# below must stay in this exact order to reproduce the same values.
np.random.seed(42)
n_samples = 5000

data = {}
data['age'] = np.random.randint(18, 80, n_samples)
data['credit_score'] = np.random.normal(650, 100, n_samples).astype(int)
data['balance'] = np.round(np.random.exponential(50000, n_samples), 2)
data['products_number'] = np.random.randint(1, 5, n_samples)
data['estimated_salary'] = np.round(np.random.uniform(20000, 150000, n_samples), 2)
data['country'] = np.random.choice(['Germany', 'France', 'Spain'], n_samples)
data['gender'] = np.random.choice(['Male', 'Female'], n_samples)
data['tenure'] = np.random.randint(0, 10, n_samples)
data['has_credit_card'] = np.random.choice([0, 1], n_samples)
data['is_active_member'] = np.random.choice([0, 1], n_samples)

df = pd.DataFrame(data)

# Target label: a customer counts as churned when at least one rule fires.
high_balance = df['balance'] > 80000
low_credit = df['credit_score'] < 550
single_product = df['products_number'] == 1
inactive_senior = (df['age'] > 65) & (df['is_active_member'] == 0)
df['churn'] = (high_balance | low_credit | single_product | inactive_senior).astype(int)

n_rows, n_cols = df.shape
print(f"Dataset size: {n_rows} rows × {n_cols} columns")
print(f"Churn rate: {df['churn'].mean():.2%}")
print("\nFirst 5 rows:")
df.head()

Dataset size: 5000 rows × 11 columns
Churn rate: 55.44%

First 5 rows:


Unnamed: 0,age,credit_score,balance,products_number,estimated_salary,country,gender,tenure,has_credit_card,is_active_member,churn
0,56,348,47753.51,1,51862.6,Germany,Female,3,1,0,1
1,69,668,88025.62,3,112913.89,Germany,Female,6,0,1,1
2,46,830,8569.11,2,87781.32,Spain,Female,1,1,0,0
3,32,773,45110.98,1,108825.96,France,Female,6,0,0,1
4,60,670,45022.17,2,55027.0,France,Female,4,1,0,0


## 3. Data Preprocessing

In [None]:
# Turn the two string columns into integer codes, one encoder per column so
# each keeps its own label vocabulary.
le_country, le_gender = LabelEncoder(), LabelEncoder()
for source_column, encoder in (('country', le_country), ('gender', le_gender)):
    df[f'{source_column}_encoded'] = encoder.fit_transform(df[source_column])

# Model inputs, in the column order the network will see them.
feature_columns = [
    'age',
    'credit_score',
    'balance',
    'products_number',
    'estimated_salary',
    'country_encoded',
    'gender_encoded',
    'tenure',
    'has_credit_card',
    'is_active_member',
]

X = df[feature_columns].values
y = df['churn'].values.reshape(-1, 1)  # column vector of labels

# Two-stage split -> 60% train / 20% validation / 20% test.
# Stage 1 holds out 20% of all rows as the test set.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Stage 2 takes 25% of the remaining 80% (i.e. 20% overall) for validation.
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

# Standardize with statistics learned from the training rows only, then apply
# the same transform to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = [scaler.transform(part) for part in (X_train, X_val, X_test)]

split_report = (
    ("Training", X_train, y_train),
    ("Validation", X_val, y_val),
    ("Test", X_test, y_test),
)
for split_name, split_X, _ in split_report:
    print(f"{split_name} set: {split_X.shape}")
for position, (split_name, _, split_y) in enumerate(split_report):
    lead = "\n" if position == 0 else ""
    print(f"{lead}Churn rate in {split_name.lower()} set: {split_y.mean():.2%}")

## 4. MLP Implementation with NumPy
### پیاده‌سازی MLP سه لایه: Input → Hidden → Output

In [None]:
class MLP_ThreeLayer:
    """
    Three-layer perceptron (input -> hidden -> output) for binary classification.

    The hidden layer uses ReLU, the output layer uses a sigmoid, and training is
    full-batch gradient descent on binary cross-entropy.

    ``forward_propagation`` stores its intermediate results (``Z1``, ``A1``,
    ``Z2``, ``A2``) on the instance and ``backward_propagation`` reads them, so
    a backward pass must directly follow the forward pass on the same batch.
    ``evaluate``, ``predict`` and ``predict_proba`` also run a forward pass and
    therefore overwrite those stored values.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """
        Initialize the MLP with random weights and zero biases.

        Args:
            input_size: Number of input features
            hidden_size: Number of neurons in hidden layer
            output_size: Number of output neurons (1 for binary classification)
            learning_rate: Learning rate for gradient descent
        """
        self.lr = learning_rate

        # He initialization: standard-normal weights scaled by sqrt(2 / fan_in).
        # (Xavier/Glorot would scale by the fan-in/fan-out average instead.)
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))

        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

        # Per-epoch metrics appended by train()
        self.train_loss_history = []
        self.train_acc_history = []
        self.val_loss_history = []
        self.val_acc_history = []

    @staticmethod
    def _match_labels(y, y_pred):
        """
        Return ``y`` shaped so it lines up element-wise with ``y_pred``.

        Flat labels of shape (m,) combined with (m, 1) predictions would
        broadcast to an (m, m) matrix and silently corrupt the loss, accuracy
        and gradients, so that one case is reshaped to (m, 1).  Every other
        shape is returned unchanged.
        """
        y = np.asarray(y)
        y_pred = np.asarray(y_pred)
        is_flat_vs_column = (
            y.ndim == 1
            and y_pred.ndim == 2
            and y_pred.shape == (y.shape[0], 1)
        )
        return y.reshape(-1, 1) if is_flat_vs_column else y

    def sigmoid(self, z):
        """Sigmoid activation; input is clipped to [-500, 500] before np.exp."""
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """
        Derivative of the sigmoid expressed through its *output*.

        ``z`` must already be a sigmoid activation ``a = sigmoid(x)``; the
        result ``a * (1 - a)`` is then the derivative with respect to ``x``.
        Passing a raw pre-activation here gives the wrong value.
        """
        return z * (1 - z)

    def relu(self, z):
        """ReLU activation function"""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Derivative of ReLU: 1.0 where z > 0, else 0.0"""
        return (z > 0).astype(float)

    def forward_propagation(self, X):
        """
        Forward pass through the network.

        Stores Z1, A1, Z2 and A2 on the instance for the backward pass.

        Args:
            X: Input data (m, input_size)

        Returns:
            A2: Output predictions (m, output_size)
        """
        # Hidden layer: ReLU activation
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)

        # Output layer: Sigmoid activation
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)

        return self.A2

    def compute_loss(self, y_true, y_pred):
        """
        Compute binary cross-entropy loss.

        Args:
            y_true: True labels, (m, 1) or flat (m,)
            y_pred: Predicted probabilities (m, 1)

        Returns:
            loss: Average loss
        """
        epsilon = 1e-15  # To avoid log(0)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        y_true = self._match_labels(y_true, y_pred)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

    def backward_propagation(self, X, y):
        """
        Backward pass: compute gradients and apply one gradient-descent step.

        Uses the activations stored by the most recent forward_propagation
        call, which must have been run on this same ``X``.

        Args:
            X: Input data (m, input_size)
            y: True labels, (m, 1) or flat (m,)
        """
        m = X.shape[0]
        y = self._match_labels(y, self.A2)

        # Output layer gradients (sigmoid + cross-entropy simplifies to A2 - y)
        dZ2 = self.A2 - y
        dW2 = (1/m) * np.dot(self.A1.T, dZ2)
        db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer gradients
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = (1/m) * np.dot(X.T, dZ1)
        db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

        # Update weights and biases (all gradients were computed above, so the
        # update order does not matter)
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def evaluate(self, X, y):
        """
        Evaluate model on given data.

        Args:
            X: Input data (m, input_size)
            y: True labels, (m, 1) or flat (m,)

        Returns:
            loss, accuracy
        """
        y_pred = self.forward_propagation(X)
        y = self._match_labels(y, y_pred)
        loss = self.compute_loss(y, y_pred)
        predictions = (y_pred > 0.5).astype(int)
        accuracy = np.mean(predictions == y)
        return loss, accuracy

    def train(self, X_train, y_train, X_val=None, y_val=None, epochs=1000, verbose=True):
        """
        Train the MLP with optional validation set.

        Each epoch is one full-batch update.  The recorded training loss and
        accuracy are those of the predictions made *before* that epoch's
        update; validation metrics are measured after it.

        Args:
            X_train: Training data (m, input_size)
            y_train: Training labels, (m, 1) or flat (m,)
            X_val: Validation data (optional)
            y_val: Validation labels (optional)
            epochs: Number of training iterations
            verbose: Print training progress every 100 epochs
        """
        has_validation = X_val is not None and y_val is not None

        for epoch in range(epochs):
            # Forward propagation
            y_pred = self.forward_propagation(X_train)
            y_true = self._match_labels(y_train, y_pred)

            # Training loss for the current weights
            train_loss = self.compute_loss(y_true, y_pred)

            # Backward propagation (updates the weights in place)
            self.backward_propagation(X_train, y_true)

            # Training accuracy of the pre-update predictions
            train_predictions = (y_pred > 0.5).astype(int)
            train_accuracy = np.mean(train_predictions == y_true)

            self.train_loss_history.append(train_loss)
            self.train_acc_history.append(train_accuracy)

            if has_validation:
                val_loss, val_accuracy = self.evaluate(X_val, y_val)
                self.val_loss_history.append(val_loss)
                self.val_acc_history.append(val_accuracy)

            if verbose and (epoch + 1) % 100 == 0:
                if has_validation:
                    print(f"Epoch {epoch + 1}/{epochs} - "
                          f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} | "
                          f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
                else:
                    print(f"Epoch {epoch + 1}/{epochs} - Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}")

    def predict(self, X):
        """
        Make hard 0/1 predictions using a 0.5 probability threshold.

        Args:
            X: Input data (m, input_size)

        Returns:
            predictions: Binary predictions (m, 1)
        """
        y_pred = self.forward_propagation(X)
        return (y_pred > 0.5).astype(int)

    def predict_proba(self, X):
        """
        Predict probabilities.

        Args:
            X: Input data (m, input_size)

        Returns:
            probabilities: Predicted probabilities (m, 1)
        """
        return self.forward_propagation(X)

print("MLP class defined successfully!")

## 5. Train the MLP Model

In [None]:
# Network hyper-parameters
hidden_size, output_size = 64, 1     # hidden width; single sigmoid output unit
learning_rate = 0.01
input_size = X_train.shape[1]        # one input unit per feature column

mlp = MLP_ThreeLayer(input_size, hidden_size, output_size, learning_rate=learning_rate)

# Echo the configuration before the (long) training run starts.
for banner_line in (
    "MLP Architecture:",
    f"  Input Layer: {input_size} neurons",
    f"  Hidden Layer: {hidden_size} neurons (ReLU activation)",
    f"  Output Layer: {output_size} neuron (Sigmoid activation)",
    f"  Learning Rate: {learning_rate}",
    "\nStarting training with validation...\n",
):
    print(banner_line)

# Fit on the training split while tracking metrics on the validation split.
mlp.train(X_train, y_train, X_val, y_val, epochs=1000, verbose=True)

## 6. Evaluate the Model

In [6]:
from sklearn.metrics import confusion_matrix, classification_report

# Hard 0/1 predictions for the training and held-out test splits
y_train_pred, y_test_pred = mlp.predict(X_train), mlp.predict(X_test)

# Accuracy = fraction of rows whose predicted label equals the true label
train_accuracy = (y_train_pred == y_train).mean()
test_accuracy = (y_test_pred == y_test).mean()

separator = "=" * 50
print(separator)
print("Model Performance:")
print(separator)
print(f"Training Accuracy: {train_accuracy:.4f} ({train_accuracy*100:.2f}%)")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(separator)

# Per-class breakdown on the test split
cm = confusion_matrix(y_test, y_test_pred)
print("\nConfusion Matrix (Test Set):")
print(cm)
print("\nClassification Report (Test Set):")
class_names = ['No Churn', 'Churn']
print(classification_report(y_test, y_test_pred, target_names=class_names))

Model Performance:
Training Accuracy: 0.8337 (83.38%)
Test Accuracy: 0.8270 (82.70%)

Confusion Matrix (Test Set):
[[333 105]
 [ 68 494]]

Classification Report (Test Set):
              precision    recall  f1-score   support

    No Churn       0.83      0.76      0.79       438
       Churn       0.82      0.88      0.85       562

    accuracy                           0.83      1000
   macro avg       0.83      0.82      0.82      1000
weighted avg       0.83      0.83      0.83      1000



## 7. Visualize Training Progress

In [None]:
# Learning curves: loss on the left panel, accuracy on the right panel.
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

panels = (
    # (axis, training history, validation history, quantity name, y-axis label)
    (axes[0], mlp.train_loss_history, mlp.val_loss_history, 'Loss', 'Loss (Binary Cross-Entropy)'),
    (axes[1], mlp.train_acc_history, mlp.val_acc_history, 'Accuracy', 'Accuracy'),
)
for ax, train_curve, val_curve, quantity, y_label in panels:
    ax.plot(train_curve, linewidth=2, color='blue', label=f'Training {quantity}')
    # The validation curve exists only if training was run with a validation set.
    if len(val_curve):
        ax.plot(val_curve, linewidth=2, color='red', label=f'Validation {quantity}')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(f'Training vs Validation {quantity}', fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Text summary of the last recorded epoch
rule = '=' * 60
final_train_loss = mlp.train_loss_history[-1]
final_train_acc = mlp.train_acc_history[-1]

print(f"\n{rule}")
print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Training Accuracy: {final_train_acc:.4f}")
if len(mlp.val_loss_history):
    final_val_loss = mlp.val_loss_history[-1]
    final_val_acc = mlp.val_acc_history[-1]
    print(f"\nFinal Validation Loss: {final_val_loss:.4f}")
    print(f"Final Validation Accuracy: {final_val_acc:.4f}")
    print(f"\nAccuracy Gap (Train - Val): {(final_train_acc - final_val_acc)*100:.2f}%")
    print(f"Loss Gap (Val - Train): {(final_val_loss - final_train_loss):.4f}")
print(rule)

## 9. Visualize Confusion Matrix

## 10. Summary & Key Insights

### معماری MLP:
- **Input Layer**: 10 ویژگی ورودی (age, credit_score, balance, etc.)
- **Hidden Layer**: 64 نورون با تابع فعال‌سازی ReLU
- **Output Layer**: 1 نورون با تابع فعال‌سازی Sigmoid (برای طبقه‌بندی دودویی)

### نکات کلیدی پیاده‌سازی:

**1. تقسیم داده:**
- Training: 60% (3000 samples)
- Validation: 20% (1000 samples)  
- Test: 20% (1000 samples)

**2. تکنیک‌های بهینه‌سازی:**
- **He Initialization**: وزن‌ها با روش He (توزیع نرمال با مقیاس sqrt(2 / n_in)) مقداردهی اولیه شدند که برای لایه‌های ReLU مناسب است و مشکل vanishing/exploding gradient را کاهش می‌دهد
- **ReLU در Hidden Layer**: سرعت یادگیری را افزایش می‌دهد و مشکل vanishing gradient را کاهش می‌دهد
- **Sigmoid در Output Layer**: برای خروجی احتمالی بین 0 و 1 در classification دودویی
- **Binary Cross-Entropy Loss**: مناسب برای طبقه‌بندی دودویی
- **Gradient Descent**: بهینه‌سازی با استفاده از backward propagation

**3. ارزیابی Overfitting:**
- استفاده از validation set جداگانه برای monitoring overfitting
- مقایسه training و validation metrics در هر epoch
- نمودارهای learning curves برای تشخیص overfitting
- محاسبه accuracy gap و loss gap

**4. مزایای پیاده‌سازی با NumPy:**
- درک عمیق‌تر از نحوه کار neural networks
- کنترل کامل بر forward و backward propagation
- آموزش مفاهیم پایه deep learning بدون استفاده از frameworks

## 8. Overfitting Analysis
### تحلیل Overfitting با استفاده از Learning Curves

In [None]:
# Comprehensive Overfitting Analysis
# Compares the last-epoch training and validation metrics recorded by
# mlp.train() and prints a diagnosis followed by matching recommendations.

# The analysis needs validation metrics; fail with a clear message instead of
# an IndexError when the model was trained without a validation set.
if not mlp.val_loss_history or not mlp.val_acc_history:
    raise RuntimeError(
        "No validation history found: call mlp.train() with X_val and y_val "
        "before running the overfitting analysis."
    )

print("="*70)
print("OVERFITTING ANALYSIS")
print("="*70)

# Final-epoch metrics
train_acc = mlp.train_acc_history[-1]
val_acc = mlp.val_acc_history[-1]
train_loss = mlp.train_loss_history[-1]
val_loss = mlp.val_loss_history[-1]

# Positive gaps mean the model does better on training data than on validation data.
acc_gap = (train_acc - val_acc) * 100   # percentage points
loss_gap = val_loss - train_loss

print(f"\n1. ACCURACY COMPARISON:")
print(f"   Training Accuracy:   {train_acc:.4f} ({train_acc*100:.2f}%)")
print(f"   Validation Accuracy: {val_acc:.4f} ({val_acc*100:.2f}%)")
print(f"   Gap:                 {acc_gap:.2f}%")

print(f"\n2. LOSS COMPARISON:")
print(f"   Training Loss:       {train_loss:.4f}")
print(f"   Validation Loss:     {val_loss:.4f}")
print(f"   Gap:                 {loss_gap:.4f}")

print(f"\n3. OVERFITTING DIAGNOSIS:")
# A tier applies only when BOTH gaps are under its thresholds.
if acc_gap < 2 and loss_gap < 0.05:
    diagnosis = "✅ NO OVERFITTING - Model generalizes well!"
    explanation = "The gap between training and validation metrics is minimal."
elif acc_gap < 5 and loss_gap < 0.1:
    diagnosis = "⚠️  SLIGHT OVERFITTING - Model is acceptable"
    explanation = "Small gap exists but model still generalizes reasonably."
elif acc_gap < 10 and loss_gap < 0.2:
    diagnosis = "⚠️  MODERATE OVERFITTING - Consider regularization"
    explanation = "Noticeable gap suggests model is memorizing training data."
else:
    diagnosis = "❌ SEVERE OVERFITTING - Regularization required!"
    explanation = "Large gap indicates poor generalization."

print(f"   Status: {diagnosis}")
print(f"   {explanation}")

print(f"\n4. RECOMMENDATIONS:")
# Recommend regularization exactly when the diagnosis is MODERATE or SEVERE,
# i.e. when either gap exceeds the SLIGHT tier.  Looking at acc_gap alone
# would print "configuration is good" right after a SEVERE diagnosis
# whenever only the loss gap is large.
needs_regularization = acc_gap >= 5 or loss_gap >= 0.1
if needs_regularization:
    print("   • Add L2 regularization (weight decay)")
    print("   • Implement dropout")
    print("   • Reduce model complexity (fewer neurons)")
    print("   • Get more training data")
    print("   • Apply early stopping")
else:
    print("   • Current model configuration is good")
    print("   • Can try increasing model capacity for better performance")
    print("   • Consider data augmentation for improved robustness")

print("="*70)