# سوال 1: پیاده‌سازی MLP سه لایه با NumPy
## Bank Churn Prediction using 3-Layer MLP (Pure NumPy)

### University of Kurdistan - Department of Computer and IT Engineering

**هدف:** ساخت یک MLP سه لایه از ابتدا با استفاده از NumPy برای پیش‌بینی ریزش مشتری

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(seed=42)

print('Libraries imported successfully!')

## 2. Generate Synthetic Bank Churn Dataset

In [None]:
# Build a reproducible synthetic customer table and derive the churn label.
np.random.seed(42)
n_samples = 5000

# NOTE: the columns are drawn from the global RNG in exactly this order;
# reordering the entries would change every generated value.
data = dict(
    age=np.random.randint(18, 80, n_samples),
    credit_score=np.random.normal(650, 100, n_samples).astype(int),
    balance=np.random.exponential(50000, n_samples).round(2),
    products_number=np.random.randint(1, 5, n_samples),
    estimated_salary=np.random.uniform(20000, 150000, n_samples).round(2),
    country=np.random.choice(['Germany', 'France', 'Spain'], n_samples),
    gender=np.random.choice(['Male', 'Female'], n_samples),
    tenure=np.random.randint(0, 10, n_samples),
    has_credit_card=np.random.choice([0, 1], n_samples),
    is_active_member=np.random.choice([0, 1], n_samples),
)

df = pd.DataFrame(data)

# A customer is labelled as churned (1) when ANY of these rules holds:
#   - balance above 80000
#   - credit score below 550
#   - exactly one product
#   - older than 65 and not an active member
df['churn'] = (
    df['balance'].gt(80000)
    | df['credit_score'].lt(550)
    | df['products_number'].eq(1)
    | (df['age'].gt(65) & df['is_active_member'].eq(0))
).astype(int)

print(f"Dataset size: {df.shape[0]} rows × {df.shape[1]} columns")
print(f"Churn rate: {df['churn'].mean():.2%}")
print("\nFirst 5 rows:")
df.head()

## 3. Data Preprocessing
### مدیریت متغیرهای دسته‌بندی و نرمال‌سازی

In [None]:
# Integer-code the two string columns, using one fitted encoder per column.
le_country, le_gender = LabelEncoder(), LabelEncoder()
df['country_encoded'] = le_country.fit_transform(df['country'])
df['gender_encoded'] = le_gender.fit_transform(df['gender'])

# Model inputs, in the column order the network receives them.
feature_columns = [
    'age',
    'credit_score',
    'balance',
    'products_number',
    'estimated_salary',
    'country_encoded',
    'gender_encoded',
    'tenure',
    'has_credit_card',
    'is_active_member',
]

X = df[feature_columns].to_numpy()
# Labels are kept as a column vector so they line up with the (m, 1) output.
y = df['churn'].to_numpy().reshape(-1, 1)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features: statistics are fitted on the training split only and
# then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"\nChurn rate in training set: {y_train.mean():.2%}")
print(f"Churn rate in test set: {y_test.mean():.2%}")

## 4. MLP Implementation with NumPy
### پیاده‌سازی MLP سه لایه: Input → Hidden → Output

In [None]:
class MLP_ThreeLayer:
    """
    Multi-Layer Perceptron with 3 layers:
    - Input Layer
    - Hidden Layer (ReLU activation)
    - Output Layer (sigmoid activation)

    The network is trained with full-batch gradient descent on the binary
    cross-entropy loss. Per-epoch loss and accuracy are appended to
    ``train_loss_history`` and ``train_acc_history``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """
        Initialize the MLP with random weights and zero biases

        Weights are drawn from NumPy's global random generator, so seed it
        (``np.random.seed``) beforehand for reproducible results.

        Args:
            input_size: Number of input features
            hidden_size: Number of neurons in hidden layer
            output_size: Number of output neurons (1 for binary classification)
            learning_rate: Learning rate for gradient descent
        """
        self.lr = learning_rate

        # He initialization: standard normal scaled by sqrt(2 / fan_in).
        # (Xavier/Glorot would scale by 1 / fan_in or 2 / (fan_in + fan_out).)
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))

        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

        # For storing training history (one entry per epoch)
        self.train_loss_history = []
        self.train_acc_history = []

    @staticmethod
    def _as_column(y):
        """Return ``y`` as an array, reshaping a 1-D vector to shape (m, 1).

        Without this, a (m,) label vector would broadcast against the (m, 1)
        network output into an (m, m) matrix and silently corrupt the loss,
        the accuracy and the gradients.
        """
        y = np.asarray(y)
        return y.reshape(-1, 1) if y.ndim == 1 else y

    def sigmoid(self, z):
        """Sigmoid activation function"""
        # Clip the pre-activation so np.exp cannot overflow.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """
        Derivative of sigmoid, expressed in terms of the sigmoid OUTPUT

        Args:
            z: Value of sigmoid(x) (the activation), not the pre-activation x

        Returns:
            z * (1 - z)

        Note: this helper is not called by the training code in this class;
        backward_propagation uses ``A2 - y`` directly for the output layer.
        """
        return z * (1 - z)

    def relu(self, z):
        """ReLU activation function"""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Derivative of ReLU function (1.0 where z > 0, else 0.0)"""
        return (z > 0).astype(float)

    def forward_propagation(self, X):
        """
        Forward pass through the network

        The intermediate values Z1, A1, Z2 and A2 are cached on the instance
        because backward_propagation reads them.

        Args:
            X: Input data (m, input_size)

        Returns:
            A2: Output predictions (m, output_size)
        """
        # Hidden layer: ReLU activation
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)

        # Output layer: Sigmoid activation
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)

        return self.A2

    def compute_loss(self, y_true, y_pred):
        """
        Compute binary cross-entropy loss

        Args:
            y_true: True labels, shape (m, 1) or (m,)
            y_pred: Predicted probabilities, shape (m, 1) or (m,)

        Returns:
            loss: Average loss
        """
        y_true = self._as_column(y_true)
        y_pred = self._as_column(y_pred)
        epsilon = 1e-15  # To avoid log(0)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

    def backward_propagation(self, X, y):
        """
        Backward pass: compute gradients and apply one gradient-descent step

        Uses the activations cached by the most recent forward_propagation
        call, so that call must have been made on the same X.

        Args:
            X: Input data (m, input_size)
            y: True labels, shape (m, 1) or (m,)
        """
        y = self._as_column(y)
        m = X.shape[0]

        # Output layer gradients (A2 - y is the gradient of the
        # cross-entropy loss w.r.t. Z2 when the output is a sigmoid)
        dZ2 = self.A2 - y
        dW2 = (1/m) * np.dot(self.A1.T, dZ2)
        db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer gradients
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = (1/m) * np.dot(X.T, dZ1)
        db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

        # Update weights and biases
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def train(self, X, y, epochs=1000, verbose=True):
        """
        Train the MLP with full-batch gradient descent

        The loss and accuracy recorded for an epoch come from the forward
        pass made BEFORE that epoch's weight update.

        Args:
            X: Training data (m, input_size)
            y: Training labels, shape (m, 1) or (m,)
            epochs: Number of training iterations
            verbose: Print training progress every 100 epochs
        """
        # Normalize the label shape once so every comparison below is
        # element-wise against the (m, 1) predictions.
        y = self._as_column(y)

        for epoch in range(epochs):
            # Forward propagation
            y_pred = self.forward_propagation(X)

            # Compute loss
            loss = self.compute_loss(y, y_pred)

            # Backward propagation
            self.backward_propagation(X, y)

            # Compute accuracy
            predictions = (y_pred > 0.5).astype(int)
            accuracy = np.mean(predictions == y)

            # Store history
            self.train_loss_history.append(loss)
            self.train_acc_history.append(accuracy)

            # Print progress
            if verbose and (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

    def predict(self, X):
        """
        Make predictions (threshold the predicted probability at 0.5)

        Runs a forward pass, which overwrites the cached activations.

        Args:
            X: Input data (m, input_size)

        Returns:
            predictions: Binary predictions (m, 1)
        """
        y_pred = self.forward_propagation(X)
        return (y_pred > 0.5).astype(int)

    def predict_proba(self, X):
        """
        Predict probabilities

        Runs a forward pass, which overwrites the cached activations.

        Args:
            X: Input data (m, input_size)

        Returns:
            probabilities: Predicted probabilities (m, 1)
        """
        return self.forward_propagation(X)

print("MLP class defined successfully!")

## 5. Train the MLP Model

In [None]:
# Network dimensions and gradient-descent step size.
input_size = X_train.shape[1]  # one input neuron per feature column
hidden_size = 64               # neurons in the hidden layer
output_size = 1                # single output for binary classification
learning_rate = 0.01

mlp = MLP_ThreeLayer(input_size, hidden_size, output_size, learning_rate)

# Summarize the configuration before the training run starts.
print(
    "MLP Architecture:",
    f"  Input Layer: {input_size} neurons",
    f"  Hidden Layer: {hidden_size} neurons (ReLU activation)",
    f"  Output Layer: {output_size} neuron (Sigmoid activation)",
    f"  Learning Rate: {learning_rate}",
    "\nStarting training...\n",
    sep="\n",
)

# Run 1000 full-batch epochs, printing progress as the model trains.
mlp.train(X_train, y_train, epochs=1000, verbose=True)

## 6. Evaluate the Model

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Hard 0/1 predictions for both splits.
y_train_pred = mlp.predict(X_train)
y_test_pred = mlp.predict(X_test)

# Accuracy = fraction of predictions that match the true label.
train_accuracy = (y_train_pred == y_train).mean()
test_accuracy = (y_test_pred == y_test).mean()

print(
    "=" * 50,
    "Model Performance:",
    "=" * 50,
    f"Training Accuracy: {train_accuracy:.4f} ({train_accuracy*100:.2f}%)",
    f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)",
    "=" * 50,
    sep="\n",
)

# Per-class breakdown on the held-out test set.
cm = confusion_matrix(y_test, y_test_pred)
print("\nConfusion Matrix (Test Set):")
print(cm)
print("\nClassification Report (Test Set):")
print(
    classification_report(
        y_test,
        y_test_pred,
        target_names=['No Churn', 'Churn'],
    )
)

## 7. Visualize Training Progress

In [None]:
# Draw the recorded training loss and accuracy side by side.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
ax_loss, ax_acc = axes

# Left panel: loss per epoch
ax_loss.plot(mlp.train_loss_history, linewidth=2, color='red')
ax_loss.set_title('Training Loss Over Epochs', fontsize=14, fontweight='bold')
ax_loss.set_xlabel('Epoch', fontsize=12)
ax_loss.set_ylabel('Loss (Binary Cross-Entropy)', fontsize=12)
ax_loss.grid(True, alpha=0.3)

# Right panel: accuracy per epoch
ax_acc.plot(mlp.train_acc_history, linewidth=2, color='green')
ax_acc.set_title('Training Accuracy Over Epochs', fontsize=14, fontweight='bold')
ax_acc.set_xlabel('Epoch', fontsize=12)
ax_acc.set_ylabel('Accuracy', fontsize=12)
ax_acc.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Report the values recorded for the last epoch.
final_line = "\nFinal Training Loss: {:.4f}".format(mlp.train_loss_history[-1])
print(final_line)
print("Final Training Accuracy: {:.4f}".format(mlp.train_acc_history[-1]))

## 8. Visualize Confusion Matrix

In [None]:
# Render the test-set confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
    cbar_kws={'label': 'Count'},
)
plt.title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.show()

## 9. Summary & Insights

### معماری MLP:
- **Input Layer**: 10 ویژگی ورودی
- **Hidden Layer**: 64 نورون با تابع فعال‌سازی ReLU
- **Output Layer**: 1 نورون با تابع فعال‌سازی Sigmoid (برای طبقه‌بندی دودویی)

### نکات کلیدی پیاده‌سازی:
1. **He Initialization**: وزن‌ها با روش He (توزیع نرمال با ضریب `sqrt(2 / n_in)`) مقداردهی اولیه شدند که برای تابع فعال‌سازی ReLU مناسب است و مشکل vanishing/exploding gradient را کاهش می‌دهد
2. **ReLU در Hidden Layer**: سرعت یادگیری را افزایش می‌دهد
3. **Sigmoid در Output Layer**: برای خروجی احتمالی بین 0 و 1
4. **Binary Cross-Entropy Loss**: مناسب برای طبقه‌بندی دودویی
5. **Gradient Descent**: بهینه‌سازی با استفاده از backward propagation