# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# ====== 1. Generate Dataset ======
# Seed the global NumPy RNG so every run produces the same synthetic customers.
np.random.seed(42)
n_samples = 5000

# NOTE: the columns are drawn from the shared global RNG in the order listed
# here, so reordering the entries changes the generated values.
data = dict(
    age=np.random.randint(18, 80, n_samples),
    credit_score=np.random.normal(650, 100, n_samples).astype(int),
    balance=np.round(np.random.exponential(50000, n_samples), 2),
    products_number=np.random.randint(1, 5, n_samples),
    estimated_salary=np.round(np.random.uniform(20000, 150000, n_samples), 2),
    country=np.random.choice(['Germany', 'France', 'Spain'], n_samples),
    gender=np.random.choice(['Male', 'Female'], n_samples),
    tenure=np.random.randint(0, 10, n_samples),
    has_credit_card=np.random.choice([0, 1], n_samples),
    is_active_member=np.random.choice([0, 1], n_samples),
)

df = pd.DataFrame(data)

# Create target variable: a customer is labelled as churned (1) when at least
# one of the four rules below holds, otherwise 0.
high_balance = df['balance'].gt(80000)
low_credit_score = df['credit_score'].lt(550)
single_product = df['products_number'].eq(1)
inactive_senior = df['age'].gt(65) & df['is_active_member'].eq(0)
df['churn'] = (
    high_balance | low_credit_score | single_product | inactive_senior
).astype(int)

print(f"Dataset: {len(df)} rows × {len(df.columns)} columns")
print(f"Churn rate: {df['churn'].mean():.2%}\n")

# ====== 2. Data Preprocessing ======
# Replace the two string columns with integer codes; each column gets its own
# fitted encoder, kept at module level.
le_country, le_gender = LabelEncoder(), LabelEncoder()
df['country'] = le_country.fit_transform(df['country'])
df['gender'] = le_gender.fit_transform(df['gender'])

# Feature matrix = every column except the target; the target is kept as a
# column vector of shape (n_samples, 1).
feature_frame = df.drop(columns='churn')
X = feature_frame.values
y = df[['churn']].values

# Hold out 20% of the rows for testing (fixed seed for a reproducible split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features: statistics are learned from the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

print(f"Train set: {X_train.shape}, Test set: {X_test.shape}\n")

# ====== 3. Build 3-Layer MLP with NumPy ======
class MLP:
    """Small fully connected classifier implemented with NumPy.

    Architecture: input -> sigmoid hidden layer -> sigmoid output layer.
    Training is full-batch gradient descent on binary cross-entropy.

    Attributes:
        W1, b1: weights ``(input_size, hidden_size)`` and bias
            ``(1, hidden_size)`` of the hidden layer.
        W2, b2: weights ``(hidden_size, output_size)`` and bias
            ``(1, output_size)`` of the output layer.
        z1, a1, z2, a2: pre-activations / activations cached by the most
            recent ``forward`` call and consumed by ``backward``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters of both layers.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output units.
        """
        # Weights are standard-normal draws scaled by sqrt(2 / fan_in)
        # (He-style scaling); biases start at zero. The draws come from the
        # global NumPy RNG, so np.random.seed() controls reproducibility.
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0/input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0/hidden_size)
        self.b2 = np.zeros((1, output_size))

    @staticmethod
    def _match_shape(y, reference):
        """Return ``y`` as an array with the same shape as ``reference``.

        Without this, labels of shape ``(m,)`` broadcast against predictions
        of shape ``(m, 1)`` into an ``(m, m)`` matrix, which silently corrupts
        the loss and breaks the gradient shapes.

        Raises:
            ValueError: if ``y`` does not have as many elements as
                ``reference``.
        """
        y = np.asarray(y)
        if y.shape != reference.shape:
            y = y.reshape(reference.shape)
        return y

    def sigmoid(self, z):
        """Return the element-wise logistic function of ``z``."""
        # Clip the argument so np.exp cannot overflow for extreme inputs.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """Return the sigmoid derivative expressed through its *output*.

        Despite the parameter name, ``z`` must be the activation
        ``a = sigmoid(pre_activation)``; the result is ``a * (1 - a)``.
        """
        return z * (1 - z)

    def forward(self, X):
        """Run a forward pass and cache the intermediate values.

        Args:
            X: input array of shape ``(m, input_size)``.

        Returns:
            Output activations of shape ``(m, output_size)``.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output, learning_rate):
        """Backpropagate and apply one gradient-descent step in place.

        Must be called right after ``forward(X)``: it reads the hidden
        activations cached in ``self.a1``.

        Args:
            X: the inputs passed to the preceding ``forward`` call.
            y: target labels; any shape with as many elements as ``output``
                (e.g. ``(m,)`` or ``(m, 1)``).
            output: the value returned by ``forward(X)``.
            learning_rate: step size of the update.

        Raises:
            ValueError: if ``X`` has no rows, or ``y`` cannot be reshaped to
                ``output.shape``.
        """
        m = X.shape[0]
        if m == 0:
            # Dividing by m == 0 would write NaN into every parameter.
            raise ValueError("cannot run a backward pass on an empty batch")
        y = self._match_shape(y, output)

        # Output layer: for a sigmoid output with binary cross-entropy the
        # gradient w.r.t. the pre-activation reduces to (prediction - target).
        dz2 = output - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer: propagate through W2 (before it is updated) and the
        # hidden sigmoid.
        dz1 = np.dot(dz2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights and biases
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def compute_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and predictions.

        ``y_true`` is reshaped to ``y_pred``'s shape first, so 1-D labels are
        accepted. A small epsilon keeps the logarithms finite when a
        prediction is exactly 0 or 1.
        """
        y_pred = np.asarray(y_pred)
        y_true = self._match_shape(y_true, y_pred)
        eps = 1e-8
        loss = -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))
        return loss

    def train(self, X, y, epochs, learning_rate, log_every=200):
        """Train with full-batch gradient descent.

        Args:
            X: training inputs of shape ``(m, input_size)``.
            y: training labels, ``(m,)`` or ``(m, output_size)``.
            epochs: number of passes over the data.
            learning_rate: step size of each update.
            log_every: print the loss every this many epochs; a falsy value
                (``0`` / ``None``) disables printing.

        Returns:
            List with one loss value per epoch. Each value is computed from
            the predictions made *before* that epoch's weight update.
        """
        losses = []
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output, learning_rate)
            loss = self.compute_loss(y, output)
            losses.append(loss)

            if log_every and (epoch + 1) % log_every == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

        return losses

    def predict(self, X, threshold=0.5):
        """Return hard 0/1 predictions.

        An output strictly greater than ``threshold`` maps to 1, everything
        else to 0. The result has shape ``(m, output_size)``.
        """
        output = self.forward(X)
        return (output > threshold).astype(int)

# ====== 4. Train Model ======
# The input width follows the preprocessed feature matrix; one hidden layer of
# 64 units feeds a single output unit.
input_size, hidden_size, output_size = X_train.shape[1], 64, 1

mlp = MLP(input_size, hidden_size, output_size)
print("Training MLP with improved settings...\n")
losses = mlp.train(X_train, y_train, epochs=1000, learning_rate=0.1)

# ====== 5. Evaluation ======
# Accuracy = fraction of predictions that equal the true label, reported for
# both the training split and the held-out test split.
y_pred_train, y_pred_test = mlp.predict(X_train), mlp.predict(X_test)

train_accuracy = (y_pred_train == y_train).mean()
test_accuracy = (y_pred_test == y_test).mean()

print(f"\n✓ Train Accuracy: {train_accuracy:.4f}")
print(f"✓ Test Accuracy: {test_accuracy:.4f}")