In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [13]:
data=pd.read_csv('Customer Churn.csv')
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [15]:
# Features and Target
X = data.drop(columns=["Exited","Surname","Geography","Gender","RowNumber"])
y = data["Exited"]

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [17]:
# Bat Algorithm Parameters
num_bats = 10
max_iter =20
loudness = 0.5
pulse_rate = 0.5
frequency_min, frequency_max = 0, 2

# Fitness Function with Additional Metrics
def fitness_function_with_metrics(solution):
    selected_features = np.where(solution > 0.5)[0]
    if len(selected_features) == 0:
        return 0, 0, 0, 0  # Return 0 for accuracy, precision, recall, and F1 if no features are selected

    # Train model on selected features
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train.iloc[:, selected_features], y_train)
    preds = clf.predict(X_test.iloc[:, selected_features])

    # Calculate metrics
    accuracy = accuracy_score(y_test, preds)
    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)

    return accuracy, precision, recall, f1

# Initialize bats
positions = np.random.rand(num_bats, X.shape[1])  # Random positions in [0,1]
velocities = np.random.uniform(-1, 1, (num_bats, X.shape[1]))  # Random velocities
best_global = positions[np.random.randint(0, num_bats)]  # Randomly select a bat
best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = fitness_function_with_metrics(best_global)



In [19]:
# Bat Algorithm
for t in range(max_iter):
    for i in range(num_bats):
        # Calculate frequency and update velocity and position
        frequency = frequency_min + (frequency_max - frequency_min) * np.random.rand()
        velocities[i] += (positions[i] - best_global) * frequency
        positions[i] = np.clip(positions[i] + velocities[i], 0, 1)  # Ensure positions stay in range

        # Local search
        if np.random.rand() > pulse_rate:
            positions[i] = np.clip(best_global + np.random.normal(0, 0.1, size=X.shape[1]), 0, 1)

        # Evaluate fitness
        accuracy, precision, recall, f1 = fitness_function_with_metrics(positions[i])
        if accuracy > best_global_accuracy and np.random.rand() < loudness:
            best_global = positions[i]
            best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = accuracy, precision, recall, f1

    # Update loudness and pulse rate
    loudness = max(0.1, loudness * 0.95)
    pulse_rate = min(1.0, pulse_rate * 1.05)

    # Print progress
    print(f"Iteration {t + 1}:")
    print(f"   Best Accuracy = {best_global_accuracy:.4f}")
    print(f"   Best Precision = {best_global_precision:.4f}")
    print(f"   Best Recall = {best_global_recall:.4f}")
    print(f"   Best F1 Score = {best_global_f1:.4f}")
    selected_features = np.where(best_global > 0.5)[0]
    print("   Selected Features:", X.columns[selected_features].tolist())
    print()

# Final Results
print("Final Results:")
print(f"Best Accuracy: {best_global_accuracy:.4f}")
print(f"Best Precision: {best_global_precision:.4f}")
print(f"Best Recall: {best_global_recall:.4f}")
print(f"Best F1 Score: {best_global_f1:.4f}")
print("Selected Features:", X.columns[selected_features].tolist())

Iteration 1:
   Best Accuracy = 0.8150
   Best Precision = 0.5381
   Best Recall = 0.3510
   Best F1 Score = 0.4249
   Selected Features: ['CreditScore', 'Age', 'Balance', 'IsActiveMember']

Iteration 2:
   Best Accuracy = 0.8150
   Best Precision = 0.5381
   Best Recall = 0.3510
   Best F1 Score = 0.4249
   Selected Features: ['CreditScore', 'Age', 'Balance', 'IsActiveMember']

Iteration 3:
   Best Accuracy = 0.8593
   Best Precision = 0.7411
   Best Recall = 0.4264
   Best F1 Score = 0.5413
   Selected Features: ['CustomerId', 'CreditScore', 'Age', 'Balance', 'NumOfProducts', 'IsActiveMember', 'EstimatedSalary']

Iteration 4:
   Best Accuracy = 0.8593
   Best Precision = 0.7411
   Best Recall = 0.4264
   Best F1 Score = 0.5413
   Selected Features: ['CustomerId', 'CreditScore', 'Age', 'NumOfProducts', 'IsActiveMember', 'EstimatedSalary']

Iteration 5:
   Best Accuracy = 0.8593
   Best Precision = 0.7411
   Best Recall = 0.4264
   Best F1 Score = 0.5413
   Selected Features: ['Custome