# MHA Toolbox: Metaheuristic Algorithm Toolbox Tutorial

This tutorial demonstrates how to use the MHA Toolbox, a flexible and user-friendly library for metaheuristic optimization algorithms.

The toolbox features:
- Automatic parameter handling (missing parameters are automatically calculated)
- Unified interface across all algorithms
- Support for both direct optimization and feature selection
- Comprehensive results objects with built-in analysis tools

## 1. Import Required Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Import MHA toolbox
from mha_toolbox.toolbox import get_optimizer, list_algorithms
import mha_toolbox.benchmarks as benchmarks

# Seed NumPy's global random number generator so that code drawing from
# np.random produces the same numbers on every run.
# NOTE(review): whether mha_toolbox draws from this global generator is not
# visible in this notebook - confirm before relying on identical results.
np.random.seed(42)

## 2. List Available Algorithms

First, let's check what algorithms are available in the toolbox.

In [None]:
# Print a header, then whatever the toolbox reports as available algorithms
print("Available algorithms:")
available_algorithms = list_algorithms()
print(available_algorithms)

## 3. Basic Usage: Optimizing a Benchmark Function

Let's start with a simple example: optimizing the Sphere function.

In [None]:
# Objective to optimize: the Sphere benchmark that ships with the toolbox
sphere = benchmarks.sphere

# Configure SCA with the full parameter set given explicitly
sca_settings = {
    "dimensions": 10,
    "lower_bound": -5,
    "upper_bound": 5,
    "population_size": 30,
    "max_iterations": 100,
    "verbose": True,
}
optimizer = get_optimizer("SCA", **sca_settings)

# Execute the search on the Sphere function
result = optimizer.optimize(objective_function=sphere)

# Report the best objective value found and the elapsed time
print(f"\nBest fitness: {result.best_fitness}")
print(f"Execution time: {result.execution_time:.4f} seconds")

# Draw the convergence plot for this run
result.plot_convergence(title="Sphere Function Optimization")

## 4. Automatic Parameter Handling

Now let's see how the toolbox handles missing parameters by providing only some of them.

In [None]:
# Objective for this run: the toolbox's Rastrigin benchmark
rastrigin = benchmarks.rastrigin

# Supply only part of the configuration: the dimensionality and the upper
# bound (plus the verbosity flag); nothing else is passed in
partial_settings = {"dimensions": 5, "upper_bound": 5.12, "verbose": True}
optimizer = get_optimizer("SCA", **partial_settings)

# Execute the search
result = optimizer.optimize(objective_function=rastrigin)

# Report the best objective value
print(f"\nBest fitness: {result.best_fitness}")

# List the parameters recorded on the result (including derived ones)
print("\nParameters used:")
for param_name, param_value in result.parameters.items():
    print(f"  - {param_name}: {param_value}")

# Draw the convergence plot for this run
result.plot_convergence(title="Rastrigin Function Optimization")

## 5. Feature Selection Example

The toolbox can also be used for feature selection. Let's use a real dataset for this example.

In [None]:
# Fetch scikit-learn's breast cancer dataset and pull out the feature
# matrix and the label vector
data = load_breast_cancer()
X = data.data
y = data.target

# Summarise the dataset: overall shape, feature count, sample count, names
n_samples, n_features = X.shape
print(f"Dataset shape: {X.shape}")
print(f"Number of features: {n_features}")
print(f"Number of samples: {n_samples}")
print(f"Feature names: {data.feature_names}")

In [None]:
# Build the optimizer for feature selection.
# No bounds or dimensions are passed - they'll be derived from the data
optimizer = get_optimizer("SCA", population_size=20, max_iterations=50, verbose=True)

# Run feature selection on the dataset loaded above
fs_result = optimizer.optimize(X=X, y=y)

# Report the best fitness reached
print(f"\nBest fitness (error rate): {fs_result.best_fitness}")

# Non-zero positions of the binary solution are the chosen feature indices
selected_indices = np.where(fs_result.best_solution_binary)[0]
selected_names = [data.feature_names[idx] for idx in selected_indices]

# Numbered listing of the chosen features (numbering starts at 1)
print(f"\nSelected {len(selected_indices)} out of {X.shape[1]} features:")
for rank, (idx, name) in enumerate(zip(selected_indices, selected_names), start=1):
    print(f"  {rank}. Feature {idx}: {name}")

# Draw the convergence plot for this run
fs_result.plot_convergence(title="Feature Selection Convergence")

## 6. Evaluating Feature Selection Results

Let's evaluate how good our feature selection is by training a classifier with the selected features.

In [None]:
from sklearn.neighbors import KNeighborsClassifier


def _knn_holdout_accuracy(features, labels):
    """Train a 5-nearest-neighbour classifier on a 70/30 split and return test accuracy.

    The split always uses ``random_state=42`` so that calls made with
    different column subsets of the same samples are scored on the same rows.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_tr, y_tr)
    return accuracy_score(y_te, model.predict(X_te))


# Get the selected features (non-zero positions of the binary solution)
selected_features = np.where(fs_result.best_solution_binary)[0]
X_selected = X[:, selected_features]

# NOTE: the feature subset was chosen by running the optimizer on all of X,
# which includes the rows used for testing below, so the accuracy for the
# selected subset is an optimistic estimate.
if selected_features.size == 0:
    # A classifier cannot be fitted on zero columns, so skip instead of crashing
    print("No features were selected; skipping evaluation of the selected subset.")
else:
    accuracy = _knn_holdout_accuracy(X_selected, y)
    print(f"Accuracy with {len(selected_features)} selected features: {accuracy:.4f}")

# Compare with using all features
accuracy_full = _knn_holdout_accuracy(X, y)

print(f"Accuracy with all {X.shape[1]} features: {accuracy_full:.4f}")
print(f"Feature reduction: {100 * (1 - len(selected_features) / X.shape[1]):.1f}%")

## 7. Customizing Objective Functions

You can also define your own objective functions for optimization.

In [None]:
# Define a custom objective function
def custom_objective(solution):
    """
    Ackley function - a non-convex optimization benchmark function.

    Args:
        solution: Candidate point as a 1-D NumPy array or any sequence of
            numbers (list, tuple); it is converted to a float array first.

    Returns:
        The Ackley value at ``solution``. It is 0 (up to floating-point
        rounding) when every coordinate is 0.
    """
    # Accept plain Python sequences as well as arrays: `**` and `c * x`
    # are not element-wise operations on lists/tuples.
    point = np.asarray(solution, dtype=float)

    a = 20
    b = 0.2
    c = 2 * np.pi
    n = len(point)

    sum1 = np.sum(point**2)
    sum2 = np.sum(np.cos(c * point))

    term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
    term2 = -np.exp(sum2 / n)

    return term1 + term2 + a + np.exp(1)

# Configure SCA for 10 dimensions with symmetric bounds of +/-32.768
search_bound = 32.768
optimizer = get_optimizer(
    "SCA",
    dimensions=10,
    lower_bound=-search_bound,
    upper_bound=search_bound,
    verbose=True,
)

# Execute the search on the custom objective
result = optimizer.optimize(objective_function=custom_objective)

# Report the best fitness and the leading five coordinates of the best solution
print(f"\nBest fitness: {result.best_fitness}")
leading_coords = result.best_solution[:5]
print(f"Best solution: {leading_coords}...")
result.plot_convergence(title="Custom Objective Function Optimization")

## 8. Exploring the Model Object

The result (model) object returned by `optimize()` contains comprehensive information about the optimization run.

In [None]:
# Print a summary of the optimization results.
# `result` is whatever the most recently executed optimization cell assigned;
# when the cells run top to bottom, that is the custom-objective (Ackley) run.
result.summary()

In [None]:
# Read individual fields straight off the result object
print("Accessing individual attributes:")
print(f"Algorithm: {result.algorithm_name}")
print(f"Best fitness: {result.best_fitness}")
print(f"Execution time: {result.execution_time:.4f} seconds")
print(f"Population size: {result.parameters['population_size']}")

# Show only the leading five entries of the convergence curve, numbered from 1
print("\nConvergence curve (first 5 iterations):")
first_five = result.convergence_curve[:5]
for iteration, fitness in enumerate(first_five, start=1):
    print(f"Iteration {iteration}: {fitness}")

## 9. Default Parameter Demonstration

Let's see what happens when we provide no parameters at all.

In [None]:
# Create the optimizer without any problem settings: only the algorithm name
# and the verbosity flag are passed (no dimensions, bounds, population size
# or iteration count)
optimizer = get_optimizer("SCA", verbose=True)

# Define a simple test function
def simple_function(x):
    """Return the sum of the squared entries of ``x``."""
    squared = x ** 2
    return np.sum(squared)

# Execute the search with the optimizer created above
result = optimizer.optimize(objective_function=simple_function)

# List every recorded parameter except the algorithm name
print("\nParameters that were automatically set:")
for key, value in result.parameters.items():
    if key == 'algorithm_name':
        continue
    print(f"  - {key}: {value}")

# Draw the convergence plot for this run
result.plot_convergence(title="Optimization with Default Parameters")

## 10. Working with Data from Files

Let's see how to use the toolbox with data loaded from files.

In [None]:
# Generate a small synthetic classification problem to write to disk
X_sample, y_sample = make_classification(
    n_samples=100,
    n_features=20,
    n_informative=5,
    n_redundant=10,
    random_state=42,
)

# Wrap it in a DataFrame: one named column per feature plus the label column
features = [f"feature_{col}" for col in range(X_sample.shape[1])]
df = pd.DataFrame(X_sample, columns=features).assign(target=y_sample)

# Write the table to CSV without the index column
file_path = "sample_dataset.csv"
df.to_csv(file_path, index=False)
print(f"Sample dataset saved to {file_path}")

In [None]:
# Load the dataset back from disk. Names specific to this example are used so
# that the breast-cancer `data`, `X` and `y` from the earlier sections are not
# overwritten; re-running those earlier cells afterwards would otherwise
# operate on the wrong data.
loaded_df = pd.read_csv(file_path)
X_loaded = loaded_df.drop(columns=['target']).values
y_loaded = loaded_df['target'].values

# Create optimizer
optimizer = get_optimizer(
    "SCA",
    population_size=20,
    max_iterations=30,
    verbose=True
)

# Run feature selection
result = optimizer.optimize(X=X_loaded, y=y_loaded)

# Indices of the selected features (non-zero positions of the binary solution);
# the count is taken from these indices, as in the earlier feature-selection cell
selected_loaded = np.where(result.best_solution_binary)[0]

# Display results
print(f"\nBest fitness: {result.best_fitness}")
print(f"Selected {len(selected_loaded)} out of {X_loaded.shape[1]} features")
print(f"Selected features: {selected_loaded}")
result.plot_convergence(title="Feature Selection on Loaded Dataset")

## 11. Tips and Best Practices

### Setting Appropriate Bounds

- For benchmark functions, use the known bounds of the function
- For feature selection, the toolbox automatically uses [0, 1] bounds
- If you know the bounds for only one side, the other will be intelligently derived

### Choosing Population Size and Iterations

- For simple problems, small populations (10-30) are often sufficient
- For complex problems with many dimensions, use larger populations (50-100)
- The number of iterations typically scales with problem complexity
- For feature selection, a good starting point for the number of iterations is 10× the number of features

### Interpreting Results

- Check the convergence curve to see if optimization has stabilized
- For feature selection, compare accuracy with selected features vs. all features
- Remember that the goal is to minimize the fitness function

### Custom Objective Functions

- Ensure your function is vectorized for better performance
- The function should accept a solution vector and return a single fitness value
- Lower fitness values are considered better (the toolbox minimizes the objective)