In [None]:
import mlrose_hiive as mr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import time

In [None]:
# 4-Peaks setup: a maximization problem over a fixed-length state vector.
problem_size = 100
fitness = mr.FourPeaks(t_pct=0.1)
problem_4peaks = mr.DiscreteOpt(
    length=problem_size,
    fitness_fn=fitness,
    maximize=True,
    max_val=2,  # two possible values per position
)


In [None]:
# Stopping criteria and repetition count used by the algorithm comparison loop.
max_attempts = 200  # forwarded as `max_attempts` to each algorithm in the comparison loop
max_iterations = 1000  # Limit iterations for quicker execution
num_runs = 10  # Number of runs for statistical robustness

In [None]:
# Per-algorithm metrics, filled in by the comparison loop and keyed by algorithm name.
results = dict()

# Extra keyword arguments for each optimizer. The keys double as the function
# names that are looked up on the mlrose_hiive module.
algorithms = dict(
    random_hill_climb=dict(restarts=50),
    simulated_annealing=dict(
        schedule=mr.ExpDecay(init_temp=1, exp_const=0.005, min_temp=0.001),
    ),
    genetic_alg=dict(pop_size=200, mutation_prob=0.1),
    mimic=dict(pop_size=200, keep_pct=0.2),
)

In [None]:
# Execute each algorithm multiple times and store its metrics in `results[name]`.
#
# Each repetition gets its own seed. With a fixed random_state every one of the
# `num_runs` repetitions would be identical, so the averages would describe a
# single run rather than a sample of runs.
base_seed = 42

for name, params in algorithms.items():
    optimizer = getattr(mr, name.lower())
    run_times = []
    fitness_scores = []
    curves = []
    total_fevs = []  # List to store total function evaluations for each run
    total_iterations = 0  # Curve rows actually recorded, summed over all runs

    for run_index in range(num_runs):
        start_time = time.time()
        _, best_fitness, fitness_curve = optimizer(
            problem_4peaks, max_attempts=max_attempts, max_iters=max_iterations,
            curve=True, random_state=base_seed + run_index, **params)
        end_time = time.time()

        run_times.append(end_time - start_time)
        fitness_scores.append(best_fitness)
        curves.append(fitness_curve[:, 0])  # Assuming the first column is the fitness score

        # Assuming the second column of fitness_curve contains the cumulative fevals
        total_fevs.append(fitness_curve[-1, 1])
        total_iterations += len(fitness_curve)

    # Runs can stop early once `max_attempts` is exhausted, so the curves may
    # differ in length. Shorter curves are extended with their final value
    # (the run had stopped improving) so they can be averaged element-wise.
    longest = max(len(curve) for curve in curves)
    padded_curves = np.array(
        [np.pad(curve, (0, longest - len(curve)), mode='edge') for curve in curves])

    # Calculate fevals per second and per iteration
    fevals_per_sec = sum(total_fevs) / sum(run_times)
    # Divide by the iterations that were actually recorded instead of
    # num_runs * max_iterations, which overstates the count for early stops.
    # NOTE(review): assumes the returned curve has one row per iteration — confirm
    # for random_hill_climb when restarts > 0.
    fevals_per_iteration = sum(total_fevs) / total_iterations

    results[name] = {
        'times': run_times,
        'fitness_scores': fitness_scores,
        'average_curve': padded_curves.mean(axis=0),
        'total_fevs': sum(total_fevs),  # Total function evaluations across all runs
        'fevals_per_sec': fevals_per_sec,  # Function evaluations per second
        'fevals_per_iteration': fevals_per_iteration  # Function evaluations per iteration
    }


In [None]:
# Empty placeholder; the following cell rebinds `results_df` to the CSV contents.
results_df = pd.DataFrame()

In [None]:
# Load saved results from disk.
# NOTE(review): no visible cell writes this file — presumably it was exported
# from `results` in an earlier session; confirm before relying on Run All.
results_df = pd.read_csv('4peaks_results_v2.csv')

In [None]:
# Show the loaded frame so its row/column layout can be inspected.
results_df


In [None]:
# Plot average fitness curves: one line per algorithm on a shared set of axes.
# (matplotlib is already imported as `plt` in the notebook's first cell.)
fig, ax = plt.subplots(figsize=(10, 6))
for algo_name, algo_metrics in results.items():
    ax.plot(algo_metrics['average_curve'], label=f"{algo_name} (Avg. Fitness)")
ax.set_title('Average Fitness Curves for 4-Peaks Problem')
ax.set_xlabel('Iterations')
ax.set_ylabel('Fitness')
ax.legend()
ax.grid(True)
plt.show()

# Bar chart of the mean wall-clock time of each algorithm.
algo_names = list(results)
convergence_times = [np.mean(results[algo_name]['times']) for algo_name in algo_names]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(algo_names, convergence_times, color='lightblue')
ax.set_title('Average Convergence Times for 4-Peaks Problem')
ax.set_xlabel('Algorithm')
ax.set_ylabel('Time in seconds')
ax.grid(True, axis='y')
plt.show()


In [None]:
# Parsing helper for list-like cells that were written to CSV as text.
import ast
import json
import re


# One numeric token: a decimal or scientific-notation number, or the nan/inf
# spellings NumPy prints. The look-arounds reject digits embedded in a longer
# word, so the "32" in "dtype=float32" is not picked up.
_NUMERIC_TOKEN = re.compile(
    r'(?<![\w.])[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf)(?![\w.])'
)


def parse_list(data):
    """Convert a stringified list/array back into a Python list.

    Parameters
    ----------
    data : object
        Usually a string such as ``"[0.1, 0.2]"`` (Python/JSON list text) or
        ``"[ 1.  -2.   3.5]"`` (whitespace-separated NumPy text). Any
        non-string value is returned unchanged.

    Returns
    -------
    object
        The parsed value. JSON / Python-literal text is returned as parsed;
        NumPy-style text is returned as a flat list of floats.

    Raises
    ------
    ValueError
        If the text contains no numbers, or contains ``...`` (NumPy elided
        part of the array when it was printed, so the data is incomplete).

    Notes
    -----
    No ``eval`` is used: the text comes from a file, and ``eval`` both executes
    arbitrary code and silently mis-reads NumPy text (``"[1. -2.]"`` evaluates
    to ``[-1.0]``).
    """
    if not isinstance(data, str):
        return data  # Return as-is if it's already a list

    text = data.strip()

    # Strict parsers first; they preserve nesting and int/float types.
    for parser in (json.loads, ast.literal_eval):
        try:
            return parser(text)
        except (ValueError, SyntaxError, TypeError):
            continue

    if '...' in text:
        raise ValueError(
            "Array text was truncated when it was saved ('...' found); "
            "the original values cannot be recovered from it")

    # Fallback for whitespace-separated NumPy text such as "[ 1.  -2.   3.5]".
    tokens = _NUMERIC_TOKEN.findall(text)
    if not tokens:
        raise ValueError(f"Cannot parse {data!r} as a list of numbers")
    return [float(token) for token in tokens]
# Setting a style
sns.set(style="whitegrid")

# Define a color palette
palette = sns.color_palette("muted")

# Extract the per-run times and the average curves from the loaded results.
# NOTE(review): row positions 0 and 2 are presumably the 'times' and
# 'average_curve' rows of the saved results — confirm against the CSV.
try:
    times_row = results_df.iloc[0]
    curve_row = results_df.iloc[2]

    rhc_times = parse_list(times_row['random_hill_climb'])
    sa_times = parse_list(times_row['simulated_annealing'])
    ga_times = parse_list(times_row['genetic_alg'])
    mimic_times = parse_list(times_row['mimic'])

    rhc_curve = parse_list(curve_row['random_hill_climb'])
    sa_curve = parse_list(curve_row['simulated_annealing'])
    ga_curve = parse_list(curve_row['genetic_alg'])
    mimic_curve = parse_list(curve_row['mimic'])
except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise: later cells need every name assigned above, and continuing
    # would either fail with a NameError or silently reuse stale values.
    raise


In [None]:
# Mean and standard deviation of the run times, one row per algorithm.
labels = ['RHC', 'SA', 'GA', 'MIMIC']
per_algorithm_times = [rhc_times, sa_times, ga_times, mimic_times]
times = [np.mean(run_times) for run_times in per_algorithm_times]

time_data = {
    'Algorithm': labels,
    'Mean Execution Time (seconds)': times,
    'Standard Deviation': [np.std(run_times) for run_times in per_algorithm_times],
}

execution_times_df = pd.DataFrame(time_data)

def highlight_min(s):
    """Return one CSS string per element of ``s``, marking the minimum.

    Elements equal to ``min(s)`` get a yellow background; all others get an
    empty style string. Intended for use with ``DataFrame.style.apply``.
    """
    minimum_mask = s == min(s)
    css_by_flag = {True: 'background-color: yellow', False: ''}
    return [css_by_flag[bool(flag)] for flag in minimum_mask]

# Styled view of the execution-time table: the smallest mean time is
# highlighted and both numeric columns are shown with two decimals.
styled_df = (
    execution_times_df.style
    .apply(highlight_min, subset=['Mean Execution Time (seconds)'])
    .format({'Mean Execution Time (seconds)': "{:.2f}", 'Standard Deviation': "{:.2f}"})
    .set_table_styles([{'selector': 'th', 'props': [('font-size', '12pt')]}])
    .set_properties(**{'font-size': '11pt'})
    .set_caption("Summary of Execution Times")
)

styled_df

In [None]:


# Parse the average curves (Fitness/Iteration)
# The parsed curves are kept in a local mapping and `results_df` is left
# untouched. Writing them back with chained indexing
# (`results_df[col][2] = ...`) is not guaranteed to update the frame and has
# no effect under pandas Copy-on-Write, so the plot could receive raw strings.
curve_labels = {
    'random_hill_climb': 'Random Hill Climb',
    'simulated_annealing': 'Simulated Annealing',
    'genetic_alg': 'Genetic Algorithm',
    'mimic': 'MIMIC',
}
average_curves = {
    column: parse_list(results_df.loc[2, column]) for column in curve_labels
}

# Plot Fitness / Iteration for each algorithm
plt.figure(figsize=(12, 8))
for column, label in curve_labels.items():
    plt.plot(average_curves[column], label=label)
plt.title('Fitness / Iteration (Average Curve)')
plt.xlabel('Iterations')
plt.ylabel('Fitness')
plt.legend()
plt.grid(True)
plt.show()

# Parse the wall clock times and total them per algorithm.
# The values are read with a single `.loc` lookup and parsed locally rather
# than assigned back with chained indexing (`results_df[col][0] = ...`),
# which is not guaranteed to modify the frame and has no effect under pandas
# Copy-on-Write.
time_labels = {
    'random_hill_climb': 'Random Hill Climb',
    'simulated_annealing': 'Simulated Annealing',
    'genetic_alg': 'Genetic Algorithm',
    'mimic': 'MIMIC',
}

# Calculate total wall clock time for each algorithm
total_times = {
    label: np.sum(parse_list(results_df.loc[0, column]))
    for column, label in time_labels.items()
}

# Plot Wall Clock Time for each algorithm
plt.figure(figsize=(8, 6))
plt.bar(list(total_times.keys()), list(total_times.values()),
        color=['blue', 'green', 'red', 'purple'])
plt.title('Wall Clock Time per Algorithm')
plt.xlabel('Algorithm')
plt.ylabel('Total Time (seconds)')
plt.show()

In [None]:
# Function-evaluation metrics: one bar chart per metric, all drawn the same way.
# Each entry is (results key, bar color, figure title, y-axis label).
fevals_charts = [
    ('total_fevs', 'blue',
     'Total Function Evaluations per Algorithm', 'Total Function Evaluations'),
    ('fevals_per_sec', 'green',
     'Function Evaluations per Second per Algorithm', 'Function Evaluations per Second'),
    ('fevals_per_iteration', 'red',
     'Function Evaluations per Iteration per Algorithm', 'Function Evaluations per Iteration'),
]

for metric_key, bar_color, chart_title, y_label in fevals_charts:
    bar_positions = range(len(results))
    bar_heights = [val[metric_key] for val in results.values()]

    plt.figure(figsize=(12, 8))
    plt.bar(bar_positions, bar_heights, color=bar_color, align='center')
    plt.xticks(bar_positions, list(results.keys()))
    plt.title(chart_title)
    plt.xlabel('Algorithm')
    plt.ylabel(y_label)
    plt.show()


In [None]:
# Summary table built from the `results` dictionary, one row per algorithm.
# (pandas is already imported as `pd` in the notebook's first cell.)
data = {
    'Algorithm': list(results.keys()),
    'Average Time (s)': [np.mean(metrics['times']) for metrics in results.values()],
    'Average Fitness': [np.mean(metrics['fitness_scores']) for metrics in results.values()],
    'Total Function Evaluations': [metrics['total_fevs'] for metrics in results.values()],
    'Fevals per Second': [metrics['fevals_per_sec'] for metrics in results.values()],
    'Fevals per Iteration': [metrics['fevals_per_iteration'] for metrics in results.values()],
}

results_df_table = pd.DataFrame(data)

# Display the DataFrame to check the output
results_df_table


In [None]:
# Random Hill Climb sweep: best fitness obtained for each restart count.
rhc_restarts = [0, 5, 10, 20]
rhc_results = {}

for restarts in rhc_restarts:
    _state, fitness_value, _curve = mr.random_hill_climb(
        problem_4peaks,
        restarts=restarts,
        max_attempts=200,
        max_iters=1000,
        curve=True,
        random_state=42,
    )
    rhc_results[restarts] = fitness_value

# One row per restart count, ready for later analysis.
rhc_results_df = pd.DataFrame(
    list(rhc_results.items()),
    columns=['Restarts', 'Best Fitness'],
)
rhc_results_df

In [None]:
# Simulated Annealing sweep: best fitness for every combination of initial
# temperature and exponential decay constant.
sa_temps = [1, 10, 50, 100]
sa_decay_consts = [0.001, 0.005, 0.01]

sa_results = {}
sa_grid = [(temp, decay) for temp in sa_temps for decay in sa_decay_consts]
for temp, decay in sa_grid:
    schedule = mr.ExpDecay(init_temp=temp, exp_const=decay, min_temp=0.001)
    _state, fitness_value, _curve = mr.simulated_annealing(
        problem_4peaks,
        schedule=schedule,
        max_attempts=200,
        max_iters=1000,
        curve=True,
        random_state=42,
    )
    sa_results[(temp, decay)] = fitness_value

sa_result_df = pd.DataFrame(
    list(sa_results.items()),
    columns=['(Initial Temp, Decay Const)', 'Best Fitness'],
)
sa_result_df

In [None]:
# Genetic Algorithm sweep: best fitness for every combination of population
# size and mutation probability.
ga_pop_sizes = [100, 200, 500]
ga_mut_probs = [0.05, 0.1, 0.2]
ga_results = {}

ga_grid = [(pop_size, mut_prob) for pop_size in ga_pop_sizes for mut_prob in ga_mut_probs]
for pop_size, mut_prob in ga_grid:
    _state, fitness_value, _curve = mr.genetic_alg(
        problem_4peaks,
        pop_size=pop_size,
        mutation_prob=mut_prob,
        max_attempts=200,
        max_iters=1000,
        curve=True,
        random_state=42,
    )
    ga_results[(pop_size, mut_prob)] = fitness_value

# One row per (population size, mutation probability) pair.
ga_results_df = pd.DataFrame(
    list(ga_results.items()),
    columns=['(Population Size, Mutation Prob)', 'Best Fitness'],
)
ga_results_df

In [None]:
# MIMIC sweep: best fitness for every combination of population size and
# keep percentage.
mimic_pop_sizes = [100, 200, 500]
mimic_keep_pcts = [0.1, 0.2, 0.3]
mimic_results = {}

mimic_grid = [(pop_size, keep_pct) for pop_size in mimic_pop_sizes for keep_pct in mimic_keep_pcts]
for pop_size, keep_pct in mimic_grid:
    _state, fitness_value, _curve = mr.mimic(
        problem_4peaks,
        pop_size=pop_size,
        keep_pct=keep_pct,
        max_attempts=200,
        max_iters=1000,
        curve=True,
        random_state=42,
    )
    mimic_results[(pop_size, keep_pct)] = fitness_value

# One row per (population size, keep percentage) pair.
mimic_results_df = pd.DataFrame(
    list(mimic_results.items()),
    columns=['(Population Size, Keep %)', 'Best Fitness'],
)

In [None]:
# Display the MIMIC sweep results.
mimic_results_df