In [None]:
# Re-import modules automatically before each cell executes, so edits to
# imported code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime
from pathlib import Path
import tempfile

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pyprojroot import here
import imageio.v2 as imageio

from knapsack import KnapsackGA
from knapsack.items import items

# Knapsack - Genetic Algorithm

In [None]:
# Run configuration shared by the solver and the plots below.
MAX_WEIGHT = 20  # passed to KnapsackGA as `max_weight`; drawn as the capacity line in the weight plots
MAX_GENERATIONS = 50  # passed to `ga.run` as `generations`
POP_SIZE = 50  # passed to KnapsackGA as `population_size`; also the y-axis limit of the histograms

In [None]:
# Build the solver from the imported item list and the configured limits.
ga = KnapsackGA(
    items,
    max_weight=MAX_WEIGHT,
    population_size=POP_SIZE,
)

# Show the solver's printed representation.
print(ga)

In [None]:
# Run the algorithm for the configured number of generations.
solution, result = ga.run(generations=MAX_GENERATIONS)

# The `=` specifier prints each expression next to its value.
print(
    f"{solution=}\n"
    f"{result.best_fitness=}\n"
    f"{result.runtime=}s"
)

These are the items that made the cut:

In [None]:
# One selected item name per line.
print("\n".join(item.name for item in solution))

## Results and Visualisations

### Create results subdirectory


In [None]:
# Timestamp (to the second) used to name this run's output directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Outputs go to <project root>/results/<timestamp>; `here` comes from pyprojroot.
results_path = here("results") / timestamp
# parents=True creates "results" itself if missing; exist_ok=True makes re-running the cell harmless.
results_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Drop the last field of every history record (the per-solution stats used
# further down) and keep the remaining per-generation summary columns.
data = pd.DataFrame(
    [entry[:-1] for entry in result.history],
    columns=["generation", "best_fitness", "avg_fitness"],
)

### Plot best and average population value over time

In [None]:
fig, ax = plt.subplots(figsize=(8, 5))

# One line per fitness series, both drawn on the same axes.
for column, label in (("best_fitness", "Best value"), ("avg_fitness", "Average value")):
    sns.lineplot(data, x="generation", y=column, ax=ax, label=label)

ax.set_title("Results")
ax.set_xlabel("Generation")
ax.set_ylabel("Value")

# Anchor the legend to the right of the axes so it does not cover the curves.
ax.legend(loc="upper right", bbox_to_anchor=(1.35, 1))

# List the solver parameters next to the plot, in axes-relative coordinates.
params = ga.params()
param_text = "Parameters\n\n" + "\n".join(f"{k}: {v}" for k, v in params.items())
ax.text(
    1.05,
    0.8,
    param_text,
    transform=ax.transAxes,
    fontsize=10,
    va='top',
)
fig.tight_layout()

In [None]:
# Store the fitness chart in this run's results directory.
filename = "ga_results.png"
filepath = results_path.joinpath(filename)

# bbox_inches="tight" keeps the legend and parameter text, which sit outside
# the axes, inside the saved image.
fig.savefig(filepath, bbox_inches="tight")

### Plot distributions of value and weight

In [None]:
def _draw_histogram_panel(ax, data, *, color, title, xlabel, y_max, x_lims=None):
    """Draw a 15-bin histogram of ``data`` on ``ax`` with the shared panel styling.

    ``x_lims`` is an optional ``(low, high)`` pair; when given, the upper bound
    is padded by 10% before being applied.
    """
    ax.hist(data, bins=15, color=color, edgecolor='black', alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Number of Solutions')
    ax.set_ylim(0, y_max)
    if x_lims:
        ax.set_xlim(x_lims[0], x_lims[1] * 1.1)


def plot_generation_distributions(generation: int, values: list[float], weights: list[float], value_axis_lims=None, weight_axis_lims=None, max_weight=None, pop_size=None):
    """Plot side-by-side histograms of solution values and weights for one generation.

    Args:
        generation: Generation number shown in the figure title.
        values: Total value of each solution in the population.
        weights: Total weight of each solution in the population.
        value_axis_lims: Optional ``(low, high)`` x-range for the value panel.
            Pass the same range for every generation to keep frames comparable.
        weight_axis_lims: Optional ``(low, high)`` x-range for the weight panel.
        max_weight: Capacity marked on the weight panel. Defaults to the
            notebook-level ``MAX_WEIGHT``.
        pop_size: Upper y-axis limit of both panels. Defaults to the
            notebook-level ``POP_SIZE``.

    Returns:
        The matplotlib ``Figure``. It has already been closed in pyplot, but it
        can still be saved with ``fig.savefig`` or displayed as a cell output.
    """
    capacity = MAX_WEIGHT if max_weight is None else max_weight
    y_max = POP_SIZE if pop_size is None else pop_size

    fig, (value_ax, weight_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Value panel
    _draw_histogram_panel(
        value_ax,
        values,
        color='lightgreen',
        title='Value Distribution',
        xlabel='Total Value',
        y_max=y_max,
        x_lims=value_axis_lims,
    )

    # An empty population has no best value to mark; skip instead of letting max() raise.
    if len(values) > 0:
        best_value = max(values)
        label_y = value_ax.get_ylim()[1] * 0.9
        value_ax.axvline(x=best_value, color='green', linestyle='--', linewidth=2)
        value_ax.annotate(
            f'Best: {best_value:.1f}',
            xy=(best_value, label_y),
            xytext=(best_value * 0.75, label_y),
            arrowprops=dict(arrowstyle='->'),
        )

    # Weight panel
    _draw_histogram_panel(
        weight_ax,
        weights,
        color='skyblue',
        title='Weight Distribution',
        xlabel='Total Weight',
        y_max=y_max,
        x_lims=weight_axis_lims,
    )

    label_y = weight_ax.get_ylim()[1] * 0.9
    weight_ax.axvline(x=capacity, color='red', linestyle='--', linewidth=2)
    weight_ax.annotate(
        f'Capacity: {capacity}',
        xy=(capacity, label_y),
        xytext=(capacity * 1.1, label_y),
        arrowprops=dict(arrowstyle='->'),
    )

    fig.suptitle(f"Generation {generation}")
    # Lay out this figure explicitly rather than whatever pyplot considers current.
    fig.tight_layout()
    # Release the figure from pyplot so repeated calls (one per GIF frame) do not
    # accumulate open figures.
    plt.close(fig)

    return fig

Calculate the minimum and maximum value and weight across all generations, so that every distribution plot can share the same axis limits.

In [None]:
# Pool the per-solution entries (last field of each history record) of every generation.
stats = [entry for generation_record in result.history for entry in generation_record[-1]]

# Each entry holds the value at index 0 and the weight at index 1.
values = [entry[0] for entry in stats]
weights = [entry[1] for entry in stats]

# Overall ranges, reused below as shared axis limits.
min_value = min(values)
max_value = max(values)
min_weight = min(weights)
max_weight = max(weights)

In [None]:
generation_to_plot = 0

# The last field of a history record holds one (value, weight, ...) entry per solution.
stats = result.history[generation_to_plot][-1]
values = [entry[0] for entry in stats]
weights = [entry[1] for entry in stats]

# Use the ranges computed over all generations so plots are directly comparable.
fig = plot_generation_distributions(
    generation=generation_to_plot,
    values=values,
    weights=weights,
    value_axis_lims=(min_value, max_value),
    weight_axis_lims=(min_weight, max_weight),
)

filename = f"{generation_to_plot}_generation_distribution.png"
filepath = results_path.joinpath(filename)
fig.savefig(filepath, bbox_inches="tight")

# Display the figure as the cell output.
fig

In [None]:
# Take the last recorded generation instead of indexing with MAX_GENERATIONS:
# that index only exists when the history holds exactly MAX_GENERATIONS + 1
# records, and raises IndexError otherwise.
final_record = result.history[-1]

# The first field of a history record is its generation number, the last field
# holds one (value, weight, ...) entry per solution.
generation_to_plot = final_record[0]
stats = final_record[-1]
values = [entry[0] for entry in stats]
weights = [entry[1] for entry in stats]

# Use the ranges computed over all generations so plots are directly comparable.
fig = plot_generation_distributions(
    generation_to_plot,
    values,
    weights,
    value_axis_lims=(min_value, max_value),
    weight_axis_lims=(min_weight, max_weight)
)
filename = f"{generation_to_plot}_generation_distribution.png"
filepath = results_path / filename
fig.savefig(filepath, bbox_inches="tight")

# Display the figure as the cell output.
fig

### Create GIF

In [None]:
# Frames are only needed until the GIF is written, so they live in a
# temporary directory that is removed when the block exits.
with tempfile.TemporaryDirectory() as tmpdirname:
    tmpdir = Path(tmpdirname)

    # Render one PNG frame per recorded generation.
    frame_paths = []
    for record in result.history:
        generation = record[0]
        stats = record[-1]

        values = [entry[0] for entry in stats]
        weights = [entry[1] for entry in stats]
        fig = plot_generation_distributions(
            generation=generation,
            values=values,
            weights=weights,
            value_axis_lims=(min_value, max_value),
            weight_axis_lims=(min_weight, max_weight),
        )

        filename = f"{generation}_generation_distribution.png"
        filepath = tmpdir.joinpath(filename)
        fig.savefig(filepath, dpi=100)
        frame_paths.append(filepath)

    # Stitch the frames together in generation order; loop=0 is passed so the
    # animation repeats.
    # NOTE(review): newer imageio releases prefer `duration` over `fps` for GIFs -
    # confirm against the installed version.
    gif_path = results_path / "distributions.gif"
    with imageio.get_writer(gif_path, mode='I', fps=2, loop=0) as writer:
        for frame_path in frame_paths:
            image = imageio.imread(frame_path)
            writer.append_data(image)
