From 66701bd4938d432e244b0b6578fda6e6ae1c0d62 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:10:27 +0100
Subject: [PATCH 01/28] Genetic algorithm for Knapsack

---
 genetic_algorithm/knapsack.py | 168 ++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 genetic_algorithm/knapsack.py

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
new file mode 100644
index 000000000000..0be7e0ce1cfb
--- /dev/null
+++ b/genetic_algorithm/knapsack.py
@@ -0,0 +1,168 @@
+"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?"""
+
+import random
+from dataclasses import dataclass
+
+random.seed(42)
+
+# =========================== Problem setup: Knapsack ===========================
+
+KNAPSACK_N_ITEMS = 42                   # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE = (10, 100)        # Range of item values
+KNAPSACK_WEIGHT_RANGE = (5, 50)         # Range of item weights
+KNAPSACK_CAPACITY_RATIO = 0.5           # Capacity as a fraction of total weight
+
+@dataclass
+class Item:
+    value: int
+    weight: int
+
+def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weight_range: tuple[int, int], capacity_ratio=float) -> tuple[list[Item], int]:
+    """Generates a random knapsack problem instance."""
+    items = []
+    for _ in range(n_items):
+        value = random.randint(*value_range)
+        weight = random.randint(*weight_range)
+        items.append(Item(value=value, weight=weight))
+    # We set capacity as a fraction of total weight
+    capacity = int(sum(it.weight for it in items) * capacity_ratio)
+    return items, capacity
+
+items, capacity = generate_knapsack_instance(n_items=KNAPSACK_N_ITEMS, value_range=KNAPSACK_VALUE_RANGE, weight_range=KNAPSACK_WEIGHT_RANGE, capacity_ratio=KNAPSACK_CAPACITY_RATIO)
+
+
+
+# ============================== GA Representation ==============================
+
+# HYPERPARAMETERS (For tuning the GA)
+
+POPULATION_SIZE = 120
+GENERATIONS = 200
+CROSSOVER_PROBABILITY = 0.9
+MUTATION_PROBABILITY = 0.01
+TOURNAMENT_K = 3
+ELITISM = 2
+
+OVERWEIGHT_PENALTY_FACTOR = 10
+
+Genome = list[int] # An index list where 1 means item is included, 0 means excluded
+
+def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
+    """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
+    total_value = 0
+    total_weight = 0
+    for gene, item in zip(genome, items):
+        if gene:
+            total_value += item.value
+            total_weight += item.weight
+    if total_weight > capacity:
+        # Penalize overweight solutions: return small value scaled by overflow
+        overflow = (total_weight - capacity)
+        total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
+    return total_value, total_weight
+
+def random_genome(n: int) -> Genome:
+    """Generates a random genome of length n."""
+    return [random.randint(0,1) for _ in range(n)]
+
+def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
+    """Performs tournament selection to choose genomes from the population.
+    Note that other selection strategies exist such as roulette wheel, rank-based, etc.
+    """
+    contenders = random.sample(list(zip(population, fitnesses)), k)
+    get_fitness = lambda x: x[1]
+    return max(contenders, key=get_fitness)[0][:]
+
+def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
+    """Performs single-point crossover between two genomes.
+    Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
+    min_length = min(len(a), len(b))
+    if random.random() > p_crossover or min_length < 2:
+        return a[:], b[:]
+    cutoff_point = random.randint(1, min_length - 1)
+    return a[:cutoff_point]+b[cutoff_point:], b[:cutoff_point]+a[cutoff_point:]
+
+def mutation(g: Genome, p_mutation: int) -> Genome:
+    """Performs bit-flip mutation on a genome.
+    Note that other mutation strategies exist such as swap mutation, scramble mutation, etc.
+    """
+    return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
+
+def run_ga(
+    items: list[Item],
+    capacity: int,
+    pop_size=POPULATION_SIZE,
+    generations=GENERATIONS,
+    p_crossover=CROSSOVER_PROBABILITY,
+    p_mutation=MUTATION_PROBABILITY,
+    tournament_k=TOURNAMENT_K,
+    elitism=ELITISM,
+):
+    """Runs the genetic algorithm to solve the knapsack problem."""
+    n = len(items)
+    population = [random_genome(n) for _ in range(pop_size)]
+    best_history = []  # track best fitness per generation
+    avg_history = []
+    best_overall = None
+    best_fit_overall = -1
+
+    for _ in range(generations):
+        fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
+        best_fit = max(fitnesses)
+        best_idx = fitnesses.index(best_fit)
+        best_history.append(best_fit)
+        avg_fit = sum(fitnesses) / pop_size
+        avg_history.append(avg_fit)
+
+        if best_fit > best_fit_overall:
+            best_fit_overall = best_fit
+            best_overall = population[best_idx][:]
+
+        # Elitism
+        get_fitness = lambda i: fitnesses[i]
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism] # Sort the population by fitness and get the top `elitism` indices
+        elites = [population[i][:] for i in elite_indices] # Make nepo babies
+
+        # New generation
+        new_pop = elites[:]
+        while len(new_pop) < pop_size:
+            parent1 = selection(population, fitnesses, k=tournament_k)
+            parent2 = selection(population, fitnesses, k=tournament_k)
+            child1, child2 = crossover(parent1, parent2, p_crossover)
+            child1 = mutation(child1, p_mutation)
+            child2 = mutation(child2, p_mutation)
+            new_pop.extend([child1, child2])
+        population = new_pop[:pop_size]
+
+    # Final evaluation of the best
+    best_value, best_weight = evaluate(best_overall, items, capacity)
+    return {
+        "best_genome": best_overall,
+        "best_value": best_value,
+        "best_weight": best_weight,
+        "capacity": capacity,
+        "best_history": best_history,
+        "avg_history": avg_history,
+    }
+
+result = run_ga(items, capacity)
+
+best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+
+print(f"Knapsack capacity: {result["capacity"]}")
+print(f"Best solution: value = {result["best_value"]}, weight = {result["best_weight"]}")
+
+# print("Items included in the best solution:", best_items)
+
+# import matplotlib.pyplot as plt
+
+# # Plot fitness curves
+# plt.figure()
+# plt.plot(result["best_history"], label="Best fitness")
+# plt.plot(result["avg_history"], label="Average fitness")
+# plt.title("GA on Knapsack: Fitness over Generations")
+# plt.xlabel("Generation")
+# plt.ylabel("Fitness")
+# plt.legend()
+# plt.tight_layout()
+# plt.show()

From 2d11ae12d718903367883f9e0edf8e43c7da8162 Mon Sep 17 00:00:00 2001
From: Dang-Hoang-Tung <Dang-Hoang-Tung@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:10:41 +0000
Subject: [PATCH 02/28] updating DIRECTORY.md

---
 DIRECTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DIRECTORY.md b/DIRECTORY.md
index 36acb3b97f1e..b2c13f74278f 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -460,6 +460,7 @@
 
 ## Genetic Algorithm
   * [Basic String](genetic_algorithm/basic_string.py)
+  * [Knapsack](genetic_algorithm/knapsack.py)
 
 ## Geodesy
   * [Haversine Distance](geodesy/haversine_distance.py)

From 1d2d89e9d0cb82f8b8c160dc07aa9fe0e08ae723 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:25:57 +0000
Subject: [PATCH 03/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 51 +++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 0be7e0ce1cfb..0ce0e8ba22e6 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -7,17 +7,24 @@
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS = 42                   # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE = (10, 100)        # Range of item values
-KNAPSACK_WEIGHT_RANGE = (5, 50)         # Range of item weights
-KNAPSACK_CAPACITY_RATIO = 0.5           # Capacity as a fraction of total weight
+KNAPSACK_N_ITEMS = 42  # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE = (10, 100)  # Range of item values
+KNAPSACK_WEIGHT_RANGE = (5, 50)  # Range of item weights
+KNAPSACK_CAPACITY_RATIO = 0.5  # Capacity as a fraction of total weight
+
 
 @dataclass
 class Item:
     value: int
     weight: int
 
-def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weight_range: tuple[int, int], capacity_ratio=float) -> tuple[list[Item], int]:
+
+def generate_knapsack_instance(
+    n_items: int,
+    value_range: tuple[int, int],
+    weight_range: tuple[int, int],
+    capacity_ratio=float,
+) -> tuple[list[Item], int]:
     """Generates a random knapsack problem instance."""
     items = []
     for _ in range(n_items):
@@ -28,8 +35,13 @@ def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weigh
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
-items, capacity = generate_knapsack_instance(n_items=KNAPSACK_N_ITEMS, value_range=KNAPSACK_VALUE_RANGE, weight_range=KNAPSACK_WEIGHT_RANGE, capacity_ratio=KNAPSACK_CAPACITY_RATIO)
 
+items, capacity = generate_knapsack_instance(
+    n_items=KNAPSACK_N_ITEMS,
+    value_range=KNAPSACK_VALUE_RANGE,
+    weight_range=KNAPSACK_WEIGHT_RANGE,
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
+)
 
 
 # ============================== GA Representation ==============================
@@ -45,7 +57,8 @@ def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weigh
 
 OVERWEIGHT_PENALTY_FACTOR = 10
 
-Genome = list[int] # An index list where 1 means item is included, 0 means excluded
+Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
+
 
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
     """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
@@ -57,13 +70,15 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_weight += item.weight
     if total_weight > capacity:
         # Penalize overweight solutions: return small value scaled by overflow
-        overflow = (total_weight - capacity)
+        overflow = total_weight - capacity
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
+
 def random_genome(n: int) -> Genome:
     """Generates a random genome of length n."""
-    return [random.randint(0,1) for _ in range(n)]
+    return [random.randint(0, 1) for _ in range(n)]
+
 
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     """Performs tournament selection to choose genomes from the population.
@@ -73,6 +88,7 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     get_fitness = lambda x: x[1]
     return max(contenders, key=get_fitness)[0][:]
 
+
 def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
     """Performs single-point crossover between two genomes.
     Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
@@ -80,7 +96,8 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     if random.random() > p_crossover or min_length < 2:
         return a[:], b[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return a[:cutoff_point]+b[cutoff_point:], b[:cutoff_point]+a[cutoff_point:]
+    return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
+
 
 def mutation(g: Genome, p_mutation: int) -> Genome:
     """Performs bit-flip mutation on a genome.
@@ -88,6 +105,7 @@ def mutation(g: Genome, p_mutation: int) -> Genome:
     """
     return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
 
+
 def run_ga(
     items: list[Item],
     capacity: int,
@@ -120,8 +138,10 @@ def run_ga(
 
         # Elitism
         get_fitness = lambda i: fitnesses[i]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism] # Sort the population by fitness and get the top `elitism` indices
-        elites = [population[i][:] for i in elite_indices] # Make nepo babies
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[
+            :elitism
+        ]  # Sort the population by fitness and get the top `elitism` indices
+        elites = [population[i][:] for i in elite_indices]  # Make nepo babies
 
         # New generation
         new_pop = elites[:]
@@ -145,12 +165,15 @@ def run_ga(
         "avg_history": avg_history,
     }
 
+
 result = run_ga(items, capacity)
 
 best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
 
-print(f"Knapsack capacity: {result["capacity"]}")
-print(f"Best solution: value = {result["best_value"]}, weight = {result["best_weight"]}")
+print(f"Knapsack capacity: {result['capacity']}")
+print(
+    f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+)
 
 # print("Items included in the best solution:", best_items)
 

From 001589925fa689ecae78b21f1be38e74a1aabaf3 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:40:26 +0100
Subject: [PATCH 04/28] Update with doctests

---
 genetic_algorithm/knapsack.py | 254 ++++++++++++++++++++++++++--------
 1 file changed, 194 insertions(+), 60 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 0ce0e8ba22e6..6f8881af06e5 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,49 +1,76 @@
-"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?"""
+"""Did you know that Genetic Algorithms can be used to quickly approximate
+combinatorial optimization problems such as knapsack?
+
+Run doctests:
+    python -m doctest -v genetic_algorithm/knapsack.py
+"""
 
 import random
 from dataclasses import dataclass
 
+# Keep module-level RNG deterministic for examples that rely on random,
+# but individual doctests re-seed locally as needed.
 random.seed(42)
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS = 42  # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE = (10, 100)  # Range of item values
-KNAPSACK_WEIGHT_RANGE = (5, 50)  # Range of item weights
-KNAPSACK_CAPACITY_RATIO = 0.5  # Capacity as a fraction of total weight
-
+KNAPSACK_N_ITEMS: int = 42                   # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)        # Range of item values
+KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)         # Range of item weights
+KNAPSACK_CAPACITY_RATIO: float = 0.5           # Capacity as a fraction of total weight
 
 @dataclass
 class Item:
     value: int
     weight: int
 
-
 def generate_knapsack_instance(
     n_items: int,
     value_range: tuple[int, int],
     weight_range: tuple[int, int],
-    capacity_ratio=float,
+    capacity_ratio: float
 ) -> tuple[list[Item], int]:
-    """Generates a random knapsack problem instance."""
+    """
+    Generates a random knapsack problem instance.
+
+    Returns a tuple: (items, capacity), where items is a list of Item(value, weight)
+    and capacity is an int computed as floor(capacity_ratio * total_weight).
+
+    Examples
+    --------
+    Use a tiny, deterministic instance to validate shape and capacity range:
+
+    >>> random.seed(0)
+    >>> items, cap = generate_knapsack_instance(
+    ...     n_items=3,
+    ...     value_range=(5, 5),
+    ...     weight_range=(10, 10),
+    ...     capacity_ratio=0.5
+    ... )
+    >>> len(items), cap
+    (3, 15)
+    >>> all(isinstance(it, Item) for it in items)
+    True
+    >>> [it.value for it in items], [it.weight for it in items]
+    ([5, 5, 5], [10, 10, 10])
+    """
     items = []
     for _ in range(n_items):
         value = random.randint(*value_range)
         weight = random.randint(*weight_range)
         items.append(Item(value=value, weight=weight))
-    # We set capacity as a fraction of total weight
+    # Capacity as a fraction of total weight
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
-
+# Example instance (guarded by __main__ below for printing)
 items, capacity = generate_knapsack_instance(
     n_items=KNAPSACK_N_ITEMS,
     value_range=KNAPSACK_VALUE_RANGE,
     weight_range=KNAPSACK_WEIGHT_RANGE,
-    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO
 )
 
-
 # ============================== GA Representation ==============================
 
 # HYPERPARAMETERS (For tuning the GA)
@@ -59,9 +86,30 @@ def generate_knapsack_instance(
 
 Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
 
-
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
-    """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
+    """
+    Calculates fitness (value) and weight of a candidate solution. If overweight,
+    the returned value is penalized; weight is the actual summed weight.
+
+    Returns (value, weight).
+
+    Examples
+    --------
+    Feasible genome (no penalty):
+
+    >>> it = [Item(10, 4), Item(7, 3), Item(5, 2)]
+    >>> genome = [1, 0, 1]  # take items 0 and 2
+    >>> evaluate(genome, it, capacity=7)
+    (15, 6)
+
+    Overweight genome (penalty applies):
+    Total value = 10+7+5 = 22, total weight = 9, capacity = 7, overflow = 2
+    Penalized value = max(0, 22 - 2 * OVERWEIGHT_PENALTY_FACTOR) = 2
+
+    >>> genome = [1, 1, 1]
+    >>> evaluate(genome, it, capacity=7)
+    (2, 9)
+    """
     total_value = 0
     total_weight = 0
     for gene, item in zip(genome, items):
@@ -69,20 +117,42 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_value += item.value
             total_weight += item.weight
     if total_weight > capacity:
-        # Penalize overweight solutions: return small value scaled by overflow
-        overflow = total_weight - capacity
+        overflow = (total_weight - capacity)
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
 
 def random_genome(n: int) -> Genome:
-    """Generates a random genome of length n."""
-    return [random.randint(0, 1) for _ in range(n)]
+    """
+    Generates a random genome (list of 0/1) of length n.
 
+    Examples
+    --------
+    Check length and content are 0/1 bits:
+
+    >>> random.seed(123)
+    >>> g = random_genome(5)
+    >>> len(g), set(g).issubset({0, 1})
+    (5, True)
+    """
+    return [random.randint(0, 1) for _ in range(n)]
 
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
-    """Performs tournament selection to choose genomes from the population.
+    """
+    Performs tournament selection to choose a genome from the population.
+
     Note that other selection strategies exist such as roulette wheel, rank-based, etc.
+
+    Examples
+    --------
+    Deterministic tournament with fixed seed (k=2):
+
+    >>> random.seed(1)
+    >>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
+    >>> fits = [0, 5, 9, 7]
+    >>> parent = selection(pop, fits, k=2)
+    >>> parent in pop
+    True
     """
     contenders = random.sample(list(zip(population, fitnesses)), k)
     get_fitness = lambda x: x[1]
@@ -90,18 +160,54 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
 
 
 def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
-    """Performs single-point crossover between two genomes.
-    Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
+    """
+    Performs single-point crossover between two genomes.
+    If crossover does not occur (random > p_crossover) or genomes are too short,
+    returns copies of the parents.
+
+    Note: other crossover strategies exist (two-point, uniform, etc.).
+
+    Examples
+    --------
+    Force crossover with p=1.0 and fixed RNG; verify lengths and bit content:
+
+    >>> random.seed(2)
+    >>> a, b = [0,0,0,0], [1,1,1,1]
+    >>> c1, c2 = crossover(a, b, p_crossover=1.0)
+    >>> len(c1) == len(a) == len(c2) == len(b)
+    True
+    >>> set(c1).issubset({0,1}) and set(c2).issubset({0,1})
+    True
+
+    No crossover if p=0.0:
+
+    >>> c1, c2 = crossover([0,0,0], [1,1,1], p_crossover=0.0)
+    >>> c1, c2
+    ([0, 0, 0], [1, 1, 1])
+    """
     min_length = min(len(a), len(b))
     if random.random() > p_crossover or min_length < 2:
         return a[:], b[:]
     cutoff_point = random.randint(1, min_length - 1)
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
+def mutation(g: Genome, p_mutation: float) -> Genome:
+    """
+    Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
+
+    Note: other mutation strategies exist (swap, scramble, etc.).
+
+    Examples
+    --------
+    With probability 1.0, every bit flips:
 
-def mutation(g: Genome, p_mutation: int) -> Genome:
-    """Performs bit-flip mutation on a genome.
-    Note that other mutation strategies exist such as swap mutation, scramble mutation, etc.
+    >>> mutation([0, 1, 1, 0], p_mutation=1.0)
+    [1, 0, 0, 1]
+
+    With probability 0.0, nothing changes:
+
+    >>> mutation([0, 1, 1, 0], p_mutation=0.0)
+    [0, 1, 1, 0]
     """
     return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
 
@@ -109,14 +215,46 @@ def mutation(g: Genome, p_mutation: int) -> Genome:
 def run_ga(
     items: list[Item],
     capacity: int,
-    pop_size=POPULATION_SIZE,
-    generations=GENERATIONS,
-    p_crossover=CROSSOVER_PROBABILITY,
-    p_mutation=MUTATION_PROBABILITY,
-    tournament_k=TOURNAMENT_K,
-    elitism=ELITISM,
+    pop_size: int = POPULATION_SIZE,
+    generations: int = GENERATIONS,
+    p_crossover: float = CROSSOVER_PROBABILITY,
+    p_mutation: float = MUTATION_PROBABILITY,
+    tournament_k: int = TOURNAMENT_K,
+    elitism: int = ELITISM,
 ):
-    """Runs the genetic algorithm to solve the knapsack problem."""
+    """
+    Runs the genetic algorithm to (approximately) solve the knapsack problem.
+
+    Returns a dict with keys:
+      - 'best_genome' (Genome)
+      - 'best_value' (int)
+      - 'best_weight' (int)
+      - 'capacity' (int)
+      - 'best_history' (list[int])
+      - 'avg_history' (list[float])
+
+    Examples
+    --------
+    Use a tiny instance and few generations to validate structure and lengths:
+
+    >>> random.seed(1234)
+    >>> tiny_items = [Item(5,2), Item(6,3), Item(2,1), Item(7,4)]
+    >>> cap = 5
+    >>> out = run_ga(
+    ...     tiny_items, cap,
+    ...     pop_size=10, generations=5,
+    ...     p_crossover=0.9, p_mutation=0.05,
+    ...     tournament_k=2, elitism=1
+    ... )
+    >>> sorted(out.keys())
+    ['avg_history', 'best_genome', 'best_history', 'best_value', 'best_weight', 'capacity']
+    >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
+    True
+    >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
+    True
+    >>> out['capacity'] == cap
+    True
+    """
     n = len(items)
     population = [random_genome(n) for _ in range(pop_size)]
     best_history = []  # track best fitness per generation
@@ -138,10 +276,8 @@ def run_ga(
 
         # Elitism
         get_fitness = lambda i: fitnesses[i]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[
-            :elitism
-        ]  # Sort the population by fitness and get the top `elitism` indices
-        elites = [population[i][:] for i in elite_indices]  # Make nepo babies
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
+        elites = [population[i][:] for i in elite_indices]
 
         # New generation
         new_pop = elites[:]
@@ -165,27 +301,25 @@ def run_ga(
         "avg_history": avg_history,
     }
 
-
-result = run_ga(items, capacity)
-
-best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
-
-print(f"Knapsack capacity: {result['capacity']}")
-print(
-    f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
-)
-
-# print("Items included in the best solution:", best_items)
-
-# import matplotlib.pyplot as plt
-
-# # Plot fitness curves
-# plt.figure()
-# plt.plot(result["best_history"], label="Best fitness")
-# plt.plot(result["avg_history"], label="Average fitness")
-# plt.title("GA on Knapsack: Fitness over Generations")
-# plt.xlabel("Generation")
-# plt.ylabel("Fitness")
-# plt.legend()
-# plt.tight_layout()
-# plt.show()
+# ================================ Script entry =================================
+
+if __name__ == "__main__":
+    result = run_ga(items, capacity)
+    best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+
+    print(f"Knapsack capacity: {result['capacity']}")
+    print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
+    # Uncomment to inspect chosen items:
+    # print("Items included in the best solution:", best_items)
+
+    # Optional: plot fitness curves
+    # import matplotlib.pyplot as plt
+    # plt.figure()
+    # plt.plot(result["best_history"], label="Best fitness")
+    # plt.plot(result["avg_history"], label="Average fitness")
+    # plt.title("GA on Knapsack: Fitness over Generations")
+    # plt.xlabel("Generation")
+    # plt.ylabel("Fitness")
+    # plt.legend()
+    # plt.tight_layout()
+    # plt.show()

From e1bd46b7b791cf20e5616db4e089043e6079162d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:40:48 +0000
Subject: [PATCH 05/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 6f8881af06e5..29b8e5eb5da3 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -14,21 +14,23 @@
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS: int = 42                   # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)        # Range of item values
-KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)         # Range of item weights
-KNAPSACK_CAPACITY_RATIO: float = 0.5           # Capacity as a fraction of total weight
+KNAPSACK_N_ITEMS: int = 42  # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)  # Range of item values
+KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)  # Range of item weights
+KNAPSACK_CAPACITY_RATIO: float = 0.5  # Capacity as a fraction of total weight
+
 
 @dataclass
 class Item:
     value: int
     weight: int
 
+
 def generate_knapsack_instance(
     n_items: int,
     value_range: tuple[int, int],
     weight_range: tuple[int, int],
-    capacity_ratio: float
+    capacity_ratio: float,
 ) -> tuple[list[Item], int]:
     """
     Generates a random knapsack problem instance.
@@ -63,12 +65,13 @@ def generate_knapsack_instance(
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
+
 # Example instance (guarded by __main__ below for printing)
 items, capacity = generate_knapsack_instance(
     n_items=KNAPSACK_N_ITEMS,
     value_range=KNAPSACK_VALUE_RANGE,
     weight_range=KNAPSACK_WEIGHT_RANGE,
-    capacity_ratio=KNAPSACK_CAPACITY_RATIO
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
 )
 
 # ============================== GA Representation ==============================
@@ -86,6 +89,7 @@ def generate_knapsack_instance(
 
 Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
 
+
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,
@@ -117,7 +121,7 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_value += item.value
             total_weight += item.weight
     if total_weight > capacity:
-        overflow = (total_weight - capacity)
+        overflow = total_weight - capacity
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
@@ -137,6 +141,7 @@ def random_genome(n: int) -> Genome:
     """
     return [random.randint(0, 1) for _ in range(n)]
 
+
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     """
     Performs tournament selection to choose a genome from the population.
@@ -191,6 +196,7 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     cutoff_point = random.randint(1, min_length - 1)
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
+
 def mutation(g: Genome, p_mutation: float) -> Genome:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
@@ -301,6 +307,7 @@ def run_ga(
         "avg_history": avg_history,
     }
 
+
 # ================================ Script entry =================================
 
 if __name__ == "__main__":
@@ -308,7 +315,9 @@ def run_ga(
     best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
 
     print(f"Knapsack capacity: {result['capacity']}")
-    print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
+    print(
+        f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+    )
     # Uncomment to inspect chosen items:
     # print("Items included in the best solution:", best_items)
 

From 9a8b66d61dc9b01eafe96b68c5ca66d54de28700 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:43:25 +0100
Subject: [PATCH 06/28] Update Genome to genome_t

---
 genetic_algorithm/knapsack.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 29b8e5eb5da3..ad42eebede53 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -87,10 +87,10 @@ def generate_knapsack_instance(
 
 OVERWEIGHT_PENALTY_FACTOR = 10
 
-Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
+genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
 
-def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
+def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,
     the returned value is penalized; weight is the actual summed weight.
@@ -126,7 +126,7 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
     return total_value, total_weight
 
 
-def random_genome(n: int) -> Genome:
+def random_genome(n: int) -> genome_t:
     """
     Generates a random genome (list of 0/1) of length n.
 
@@ -142,7 +142,7 @@ def random_genome(n: int) -> Genome:
     return [random.randint(0, 1) for _ in range(n)]
 
 
-def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
+def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -164,7 +164,7 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     return max(contenders, key=get_fitness)[0][:]
 
 
-def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
+def crossover(a: genome_t, b: genome_t, p_crossover: float) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
     If crossover does not occur (random > p_crossover) or genomes are too short,
@@ -197,7 +197,7 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
 
-def mutation(g: Genome, p_mutation: float) -> Genome:
+def mutation(g: genome_t, p_mutation: float) -> genome_t:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
 
@@ -232,7 +232,7 @@ def run_ga(
     Runs the genetic algorithm to (approximately) solve the knapsack problem.
 
     Returns a dict with keys:
-      - 'best_genome' (Genome)
+      - 'best_genome' (genome_t)
       - 'best_value' (int)
       - 'best_weight' (int)
       - 'capacity' (int)

From 06981bfb309e7498f8a0997522c5e1c491740e05 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:43:45 +0000
Subject: [PATCH 07/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index ad42eebede53..1d06acc8bad5 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -164,7 +164,9 @@ def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genom
     return max(contenders, key=get_fitness)[0][:]
 
 
-def crossover(a: genome_t, b: genome_t, p_crossover: float) -> tuple[genome_t, genome_t]:
+def crossover(
+    a: genome_t, b: genome_t, p_crossover: float
+) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
     If crossover does not occur (random > p_crossover) or genomes are too short,

From 9732f6d72d0ba1a28289b719f8d9632863fb3826 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:50:17 +0100
Subject: [PATCH 08/28] Fix variable names

---
 genetic_algorithm/knapsack.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 1d06acc8bad5..f99e1dff9841 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -126,7 +126,7 @@ def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, i
     return total_value, total_weight
 
 
-def random_genome(n: int) -> genome_t:
+def random_genome(length: int) -> genome_t:
     """
     Generates a random genome (list of 0/1) of length n.
 
@@ -139,10 +139,10 @@ def random_genome(n: int) -> genome_t:
     >>> len(g), set(g).issubset({0, 1})
     (5, True)
     """
-    return [random.randint(0, 1) for _ in range(n)]
+    return [random.randint(0, 1) for _ in range(length)]
 
 
-def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genome_t:
+def selection(population: list[genome_t], fitnesses: list[int], tournament_k: int) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -159,13 +159,13 @@ def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genom
     >>> parent in pop
     True
     """
-    contenders = random.sample(list(zip(population, fitnesses)), k)
-    get_fitness = lambda x: x[1]
+    contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
+    get_fitness = lambda contender: contender[1]
     return max(contenders, key=get_fitness)[0][:]
 
 
 def crossover(
-    a: genome_t, b: genome_t, p_crossover: float
+    genome_1: genome_t, genome_2: genome_t, p_crossover: float
 ) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
@@ -192,14 +192,14 @@ def crossover(
     >>> c1, c2
     ([0, 0, 0], [1, 1, 1])
     """
-    min_length = min(len(a), len(b))
+    min_length = min(len(genome_1), len(genome_2))
     if random.random() > p_crossover or min_length < 2:
-        return a[:], b[:]
+        return genome_1[:], genome_2[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
+    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[:cutoff_point] + genome_1[cutoff_point:]
 
 
-def mutation(g: genome_t, p_mutation: float) -> genome_t:
+def mutation(genome: genome_t, p_mutation: float) -> genome_t:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
 
@@ -217,7 +217,7 @@ def mutation(g: genome_t, p_mutation: float) -> genome_t:
     >>> mutation([0, 1, 1, 0], p_mutation=0.0)
     [0, 1, 1, 0]
     """
-    return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
+    return [(1 - gene) if random.random() < p_mutation else gene for gene in genome]
 
 
 def run_ga(
@@ -290,8 +290,8 @@ def run_ga(
         # New generation
         new_pop = elites[:]
         while len(new_pop) < pop_size:
-            parent1 = selection(population, fitnesses, k=tournament_k)
-            parent2 = selection(population, fitnesses, k=tournament_k)
+            parent1 = selection(population, fitnesses, tournament_k=tournament_k)
+            parent2 = selection(population, fitnesses, tournament_k=tournament_k)
             child1, child2 = crossover(parent1, parent2, p_crossover)
             child1 = mutation(child1, p_mutation)
             child2 = mutation(child2, p_mutation)

From 836da98383f67f4124770ea69bc3edbe9f0d9e78 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:50:38 +0000
Subject: [PATCH 09/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index f99e1dff9841..ed40e4eab1ab 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -142,7 +142,9 @@ def random_genome(length: int) -> genome_t:
     return [random.randint(0, 1) for _ in range(length)]
 
 
-def selection(population: list[genome_t], fitnesses: list[int], tournament_k: int) -> genome_t:
+def selection(
+    population: list[genome_t], fitnesses: list[int], tournament_k: int
+) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -196,7 +198,9 @@ def crossover(
     if random.random() > p_crossover or min_length < 2:
         return genome_1[:], genome_2[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[:cutoff_point] + genome_1[cutoff_point:]
+    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[
+        :cutoff_point
+    ] + genome_1[cutoff_point:]
 
 
 def mutation(genome: genome_t, p_mutation: float) -> genome_t:

From 7f3403e9d291c5151b6181d0902d9f288970ac3d Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:51:20 +0100
Subject: [PATCH 10/28] Add type annotation for run_ga

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index ed40e4eab1ab..853139efcf94 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -233,7 +233,7 @@ def run_ga(
     p_mutation: float = MUTATION_PROBABILITY,
     tournament_k: int = TOURNAMENT_K,
     elitism: int = ELITISM,
-):
+) -> dict:
     """
     Runs the genetic algorithm to (approximately) solve the knapsack problem.
 

From a563fcd354fdad1c9808bf78169456a197227ce3 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:52:53 +0100
Subject: [PATCH 11/28] Update variable names

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 853139efcf94..a4f51a600540 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -318,7 +318,7 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+    best_items = [items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(

From dabdf9a8e414e6725bedf9dde1bf1ac8be5afca8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:53:15 +0000
Subject: [PATCH 12/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index a4f51a600540..358a6622c7ef 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -318,7 +318,9 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1]
+    best_items = [
+        items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
+    ]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(

From 23dcc94a4b7c4936f914498d4eb1b6ee6c249ef4 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:55:02 +0100
Subject: [PATCH 13/28] Update variable names

---
 genetic_algorithm/knapsack.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 358a6622c7ef..85514ee1a4b8 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -267,8 +267,7 @@ def run_ga(
     >>> out['capacity'] == cap
     True
     """
-    n = len(items)
-    population = [random_genome(n) for _ in range(pop_size)]
+    population = [random_genome(len(items)) for _ in range(pop_size)]
     best_history = []  # track best fitness per generation
     avg_history = []
     best_overall = None
@@ -287,9 +286,9 @@ def run_ga(
             best_overall = population[best_idx][:]
 
         # Elitism
-        get_fitness = lambda i: fitnesses[i]
+        get_fitness = lambda idx: fitnesses[idx]
         elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
-        elites = [population[i][:] for i in elite_indices]
+        elites = [population[idx][:] for idx in elite_indices]
 
         # New generation
         new_pop = elites[:]

From 2f74223834f5119cb77f25aa5c24095babed4c9b Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:03:28 +0100
Subject: [PATCH 14/28] Inline key lambdas, drop keys doctest, simplify script entry

---
 genetic_algorithm/knapsack.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 85514ee1a4b8..e60db3001431 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -162,8 +162,7 @@ def selection(
     True
     """
     contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
-    get_fitness = lambda contender: contender[1]
-    return max(contenders, key=get_fitness)[0][:]
+    return max(contenders, key=lambda contender: contender[1])[0][:]
 
 
 def crossover(
@@ -258,8 +257,6 @@ def run_ga(
     ...     p_crossover=0.9, p_mutation=0.05,
     ...     tournament_k=2, elitism=1
     ... )
-    >>> sorted(out.keys())
-    ['avg_history', 'best_genome', 'best_history', 'best_value', 'best_weight', 'capacity']
     >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
     True
     >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
@@ -286,8 +283,8 @@ def run_ga(
             best_overall = population[best_idx][:]
 
         # Elitism
-        get_fitness = lambda idx: fitnesses[idx]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
+        sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
+        elite_indices = sorted_indices.reverse[:elitism]
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation
@@ -317,18 +314,19 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [
-        items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
-    ]
+    best_value, best_weight = result["best_value"], result["best_weight"]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(
-        f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+        f"Best solution: value = {best_value}, weight = {best_weight}"
     )
-    # Uncomment to inspect chosen items:
+    # # Uncomment to inspect chosen items:
+    # best_items = [
+    #     items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
+    # ]
     # print("Items included in the best solution:", best_items)
 
-    # Optional: plot fitness curves
+    # # Optional: plot fitness curves
     # import matplotlib.pyplot as plt
     # plt.figure()
     # plt.plot(result["best_history"], label="Best fitness")

From 430e8127109119c948c74e339291961db51e213e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:03:50 +0000
Subject: [PATCH 15/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index e60db3001431..cc55098dc96a 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -317,9 +317,7 @@ def run_ga(
     best_value, best_weight = result["best_value"], result["best_weight"]
 
     print(f"Knapsack capacity: {result['capacity']}")
-    print(
-        f"Best solution: value = {best_value}, weight = {best_weight}"
-    )
+    print(f"Best solution: value = {best_value}, weight = {best_weight}")
     # # Uncomment to inspect chosen items:
     # best_items = [
     #     items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1

From aac570c362cf6377c8b2856326ab9012b6dbbe70 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:05:30 +0100
Subject: [PATCH 16/28] Call reverse() when picking elite indices

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index cc55098dc96a..bcbae9ebc3f4 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices.reverse[:elitism]
+        elite_indices = sorted_indices.reverse()[:elitism]
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From 73ab9b8bab973aba81707d56afb57c0eb76a0de4 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:07:43 +0100
Subject: [PATCH 17/28] Reverse elite indices via slicing instead of list.reverse()

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index bcbae9ebc3f4..6d17518d3449 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices.reverse()[:elitism]
+        elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From 82dc97e3d500e8bd55364304bcf681509643c1fe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:08:05 +0000
Subject: [PATCH 18/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 6d17518d3449..66bcc5cac580 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
+        elite_indices = sorted_indices[::-1][:elitism]  # reverse and take top indices
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From 7240f16dc5dbeb640e2ba27fba311efdcd955e45 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:12:03 +0100
Subject: [PATCH 19/28] Annotate run_ga locals; rename history keys to *_fitness_history

---
 genetic_algorithm/knapsack.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 66bcc5cac580..d1f42b87eeba 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -265,22 +265,22 @@ def run_ga(
     True
     """
     population = [random_genome(len(items)) for _ in range(pop_size)]
-    best_history = []  # track best fitness per generation
-    avg_history = []
-    best_overall = None
-    best_fit_overall = -1
+    best_fitness_history: list[int] = []  # track best fitness per generation
+    avg_fitness_history: list[int] = []
+    best_genome_overall: genome_t = []
+    best_fitness_overall: int = -1
 
     for _ in range(generations):
         fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
         best_fit = max(fitnesses)
         best_idx = fitnesses.index(best_fit)
-        best_history.append(best_fit)
+        best_fitness_history.append(best_fit)
         avg_fit = sum(fitnesses) / pop_size
-        avg_history.append(avg_fit)
+        avg_fitness_history.append(avg_fit)
 
-        if best_fit > best_fit_overall:
-            best_fit_overall = best_fit
-            best_overall = population[best_idx][:]
+        if best_fit > best_fitness_overall:
+            best_fitness_overall = best_fit
+            best_genome_overall = population[best_idx][:]
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
@@ -299,14 +299,14 @@ def run_ga(
         population = new_pop[:pop_size]
 
     # Final evaluation of the best
-    best_value, best_weight = evaluate(best_overall, items, capacity)
+    best_value, best_weight = evaluate(best_genome_overall, items, capacity)
     return {
-        "best_genome": best_overall,
+        "best_genome": best_genome_overall,
         "best_value": best_value,
         "best_weight": best_weight,
         "capacity": capacity,
-        "best_history": best_history,
-        "avg_history": avg_history,
+        "best_fitness_history": best_fitness_history,
+        "avg_fitness_history": avg_fitness_history,
     }
 
 
@@ -327,8 +327,8 @@ def run_ga(
     # # Optional: plot fitness curves
     # import matplotlib.pyplot as plt
     # plt.figure()
-    # plt.plot(result["best_history"], label="Best fitness")
-    # plt.plot(result["avg_history"], label="Average fitness")
+    # plt.plot(result["best_fitness_history"], label="Best fitness")
+    # plt.plot(result["avg_fitness_history"], label="Average fitness")
     # plt.title("GA on Knapsack: Fitness over Generations")
     # plt.xlabel("Generation")
     # plt.ylabel("Fitness")

From 3e83e09157a764b06412744e6660c62b184c3ff0 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:12:58 +0100
Subject: [PATCH 20/28] Remove blank line before evaluate

---
 genetic_algorithm/knapsack.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index d1f42b87eeba..314fb30b0e5e 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -89,7 +89,6 @@ def generate_knapsack_instance(
 
 genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
-
 def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,

From 9ced9cf98212a2dac31b11500618c8f13ee9cf9b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:13:38 +0000
Subject: [PATCH 21/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 314fb30b0e5e..d1f42b87eeba 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -89,6 +89,7 @@ def generate_knapsack_instance(
 
 genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
+
 def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,

From 67ba8b3bfe23ac6bb83ae0593f5292a0a4302a41 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:14:53 +0100
Subject: [PATCH 22/28] Annotate avg_fitness_history as list[float]

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index d1f42b87eeba..27a35e2e9902 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -266,7 +266,7 @@ def run_ga(
     """
     population = [random_genome(len(items)) for _ in range(pop_size)]
     best_fitness_history: list[int] = []  # track best fitness per generation
-    avg_fitness_history: list[int] = []
+    avg_fitness_history: list[float] = []
     best_genome_overall: genome_t = []
     best_fitness_overall: int = -1
 

From 5b44a5d1ae55ec0ba9c3b768874e1a629096c248 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:21:10 +0100
Subject: [PATCH 23/28] Update run_ga doctest for renamed history keys

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 27a35e2e9902..efac9f477e85 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -257,7 +257,7 @@ def run_ga(
     ...     p_crossover=0.9, p_mutation=0.05,
     ...     tournament_k=2, elitism=1
     ... )
-    >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
+    >>> len(out['best_fitness_history']) == 5 and len(out['avg_fitness_history']) == 5
     True
     >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
     True

From 319a38fb696a731eea65786ea2037d924b17b761 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:21:46 +0100
Subject: [PATCH 24/28] Update selection doctest to use tournament_k keyword

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index efac9f477e85..8c30da49d5df 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -157,7 +157,7 @@ def selection(
     >>> random.seed(1)
     >>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
     >>> fits = [0, 5, 9, 7]
-    >>> parent = selection(pop, fits, k=2)
+    >>> parent = selection(pop, fits, tournament_k=2)
     >>> parent in pop
     True
     """

From 5ced3fe7b87cc03af74912f5e94d929c69eaf0b7 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:28:57 +0100
Subject: [PATCH 25/28] Polish up

---
 genetic_algorithm/knapsack.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 8c30da49d5df..c55e1f30add6 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,15 +1,17 @@
 """Did you know that Genetic Algorithms can be used to quickly approximate
 combinatorial optimization problems such as knapsack?
 
+Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
+Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
+Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem
+
 Run doctests:
-    python -m doctest -v ga_knapsack.py
+    python -m doctest -v knapsack.py
 """
 
 import random
 from dataclasses import dataclass
 
-# Keep module-level RNG deterministic for examples that rely on random,
-# but individual doctests re-seed locally as needed.
 random.seed(42)
 
 # =========================== Problem setup: Knapsack ===========================

From 0c2970bb2b4ca67feb4c6edc63effbc2d6e9ac97 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:30:23 +0100
Subject: [PATCH 26/28] Polish up

---
 genetic_algorithm/knapsack.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index c55e1f30add6..e581ca9f0020 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,6 +1,12 @@
 """Did you know that Genetic Algorithms can be used to quickly approximate
 combinatorial optimization problems such as knapsack?
 
+It is commonly known that combinatorial optimization problems can be solved using
+dynamic programming. It is lesser known that genetic algorithms (or evolutionary
+computing in general) can reach the best solution fairly quickly in a lot of cases.
+Otherwise, it can still approximate a very good solution (in life, good is good 
+enough).
+
 Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
 Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
 Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem

From 0c246e60ad25dc696bfba5fc7f4f95176d49da19 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:31:01 +0000
Subject: [PATCH 27/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index e581ca9f0020..270dc0c9acfd 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -4,7 +4,7 @@
 It is commonly known that combinatorial optimization problems can be solved using
 dynamic programming. It is lesser known that genetic algorithms (or evolutionary
 computing in general) can reach the best solution fairly quickly in a lot of cases.
-Otherwise, it can still approximate a very good solution (in life, good is good 
+Otherwise, it can still approximate a very good solution (in life, good is good
 enough).
 
 Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm

From 602d631d0fee85ac97af60e671630fcb2628a49b Mon Sep 17 00:00:00 2001
From: Dang-Hoang-Tung <Dang-Hoang-Tung@users.noreply.github.com>
Date: Wed, 8 Oct 2025 12:44:08 +0000
Subject: [PATCH 28/28] updating DIRECTORY.md

---
 DIRECTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DIRECTORY.md b/DIRECTORY.md
index b2c13f74278f..f82cf43b8786 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -195,6 +195,7 @@
     * [Permutations](data_structures/arrays/permutations.py)
     * [Prefix Sum](data_structures/arrays/prefix_sum.py)
     * [Product Sum](data_structures/arrays/product_sum.py)
+    * [Rotate Array](data_structures/arrays/rotate_array.py)
     * [Sparse Table](data_structures/arrays/sparse_table.py)
     * [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
   * Binary Tree