Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
66701bd
Genetic algorithm for Knapsack
Dang-Hoang-Tung Oct 5, 2025
2d11ae1
updating DIRECTORY.md
Dang-Hoang-Tung Oct 5, 2025
1d2d89e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
0015899
Update with doctests
Dang-Hoang-Tung Oct 5, 2025
e1bd46b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
9a8b66d
Update Genome to genome_t
Dang-Hoang-Tung Oct 5, 2025
06981bf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
9732f6d
Fix variable names
Dang-Hoang-Tung Oct 5, 2025
836da98
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
7f3403e
Add type annotation for run_ga
Dang-Hoang-Tung Oct 5, 2025
a563fcd
Update variable names
Dang-Hoang-Tung Oct 5, 2025
dabdf9a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
23dcc94
Update variable names
Dang-Hoang-Tung Oct 5, 2025
2f74223
Fix issues
Dang-Hoang-Tung Oct 5, 2025
430e812
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
aac570c
Fix issues
Dang-Hoang-Tung Oct 5, 2025
73ab9b8
Fix issues
Dang-Hoang-Tung Oct 5, 2025
82dc97e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
7240f16
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
3e83e09
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
9ced9cf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
67ba8b3
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
5b44a5d
fix issues
Dang-Hoang-Tung Oct 5, 2025
319a38f
fix issues
Dang-Hoang-Tung Oct 5, 2025
5ced3fe
Polish up
Dang-Hoang-Tung Oct 5, 2025
0c2970b
Polish up
Dang-Hoang-Tung Oct 5, 2025
0c246e6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
602d631
updating DIRECTORY.md
Dang-Hoang-Tung Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
* [Permutations](data_structures/arrays/permutations.py)
* [Prefix Sum](data_structures/arrays/prefix_sum.py)
* [Product Sum](data_structures/arrays/product_sum.py)
* [Rotate Array](data_structures/arrays/rotate_array.py)
* [Sparse Table](data_structures/arrays/sparse_table.py)
* [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
* Binary Tree
Expand Down Expand Up @@ -460,6 +461,7 @@

## Genetic Algorithm
* [Basic String](genetic_algorithm/basic_string.py)
* [Knapsack](genetic_algorithm/knapsack.py)

## Geodesy
* [Haversine Distance](geodesy/haversine_distance.py)
Expand Down
345 changes: 345 additions & 0 deletions genetic_algorithm/knapsack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
"""Did you know that Genetic Algorithms can be used to quickly approximate
combinatorial optimization problems such as knapsack?

It is commonly known that combinatorial optimization problems can be solved using
dynamic programming. It is lesser known that genetic algorithms (or evolutionary
computing in general) can reach the best solution fairly quickly in a lot of cases.
Otherwise, it can still approximate a very good solution (in life, good is good
enough).

Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem

Run doctests:
python -m doctest -v knapsack.py
"""

import random
from dataclasses import dataclass

random.seed(42)

# =========================== Problem setup: Knapsack ===========================

KNAPSACK_N_ITEMS: int = 42 # Number of items in the knapsack problem
KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100) # Range of item values
KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50) # Range of item weights
KNAPSACK_CAPACITY_RATIO: float = 0.5 # Capacity as a fraction of total weight


@dataclass
class Item:
value: int
weight: int


def generate_knapsack_instance(
n_items: int,
value_range: tuple[int, int],
weight_range: tuple[int, int],
capacity_ratio: float,
) -> tuple[list[Item], int]:
"""
Generates a random knapsack problem instance.

Returns a tuple: (items, capacity), where items is a list of Item(value, weight)
and capacity is an int computed as floor(capacity_ratio * total_weight).

Examples
--------
Use a tiny, deterministic instance to validate shape and capacity range:

>>> random.seed(0)
>>> items, cap = generate_knapsack_instance(
... n_items=3,
... value_range=(5, 5),
... weight_range=(10, 10),
... capacity_ratio=0.5
... )
>>> len(items), cap
(3, 15)
>>> all(isinstance(it, Item) for it in items)
True
>>> [it.value for it in items], [it.weight for it in items]
([5, 5, 5], [10, 10, 10])
"""
items = []
for _ in range(n_items):
value = random.randint(*value_range)
weight = random.randint(*weight_range)
items.append(Item(value=value, weight=weight))
# Capacity as a fraction of total weight
capacity = int(sum(it.weight for it in items) * capacity_ratio)
return items, capacity


# Example instance (guarded by __main__ below for printing)
items, capacity = generate_knapsack_instance(
n_items=KNAPSACK_N_ITEMS,
value_range=KNAPSACK_VALUE_RANGE,
weight_range=KNAPSACK_WEIGHT_RANGE,
capacity_ratio=KNAPSACK_CAPACITY_RATIO,
)

# ============================== GA Representation ==============================

# HYPERPARAMETERS (For tuning the GA)

POPULATION_SIZE = 120
GENERATIONS = 200
CROSSOVER_PROBABILITY = 0.9
MUTATION_PROBABILITY = 0.01
TOURNAMENT_K = 3
ELITISM = 2

OVERWEIGHT_PENALTY_FACTOR = 10

genome_t = list[int] # An index list where 1 means item is included, 0 means excluded


def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
"""
Calculates fitness (value) and weight of a candidate solution. If overweight,
the returned value is penalized; weight is the actual summed weight.

Returns (value, weight).

Examples
--------
Feasible genome (no penalty):

>>> it = [Item(10, 4), Item(7, 3), Item(5, 2)]
>>> genome = [1, 0, 1] # take items 0 and 2
>>> evaluate(genome, it, capacity=7)
(15, 6)

Overweight genome (penalty applies):
Total value = 10+7+5 = 22, total weight = 9, capacity = 7, overflow = 2
Penalized value = max(0, 22 - 2 * OVERWEIGHT_PENALTY_FACTOR) = 2

>>> genome = [1, 1, 1]
>>> evaluate(genome, it, capacity=7)
(2, 9)
"""
total_value = 0
total_weight = 0
for gene, item in zip(genome, items):
if gene:
total_value += item.value
total_weight += item.weight
if total_weight > capacity:
overflow = total_weight - capacity
total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
return total_value, total_weight


def random_genome(length: int) -> genome_t:
"""
Generates a random genome (list of 0/1) of length n.

Examples
--------
Check length and content are 0/1 bits:

>>> random.seed(123)
>>> g = random_genome(5)
>>> len(g), set(g).issubset({0, 1})
(5, True)
"""
return [random.randint(0, 1) for _ in range(length)]


def selection(
population: list[genome_t], fitnesses: list[int], tournament_k: int
) -> genome_t:
"""
Performs tournament selection to choose a genome from the population.

Note that other selection strategies exist such as roulette wheel, rank-based, etc.

Examples
--------
Deterministic tournament with fixed seed (k=2):

>>> random.seed(1)
>>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
>>> fits = [0, 5, 9, 7]
>>> parent = selection(pop, fits, tournament_k=2)
>>> parent in pop
True
"""
contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
return max(contenders, key=lambda contender: contender[1])[0][:]


def crossover(
genome_1: genome_t, genome_2: genome_t, p_crossover: float
) -> tuple[genome_t, genome_t]:
"""
Performs single-point crossover between two genomes.
If crossover does not occur (random > p_crossover) or genomes are too short,
returns copies of the parents.

Note: other crossover strategies exist (two-point, uniform, etc.).

Examples
--------
Force crossover with p=1.0 and fixed RNG; verify lengths and bit content:

>>> random.seed(2)
>>> a, b = [0,0,0,0], [1,1,1,1]
>>> c1, c2 = crossover(a, b, p_crossover=1.0)
>>> len(c1) == len(a) == len(c2) == len(b)
True
>>> set(c1).issubset({0,1}) and set(c2).issubset({0,1})
True

No crossover if p=0.0:

>>> c1, c2 = crossover([0,0,0], [1,1,1], p_crossover=0.0)
>>> c1, c2
([0, 0, 0], [1, 1, 1])
"""
min_length = min(len(genome_1), len(genome_2))
if random.random() > p_crossover or min_length < 2:
return genome_1[:], genome_2[:]
cutoff_point = random.randint(1, min_length - 1)
return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[
:cutoff_point
] + genome_1[cutoff_point:]


def mutation(genome: genome_t, p_mutation: float) -> genome_t:
"""
Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.

Note: other mutation strategies exist (swap, scramble, etc.).

Examples
--------
With probability 1.0, every bit flips:

>>> mutation([0, 1, 1, 0], p_mutation=1.0)
[1, 0, 0, 1]

With probability 0.0, nothing changes:

>>> mutation([0, 1, 1, 0], p_mutation=0.0)
[0, 1, 1, 0]
"""
return [(1 - gene) if random.random() < p_mutation else gene for gene in genome]


def run_ga(
items: list[Item],
capacity: int,
pop_size: int = POPULATION_SIZE,
generations: int = GENERATIONS,
p_crossover: float = CROSSOVER_PROBABILITY,
p_mutation: float = MUTATION_PROBABILITY,
tournament_k: int = TOURNAMENT_K,
elitism: int = ELITISM,
) -> dict:
"""
Runs the genetic algorithm to (approximately) solve the knapsack problem.

Returns a dict with keys:
- 'best_genome' (genome_t)
- 'best_value' (int)
- 'best_weight' (int)
- 'capacity' (int)
- 'best_history' (list[int])
- 'avg_history' (list[float])

Examples
--------
Use a tiny instance and few generations to validate structure and lengths:

>>> random.seed(1234)
>>> tiny_items = [Item(5,2), Item(6,3), Item(2,1), Item(7,4)]
>>> cap = 5
>>> out = run_ga(
... tiny_items, cap,
... pop_size=10, generations=5,
... p_crossover=0.9, p_mutation=0.05,
... tournament_k=2, elitism=1
... )
>>> len(out['best_fitness_history']) == 5 and len(out['avg_fitness_history']) == 5
True
>>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
True
>>> out['capacity'] == cap
True
"""
population = [random_genome(len(items)) for _ in range(pop_size)]
best_fitness_history: list[int] = [] # track best fitness per generation
avg_fitness_history: list[float] = []
best_genome_overall: genome_t = []
best_fitness_overall: int = -1

for _ in range(generations):
fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
best_fit = max(fitnesses)
best_idx = fitnesses.index(best_fit)
best_fitness_history.append(best_fit)
avg_fit = sum(fitnesses) / pop_size
avg_fitness_history.append(avg_fit)

if best_fit > best_fitness_overall:
best_fitness_overall = best_fit
best_genome_overall = population[best_idx][:]

# Elitism
sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
elites = [population[idx][:] for idx in elite_indices]

# New generation
new_pop = elites[:]
while len(new_pop) < pop_size:
parent1 = selection(population, fitnesses, tournament_k=tournament_k)
parent2 = selection(population, fitnesses, tournament_k=tournament_k)
child1, child2 = crossover(parent1, parent2, p_crossover)
child1 = mutation(child1, p_mutation)
child2 = mutation(child2, p_mutation)
new_pop.extend([child1, child2])
population = new_pop[:pop_size]

# Final evaluation of the best
best_value, best_weight = evaluate(best_genome_overall, items, capacity)
return {
"best_genome": best_genome_overall,
"best_value": best_value,
"best_weight": best_weight,
"capacity": capacity,
"best_fitness_history": best_fitness_history,
"avg_fitness_history": avg_fitness_history,
}


# ================================ Script entry =================================

if __name__ == "__main__":
result = run_ga(items, capacity)
best_value, best_weight = result["best_value"], result["best_weight"]

print(f"Knapsack capacity: {result['capacity']}")
print(f"Best solution: value = {best_value}, weight = {best_weight}")
# # Uncomment to inspect chosen items:
# best_items = [
# items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
# ]
# print("Items included in the best solution:", best_items)

# # Optional: plot fitness curves
# import matplotlib.pyplot as plt
# plt.figure()
# plt.plot(result["best_fitness_history"], label="Best fitness")
# plt.plot(result["avg_fitness_history"], label="Average fitness")
# plt.title("GA on Knapsack: Fitness over Generations")
# plt.xlabel("Generation")
# plt.ylabel("Fitness")
# plt.legend()
# plt.tight_layout()
# plt.show()