# Computer Assignment 2 (Genetics)

In [2]:
import random
from timeit import default_timer as timer

In [9]:
# Probability that a pair taken from the mating pool is recombined
# (compared against a fresh random number in createCrossoverPool).
CROSSOVER_PROBABILITY = 0.8
# Per-gene probability that mutate() re-draws the gene.
MUTATION_PROBABILITY = 0.05
# Fraction (0..1, not a percentage) of the fittest chromosomes copied
# unchanged into the next generation.
CARRY_PERCENTAGE = 0.2
# Number of chromosomes in every generation.
POPULATION_SIZE = 100
# Number of independent restarts passed to findEquation by the cells below.
MULTI_START_COUNT = 10

In [4]:
# Precedence level of every supported binary operator: the multiplicative
# operators ('%', '*') get a higher level than the additive ones ('+', '-').
OPERATOR_PRECEDENCE = {
    **dict.fromkeys(('+', '-'), 1),
    **dict.fromkeys(('%', '*'), 2),
}

# Operator symbol -> two-argument callable that applies it.
OPERATOR_FUNCTION = {
    '+': (lambda x, y: x + y),
    '-': (lambda x, y: x - y),
    '%': (lambda x, y: x % y),
    '*': (lambda x, y: x * y),
}

## Part 1: Basic Concepts

Each gene is a single symbol that is either an operand or an operator. A chromosome is a sequence of genes whose length equals the equation length. Every operator gene sits between two operand genes, so a chromosome always contains exactly one more operand gene than operator genes.

## Part 2: Initial Population

The initial population size is defined in the `POPULATION_SIZE` variable.

## Part 3: Fitness Function

The fitness function measures how close the value of the equation is to the target value: the closer the value, the higher the fitness. The fitness function is defined as follows:

$$fitness = \frac{1}{1 + \left|target - equation\right|}$$

Based on the above formula, the fitness of an equation always lies in the interval $(0, 1]$, and it equals 1 exactly when the equation evaluates to the target value.

## Part 4: Implementing Crossover and Mutation and Generating Next Population

### Crossover

To build the crossover pool, we go through the mating pool pair by pair and generate a random number between 0 and 1 for each pair. If the number is greater than `CROSSOVER_PROBABILITY`, both chromosomes pass to the next phase without any change. Otherwise, we pick two random points and swap the genes lying between them across the two chromosomes. In this case, we have created a pair of child chromosomes from their parents.

### Mutation

For each gene in each chromosome of the crossover pool, we generate a random number between 0 and 1. If the number is less than `MUTATION_PROBABILITY`, we replace the gene with a randomly chosen gene of the same kind: an operator gene is replaced by a random operator, and an operand gene by a random operand. Keeping the kind unchanged is important because it guarantees that the chromosome remains a valid equation.

## Part 5: Running the Genetic Algorithm

The algorithm is as follows:

In [11]:
class EquationBuilder:
    """Genetic-algorithm search for an equation that evaluates to a goal number.

    A chromosome is a list of `equationLength` genes (an odd number) with
    operands at the even indices and operators at the odd indices, so that
    joining the genes yields a valid infix expression.

    The search is tuned by the module-level settings POPULATION_SIZE,
    CARRY_PERCENTAGE, CROSSOVER_PROBABILITY and MUTATION_PROBABILITY.
    """

    operators: list[str]
    operands: list[int]
    equationLength: int
    goalNumber: int
    maxGenCount: int
    population: list[list]

    def __init__(self, operators: list[str], operands: list[int], equationLength: int, goalNumber: int, maxGenCount: int):
        """Store the problem definition.

        Args:
            operators: Operator symbols a chromosome may contain.
            operands: Operand values a chromosome may contain.
            equationLength: Total number of genes (operands plus operators).
            goalNumber: Value the equation should evaluate to.
            maxGenCount: Maximum number of generations per start.
        """
        self.operators = operators
        self.operands = operands
        self.equationLength = equationLength
        self.goalNumber = goalNumber
        self.maxGenCount = maxGenCount

    def makeFirstPopulation(self) -> list[list]:
        """Return POPULATION_SIZE random, syntactically valid chromosomes.

        Raises:
            ValueError: If `equationLength` is not a positive odd number; an
                alternating operand/operator sequence that starts and ends
                with an operand cannot have any other length.
        """
        if self.equationLength < 1 or self.equationLength % 2 == 0:
            raise ValueError(
                f"equationLength must be a positive odd number, got {self.equationLength}"
            )
        return [self._randomChromosome() for _ in range(POPULATION_SIZE)]

    def _randomChromosome(self) -> list:
        """Build one chromosome: random operands at even indices, operators at odd ones."""
        return [
            random.choice(self.operands if index % 2 == 0 else self.operators)
            for index in range(self.equationLength)
        ]

    def findEquation(self, multiStartCount: int = 1) -> tuple[list, bool]:
        """Run the genetic algorithm, restarting from a fresh population each start.

        Args:
            multiStartCount: Number of independent starts.

        Returns:
            `(chromosome, True)` as soon as a chromosome with fitness 1 is
            seen, otherwise `(bestChromosome, False)` with the fittest
            chromosome evaluated over all starts. The chromosome is None when
            nothing was evaluated (`multiStartCount` or `maxGenCount` below 1).
        """
        carryCount = int(POPULATION_SIZE * CARRY_PERCENTAGE)
        bestSolution = None
        bestFitness = -1.0  # below every real fitness, so the first candidate always wins
        for _ in range(multiStartCount):
            self.population = self.makeFirstPopulation()
            for _ in range(self.maxGenCount):
                random.shuffle(self.population)

                fitnesses = [self.calcFitness(chromosome) for chromosome in self.population]
                # The sort is stable, so ties keep their (shuffled) population order.
                ranked = sorted(zip(fitnesses, self.population), key=lambda pair: pair[0], reverse=True)
                topFitness, topChromosome = ranked[0]
                if topFitness == 1:
                    return topChromosome, True

                # Remember the fitness next to the solution instead of
                # re-evaluating the stored solution every generation.
                if topFitness > bestFitness:
                    bestFitness, bestSolution = topFitness, topChromosome

                bestChromosomes = [chromosome for _, chromosome in ranked]
                carriedChromosomes = bestChromosomes[:carryCount]

                matingPool = self.createMatingPool(bestChromosomes)
                crossoverPool = self.createCrossoverPool(matingPool)

                # Offspring fill the slots not reserved for the carried elite.
                self.population = [
                    self.mutate(child)
                    for child in crossoverPool[:POPULATION_SIZE - carryCount]
                ]
                self.population.extend(carriedChromosomes)

        return bestSolution, False  # type: ignore

    def createMatingPool(self, bestChromosomes: list[list]) -> list[list]:
        """Rank-based selection of POPULATION_SIZE parents.

        Args:
            bestChromosomes: Chromosomes sorted from fittest to least fit.

        Returns:
            POPULATION_SIZE chromosomes in random order. The chromosome at
            rank i (0-based) is present POPULATION_SIZE - i times in the
            multiset the parents are drawn from (without replacement), so
            fitter chromosomes are more likely to be picked.
        """
        candidates = bestChromosomes[:POPULATION_SIZE]
        copiesPerRank = range(POPULATION_SIZE, 0, -1)
        # Same distribution as expanding the multiset, shuffling it and
        # keeping the first POPULATION_SIZE entries, without building it.
        return random.sample(candidates, counts=copiesPerRank, k=POPULATION_SIZE)

    def createCrossoverPool(self, matingPool: list[list]) -> list[list]:
        """Recombine consecutive pairs of the mating pool.

        Each pair is crossed over with probability CROSSOVER_PROBABILITY and
        passed through unchanged otherwise. The result has the same length as
        `matingPool`: an unpaired last chromosome is passed through unchanged.
        """
        crossoverPool = []
        for first, second in zip(matingPool[::2], matingPool[1::2]):
            if random.random() > CROSSOVER_PROBABILITY:
                crossoverPool.extend((first, second))
            else:
                crossoverPool.extend(self.crossover(first, second))
        if len(matingPool) % 2:
            crossoverPool.append(matingPool[-1])
        return crossoverPool

    def crossover(self, chromosome1: list, chromosome2: list) -> tuple[list, list]:
        """Two-point crossover; the parents are left untouched.

        Returns:
            Two children that are copies of the parents with the genes in a
            random half-open index range exchanged. Genes are exchanged at
            equal indices, so operands stay at operand positions.
        """
        # Cut points are drawn from 0..equationLength inclusive so that the
        # half-open range [start, stop) can also cover the last gene.
        start, stop = sorted(random.randint(0, self.equationLength) for _ in range(2))

        child1 = list(chromosome1)
        child2 = list(chromosome2)
        child1[start:stop], child2[start:stop] = chromosome2[start:stop], chromosome1[start:stop]
        return child1, child2

    def mutate(self, chromosome: list) -> list:
        """Return a copy in which each gene is re-drawn with probability MUTATION_PROBABILITY.

        An operator gene is replaced by a random operator and any other gene
        by a random operand (the replacement may equal the old gene).
        """
        mutated = list(chromosome)
        for index, gene in enumerate(mutated):
            if random.random() < MUTATION_PROBABILITY:
                genePool = self.operators if gene in self.operators else self.operands
                mutated[index] = random.choice(genePool)
        return mutated

    def calcFitness(self, chromosome: list) -> float:
        """Return 1 / (1 + |goalNumber - value|) for the chromosome's expression.

        The result is 1 exactly when the expression evaluates to the goal.
        An expression that divides by zero (e.g. `5 % 0`) gets fitness 0.0,
        which is lower than the fitness of any valid expression.
        """
        expression = "".join(map(str, chromosome))
        try:
            # NOTE(review): eval runs whatever the operators/operands spell
            # out; only use this class with trusted operator/operand lists.
            value = eval(expression)
        except ZeroDivisionError:
            return 0.0
        return 1 / (abs(self.goalNumber - value) + 1)

## Part 6: Questions

### 1. How can very large or very small population sizes affect the performance of the algorithm?

If the population size is very small, the algorithm may not be able to find the best solution because it may not have enough chromosomes to select from. On the other hand, if the population size is very large, the algorithm may take a long time to find the best solution and it may be unnecessary to spend that much time.

### 2. What happens if the population size increases in each generation?

This **may** result in a better solution. However, it increases the time complexity and the memory usage. Also, it is unnecessary to do this because we can remove the chromosomes with the lowest fitness from the population in order to keep the population size constant.

### 3. What is the effect of crossover and mutation? Is it possible to use only one of them?

Crossover creates new chromosomes by recombining the genes of existing chromosomes, while mutation changes individual genes and can therefore introduce genes that are missing from the current population. If we use only one of them, we may not be able to find the best solution. For example, if we use only crossover, no new genes ever appear and we may stop at a local maximum. If we use only mutation, the search becomes a slow random walk that cannot combine the good parts of different chromosomes. It is important to note that the crossover and mutation probabilities should be chosen carefully. The crossover probability is usually at least 80% and the mutation probability is usually at most 5%.

### 4. How to accelerate the algorithm?

Fitness function, parameters such as the probability of crossover and mutation, and the population size should be chosen carefully. Also, the crossover and mutation functions can affect the performance of the algorithm.

### 5. How to stop the algorithm if it is not converging?

A common problem with genetic algorithms is that they may get stuck at a local maximum instead of reaching the global maximum. Mutation is a good way to mitigate this problem. Also, we can limit the number of generations in order to stop the algorithm if it is not converging. In this case, we may also use multi-start to increase the probability of finding the global maximum.

### 6. How to stop the algorithm if there exists no solution?

As mentioned in the previous question, we can limit the number of generations in order to stop the algorithm if there exists no solution. We can use the following formula for the generation limit:

$$GenLimit = 2\times POPULATION\_SIZE\times equationLength$$

In [12]:
def getBuildTime(equationBuilder: EquationBuilder, testCount: int) -> float:
    """Return the mean wall-clock time, in seconds, of `testCount` runs of
    `equationBuilder.findEquation()` (called with its default arguments)."""

    def timeSingleRun() -> float:
        # Only the findEquation call itself lies between the two clock reads.
        startedAt = timer()
        equationBuilder.findEquation()
        finishedAt = timer()
        return finishedAt - startedAt

    durations = [timeSingleRun() for _ in range(testCount)]
    return sum(durations) / len(durations)

In [13]:
# Problem instance solved by the cells below.
operands = [1, 2, 3, 4, 5, 6, 7, 8]  # values allowed at operand positions
operators = ['+', '-', '*']  # symbols allowed at operator positions
equationLength = 21  # total number of genes (operands plus operators)
goalNumber = 18019  # value the equation has to evaluate to

In [14]:
# The generation cap follows the Part 6 formula: 2 * POPULATION_SIZE * equationLength.
equationBuilder = EquationBuilder(
    operators,
    operands,
    equationLength,
    goalNumber,
    2 * POPULATION_SIZE * equationLength,
)
equation, isFound = equationBuilder.findEquation(MULTI_START_COUNT)
if not isFound:
    print("No equation found! Best solution:")
# Print the genes followed by the value the joined expression evaluates to.
print(*equation, "=", eval("".join(str(gene) for gene in equation)))

4 * 6 + 5 * 6 * 3 * 5 * 5 * 8 - 1 - 5 + 1 = 18019


As shown above, the algorithm found an equation that evaluates to the target value.

In [17]:
# Average wall-clock time of 10 findEquation() runs (default single start)
# on the builder configured above.
print(f"Average Build Time: {getBuildTime(equationBuilder, 10):.4f}s")

Average Build Time: 0.1507s


The algorithm finds a solution in about 0.15s on average (measured over 10 runs).

In [21]:
# The target 10 is unreachable here: three operands from {1, 2, 3} joined by
# '+' or '-' evaluate to at most 3 + 3 + 3 = 9.
noAnswerEquation = EquationBuilder(
    ['+', '-'],
    [1, 2, 3],
    5,
    10,
    2 * POPULATION_SIZE * 5,
)
result = noAnswerEquation.findEquation(MULTI_START_COUNT)
print(*result[0])
print("Expected:", 10)
print("Actual:", eval("".join(str(gene) for gene in result[0])))

3 + 3 + 3
Expected: 10
Actual: 9


Since no equation of this form can reach the target value, the algorithm stops once the generation limit is reached and returns the nearest answer it has found.